d8/d30/fmdpLearner_8h_source.html

 /***************************************************************************
  *   Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN  *
  *   {prenom.nom}_at_lip6.fr                                               *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   (at your option) any later version.                                   *
  *                                                                         *
  *   This program is distributed in the hope that it will be useful,       *
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
  *   along with this program; if not, write to the                         *
  *   Free Software Foundation, Inc.,                                       *
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************/
 // =========================================================================
 #ifndef GUM_FMDP_LEARNER_H
 #define GUM_FMDP_LEARNER_H
 // =========================================================================
 #include <agrum/core/hashTable.h>
 // =========================================================================
 #include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
 #include <agrum/FMDP/fmdp.h>
 #include <agrum/FMDP/learning/datastructure/imddi.h>
 #include <agrum/FMDP/learning/datastructure/iti.h>
 #include <agrum/FMDP/learning/observation.h>
 // =========================================================================
 #include <agrum/variables/discreteVariable.h>
 // =========================================================================

 namespace gum {

   /**
    * @brief Learning strategy that feeds observed transitions to incremental
    * graph learners and stores the learned model in an FMDP.
    *
    * @tparam VariableAttributeSelection test forwarded to the learner type used
    *         for the variables (first template argument of IMDDI / ITI).
    * @tparam RewardAttributeSelection test forwarded to the learner type used
    *         for the reward (first template argument of IMDDI / ITI).
    * @tparam LearnerSelection selects the learner family: it is handed to
    *         LearnerSelect to pick the learner types and, wrapped in Int2Type,
    *         it picks the IMDDILEARNER or ITILEARNER overload of every
    *         __instantiate* helper below.
    *
    * The non-inline members are defined in fmdpLearner_tpl.h.
    */
   template < TESTNAME    VariableAttributeSelection,
              TESTNAME    RewardAttributeSelection,
              LEARNERNAME LearnerSelection >
   class FMDPLearner : public ILearningStrategy {
     /// Learner type used for the variables, chosen by LearnerSelect between
     /// the IMDDI and the ITI instantiations.
     using VariableLearnerType =
        typename LearnerSelect< LearnerSelection,
                                IMDDI< VariableAttributeSelection, false >,
                                ITI< VariableAttributeSelection, false > >::type;

     /// Learner type used for the reward, chosen by LearnerSelect between
     /// the IMDDI and the ITI instantiations.
     using RewardLearnerType =
        typename LearnerSelect< LearnerSelection,
                                IMDDI< RewardAttributeSelection, true >,
                                ITI< RewardAttributeSelection, true > >::type;

     /// Table associating a variable with its learner.
     using VarLearnerTable =
        HashTable< const DiscreteVariable*, VariableLearnerType* >;

     public:
     // ###################################################################
     /// @name Constructor & destructor
     // ###################################################################
     /// @{

     /**
      * @brief Constructor.
      * @param learningThreshold threshold forwarded to every learner this
      *        class instantiates.
      * @param actionReward stored in __actionReward.
      *        NOTE(review): presumably selects one reward learner per action
      *        instead of a single shared one -- confirm in fmdpLearner_tpl.h.
      * @param similarityThreshold threshold additionally forwarded to the
      *        IMDDI learners (the ITI overloads do not use it).
      */
     FMDPLearner(double learningThreshold,
                 bool   actionReward,
                 double similarityThreshold = 0.05);

     /// Destructor.
     ~FMDPLearner();

     /// @}

     // ###################################################################
     /// @name Initialization
     // ###################################################################
     /// @{

     /**
      * @brief Initializes the learner.
      * @param fmdp the FMDP handed to the learner.
      */
     void initialize(FMDP< double >* fmdp);

     /**
      * @brief Instantiates the function graph matching the selected learner.
      *
      * Dispatches on LearnerSelection to one of the two overloads below.
      */
     MultiDimFunctionGraph< double >* __instantiateFunctionGraph() {
       return __instantiateFunctionGraph(Int2Type< LearnerSelection >());
     }

     /// IMDDILEARNER overload: returns a reduced and ordered instance.
     MultiDimFunctionGraph< double >*
        __instantiateFunctionGraph(Int2Type< IMDDILEARNER >) {
       return MultiDimFunctionGraph< double, ExactTerminalNodePolicy >::
          getReducedAndOrderedInstance();
     }

     /// ITILEARNER overload: returns a tree instance.
     MultiDimFunctionGraph< double >*
        __instantiateFunctionGraph(Int2Type< ITILEARNER >) {
       return MultiDimFunctionGraph< double,
                                     ExactTerminalNodePolicy >::getTreeInstance();
     }

     /**
      * @brief Instantiates the learner of a variable.
      *
      * Dispatches on LearnerSelection to one of the two overloads below.
      *
      * @param target function graph handed to the new learner.
      * @param mainVariables set of variables handed to the new learner.
      * @param learnedVar variable handed to the new learner as the learned one.
      * @return a learner allocated with new; the raw pointer is handed to the
      *         caller.
      */
     VariableLearnerType*
        __instantiateVarLearner(MultiDimFunctionGraph< double >* target,
                                Set< const DiscreteVariable* >&  mainVariables,
                                const DiscreteVariable*          learnedVar) {
       return __instantiateVarLearner(
          target, mainVariables, learnedVar, Int2Type< LearnerSelection >());
     }

     /// IMDDILEARNER overload: the learner also receives __similarityThreshold.
     VariableLearnerType*
        __instantiateVarLearner(MultiDimFunctionGraph< double >* target,
                                Set< const DiscreteVariable* >&  mainVariables,
                                const DiscreteVariable*          learnedVar,
                                Int2Type< IMDDILEARNER >) {
       return new VariableLearnerType(target,
                                      __learningThreshold,
                                      __similarityThreshold,
                                      mainVariables,
                                      learnedVar);
     }

     /// ITILEARNER overload: the learner only receives __learningThreshold.
     VariableLearnerType*
        __instantiateVarLearner(MultiDimFunctionGraph< double >* target,
                                Set< const DiscreteVariable* >&  mainVariables,
                                const DiscreteVariable*          learnedVar,
                                Int2Type< ITILEARNER >) {
       return new VariableLearnerType(
          target, __learningThreshold, mainVariables, learnedVar);
     }

     /**
      * @brief Instantiates a reward learner.
      *
      * Dispatches on LearnerSelection to one of the two overloads below.
      *
      * @param target function graph handed to the new learner.
      * @param mainVariables set of variables handed to the new learner.
      * @return a learner allocated with new; the raw pointer is handed to the
      *         caller.
      */
     RewardLearnerType*
        __instantiateRewardLearner(MultiDimFunctionGraph< double >* target,
                                   Set< const DiscreteVariable* >&  mainVariables) {
       return __instantiateRewardLearner(
          target, mainVariables, Int2Type< LearnerSelection >());
     }

     /// IMDDILEARNER overload: the learner also receives __similarityThreshold.
     RewardLearnerType*
        __instantiateRewardLearner(MultiDimFunctionGraph< double >* target,
                                   Set< const DiscreteVariable* >&  mainVariables,
                                   Int2Type< IMDDILEARNER >) {
       return new RewardLearnerType(
          target, __learningThreshold, __similarityThreshold, mainVariables);
     }

     /// ITILEARNER overload: the learner only receives __learningThreshold.
     RewardLearnerType*
        __instantiateRewardLearner(MultiDimFunctionGraph< double >* target,
                                   Set< const DiscreteVariable* >&  mainVariables,
                                   Int2Type< ITILEARNER >) {
       return new RewardLearnerType(target, __learningThreshold, mainVariables);
     }

     /// @}

     // ###################################################################
     /// @name Incremental learning
     // ###################################################################
     /// @{

     /**
      * @brief Gives the learner a new transition.
      * @param actionId id of the action attached to the observation.
      * @param obs the observation.
      * @return NOTE(review): meaning of the boolean is defined in
      *         fmdpLearner_tpl.h -- confirm there.
      */
     bool addObservation(Idx actionId, const Observation* obs);

     /// Starts an update of the data structures in the associated FMDP.
     void updateFMDP();

     /// @}

     // ###################################################################
     /// @name Accessors
     // ###################################################################
     /// @{

     /// Size of the learner (defined in fmdpLearner_tpl.h).
     Size size();

     /**
      * @brief Gives access to the learner of a variable for a given action.
      *
      * Fetches the table stored for actionId in __actionLearners, then asks it
      * for the entry of var with nullptr as the default value.
      *
      * NOTE(review): confirm how HashTable::operator[] reacts to an unknown
      * actionId and whether getWithDefault stores the nullptr default in the
      * table when var is missing.
      */
     const IVisitableGraphLearner* varLearner(Idx                     actionId,
                                              const DiscreteVariable* var) const {
       return __actionLearners[actionId]->getWithDefault(var, nullptr);
     }

     /// Returns the current value of __rmax.
     virtual double rMax() const { return __rmax; }

     private:
     /// Value returned by rMax(). Initialized here so that rMax() never reads
     /// an indeterminate value, whatever the out-of-line constructor sets.
     double __rmax = 0.0;

     public:
     /// Returns the current value of __modaMax.
     virtual double modaMax() const { return __modaMax; }

     /// @}

     private:
     /// Value returned by modaMax(). Initialized here for the same reason as
     /// __rmax.
     double __modaMax = 0.0;

     /// The FMDP to store the learned model. Null until one is provided.
     FMDP< double >* __fmdp = nullptr;

     /// For each action id, the table of its variable learners.
     HashTable< Idx, VarLearnerTable* > __actionLearners;

     /// Flag received by the constructor.
     /// NOTE(review): presumably tells whether __actionRewardLearners or
     /// __rewardLearner is in use -- confirm in fmdpLearner_tpl.h.
     bool                                 __actionReward;
     /// Reward learners indexed by action id.
     HashTable< Idx, RewardLearnerType* > __actionRewardLearners;
     /// Single reward learner. Null until one is instantiated.
     RewardLearnerType*                   __rewardLearner = nullptr;

     /// Threshold forwarded to every learner instantiated by this class.
     const double __learningThreshold;
     /// Threshold forwarded to the IMDDI learners only.
     const double __similarityThreshold;
   };


 } /* namespace gum */

 #include <agrum/FMDP/learning/fmdpLearner_tpl.h>

 #endif   // GUM_FMDP_LEARNER_H
gum::FMDPLearner::__instantiateFunctionGraph
MultiDimFunctionGraph< double > * __instantiateFunctionGraph(Int2Type< IMDDILEARNER >)
Returns a reduced and ordered MultiDimFunctionGraph instance (overload selected for the IMDDI learner).
Definition: fmdpLearner.h:110

gum::ITI
Learn a graphical representation of a function as a decision tree.
Definition: iti.h:59

discreteVariable.h
Base class for discrete random variable.

gum::FMDPLearner::__instantiateFunctionGraph
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Instantiates the function graph matching the selected learner by dispatching on LearnerSelection.
Definition: fmdpLearner.h:105

gum::FMDPLearner::initialize
void initialize(FMDP< double > *fmdp)
Initializes the learner.
Definition: fmdpLearner_tpl.h:94

gum::FMDPLearner::__actionReward
bool __actionReward
Definition: fmdpLearner.h:251

ILearningStrategy.h
Headers of the Learning Strategy interface.

fmdpLearner_tpl.h
Template Implementations of the FMDPLearner class.

gum::LearnerSelect
Definition: templateStrategy.h:62

gum::FMDPLearner::__actionLearners
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249

iti.h
Headers of the ITI class.

double

gum::FMDPLearner::varLearner
const IVisitableGraphLearner * varLearner(Idx actionId, const DiscreteVariable *var) const
Returns the learner stored for the given action and variable, with nullptr as the default value.
Definition: fmdpLearner.h:226

gum::IVisitableGraphLearner
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
Definition: IVisitableGraphLearner.h:50

gum::FMDPLearner::VariableLearnerType
LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type VariableLearnerType
Definition: fmdpLearner.h:62

gum::FMDPLearner::FMDPLearner
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
Definition: fmdpLearner_tpl.h:45

gum::FMDP< double >

gum::DiscreteVariable
Base class for discrete random variable.
Definition: discreteVariable.h:57

gum
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25

gum::FMDPLearner::~FMDPLearner
~FMDPLearner()
Default destructor.
Definition: fmdpLearner_tpl.h:63

gum::FMDPLearner::addObservation
bool addObservation(Idx actionId, const Observation *obs)
Gives the learner a new transition.
Definition: fmdpLearner_tpl.h:155

gum::LEARNERNAME
LEARNERNAME
Definition: templateStrategy.h:60

gum::HashTable
The class for generic Hash Tables.
Definition: hashTable.h:676

gum::FMDPLearner::__learningThreshold
const double __learningThreshold
Definition: fmdpLearner.h:255

gum::FMDPLearner::modaMax
virtual double modaMax() const
Returns the current value of __modaMax.
Definition: fmdpLearner.h:237

gum::FMDPLearner::__rmax
double __rmax
Value returned by rMax().
Definition: fmdpLearner.h:234

gum::FMDPLearner::__fmdp
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247

gum::Observation
Definition: observation.h:50

gum::FMDPLearner::__instantiateRewardLearner
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Instantiates a reward learner matching the selected learner by dispatching on LearnerSelection.
Definition: fmdpLearner.h:159

gum::Set
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:162

fmdp.h
Class implementing a factored Markov decision process.

gum::FMDPLearner::size
Size size()
learnerSize
Definition: fmdpLearner_tpl.h:189

gum::ILearningStrategy
<agrum/FMDP/SDyna/ILearningStrategy.h>
Definition: ILearningStrategy.h:52

gum::FMDPLearner::VarLearnerTable
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:70

gum::FMDPLearner::__actionRewardLearners
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252

gum::TESTNAME
TESTNAME
Definition: templateStrategy.h:37

gum::FMDPLearner
Definition: fmdpLearner.h:57

gum::Int2Type
Definition: templateStrategy.h:33

gum::MultiDimFunctionGraph< double >

observation.h
Headers of the Observation class.

gum::FMDPLearner::__instantiateRewardLearner
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< ITILEARNER >)
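Allocates a reward learner from the target graph, the learning threshold and the main variables (overload selected for the ITI learner).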
Definition: fmdpLearner.h:174

gum::FMDPLearner::RewardLearnerType
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:67

gum::FMDPLearner::updateFMDP
void updateFMDP()
Starts an update of the data structures in the associated FMDP.
Definition: fmdpLearner_tpl.h:218

gum::FMDPLearner::rMax
virtual double rMax() const
Returns the current value of __rmax.
Definition: fmdpLearner.h:231

gum::FMDPLearner::__instantiateVarLearner
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< IMDDILEARNER >)
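Allocates a variable learner from the target graph, the learning and similarity thresholds, the main variables and the learned variable (overload selected for the IMDDI learner).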
Definition: fmdpLearner.h:134

gum::FMDPLearner::__instantiateVarLearner
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Instantiates the learner of a variable matching the selected learner by dispatching on LearnerSelection.
Definition: fmdpLearner.h:126

gum::Idx
Size Idx
Type for indexes.
Definition: types.h:50

gum::FMDPLearner::__modaMax
double __modaMax
Value returned by modaMax().
Definition: fmdpLearner.h:240

gum::FMDPLearner::__instantiateRewardLearner
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< IMDDILEARNER >)
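Allocates a reward learner from the target graph, the learning and similarity thresholds and the main variables (overload selected for the IMDDI learner).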
Definition: fmdpLearner.h:166

gum::ExactTerminalNodePolicy
Implementation of a Terminal Node Policy that maps nodeid directly to value.
Definition: ExactTerminalNodePolicy.h:47

gum::Size
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45

imddi.h
Headers of the IMDDI class.

gum::FMDPLearner::__instantiateFunctionGraph
MultiDimFunctionGraph< double > * __instantiateFunctionGraph(Int2Type< ITILEARNER >)
Returns a tree instance of MultiDimFunctionGraph (overload selected for the ITI learner).
Definition: fmdpLearner.h:116

gum::FMDPLearner::__rewardLearner
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253

gum::FMDPLearner::__similarityThreshold
const double __similarityThreshold
Definition: fmdpLearner.h:256

gum::IMDDI
Definition: imddi.h:55

gum::FMDPLearner::__instantiateVarLearner
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< ITILEARNER >)
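Allocates a variable learner from the target graph, the learning threshold, the main variables and the learned variable (overload selected for the ITI learner).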
Definition: fmdpLearner.h:146

hashTable.h
Classes for hash tables and their iterators.

gum::MultiDimFunctionGraph::getReducedAndOrderedInstance
static MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > * getReducedAndOrderedInstance()
Returns a reduced and ordered instance.
Definition: multiDimFunctionGraph_tpl.h:638