aGrUM  0.16.0
gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection > Class Template Reference

#include <agrum/FMDP/learning/fmdpLearner.h>

+ Inheritance diagram for gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >:
+ Collaboration diagram for gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >:

Public Member Functions

Constructor & destructor.
 FMDPLearner (double learningThreshold, bool actionReward, double similarityThreshold=0.05)
 Default constructor. More...
 
 ~FMDPLearner ()
 Default destructor. More...
 
Initialization
void initialize (FMDP< double > *fmdp)
 Initializes the learner. More...
 
MultiDimFunctionGraph< double > * __instantiateFunctionGraph ()
 Initializes the learner. More...
 
MultiDimFunctionGraph< double > * __instantiateFunctionGraph (Int2Type< IMDDILEARNER >)
 Initializes the learner. More...
 
MultiDimFunctionGraph< double > * __instantiateFunctionGraph (Int2Type< ITILEARNER >)
 Initializes the learner. More...
 
VariableLearnerType * __instantiateVarLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
 Initializes the learner. More...
 
VariableLearnerType * __instantiateVarLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< IMDDILEARNER >)
 Initializes the learner. More...
 
VariableLearnerType * __instantiateVarLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< ITILEARNER >)
 Initializes the learner. More...
 
RewardLearnerType * __instantiateRewardLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
 Initializes the learner. More...
 
RewardLearnerType * __instantiateRewardLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< IMDDILEARNER >)
 Initializes the learner. More...
 
RewardLearnerType * __instantiateRewardLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< ITILEARNER >)
 Initializes the learner. More...
 
Incremental methods
bool addObservation (Idx actionId, const Observation *obs)
 Gives to the learner a new transition. More...
 
void updateFMDP ()
 Starts an update of the data structure in the associated FMDP. More...
 

Miscellaneous methods

double __rmax
 learnerSize More...
 
double __modaMax
 learnerSize More...
 
Size size ()
 learnerSize More...
 
const IVisitableGraphLearner * varLearner (Idx actionId, const DiscreteVariable *var) const
 extractCount More...
 
virtual double rMax () const
 learnerSize More...
 
virtual double modaMax () const
 learnerSize More...
 

Detailed Description

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
class gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >

Definition at line 60 of file fmdpLearner.h.

Member Typedef Documentation

◆ RewardLearnerType

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
typedef LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::RewardLearnerType
private

Definition at line 70 of file fmdpLearner.h.

◆ VariableLearnerType

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
typedef LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::VariableLearnerType
private

Definition at line 65 of file fmdpLearner.h.

◆ VarLearnerTable

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
typedef HashTable< const DiscreteVariable*, VariableLearnerType* > gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::VarLearnerTable
private

Definition at line 73 of file fmdpLearner.h.

Constructor & Destructor Documentation

◆ FMDPLearner()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::FMDPLearner ( double  learningThreshold,
bool  actionReward,
double  similarityThreshold = 0.05 
)

Default constructor.

Definition at line 48 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner.

50  :
51  __actionReward(actionReward),
53  GUM_CONSTRUCTOR(FMDPLearner);
54  __rewardLearner = nullptr;
55  }
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
const double __learningThreshold
Definition: fmdpLearner.h:258
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256
const double __similarityThreshold
Definition: fmdpLearner.h:259

◆ ~FMDPLearner()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::~FMDPLearner ( )

Default destructor.

Definition at line 66 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner.

66  {
67  for (auto actionIter = __actionLearners.beginSafe();
68  actionIter != __actionLearners.endSafe();
69  ++actionIter) {
70  for (auto learnerIter = actionIter.val()->beginSafe();
71  learnerIter != actionIter.val()->endSafe();
72  ++learnerIter)
73  delete learnerIter.val();
74  delete actionIter.val();
75  if (__actionRewardLearners.exists(actionIter.key()))
76  delete __actionRewardLearners[actionIter.key()];
77  }
78 
79  if (__rewardLearner) delete __rewardLearner;
80 
81  GUM_DESTRUCTOR(FMDPLearner);
82  }
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:255
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256

Member Function Documentation

◆ __instantiateFunctionGraph() [1/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
MultiDimFunctionGraph< double >* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph ( )
inline

Initializes the learner.

Definition at line 108 of file fmdpLearner.h.

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize().

108  {
109  return __instantiateFunctionGraph(Int2Type< LearnerSelection >());
110  }
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Initializes the learner.
Definition: fmdpLearner.h:108
+ Here is the caller graph for this function:

◆ __instantiateFunctionGraph() [2/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
MultiDimFunctionGraph< double >* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph ( Int2Type< IMDDILEARNER > )
inline

Initializes the learner.

Definition at line 113 of file fmdpLearner.h.

References gum::MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy >::getReducedAndOrderedInstance().

113  {
116  }
static MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > * getReducedAndOrderedInstance()
Returns a reduced and ordered instance.
+ Here is the call graph for this function:

◆ __instantiateFunctionGraph() [3/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
MultiDimFunctionGraph< double >* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph ( Int2Type< ITILEARNER > )
inline

Initializes the learner.

Definition at line 119 of file fmdpLearner.h.

119  {
120  return MultiDimFunctionGraph< double,
121  ExactTerminalNodePolicy >::getTreeInstance();
122  }

◆ __instantiateRewardLearner() [1/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
RewardLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables 
)
inline

Initializes the learner.

Definition at line 162 of file fmdpLearner.h.

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize().

163  {
165  target, mainVariables, Int2Type< LearnerSelection >());
166  }
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:162
+ Here is the caller graph for this function:

◆ __instantiateRewardLearner() [2/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
RewardLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
Int2Type< IMDDILEARNER > 
)
inline

Initializes the learner.

Definition at line 169 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold, and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__similarityThreshold.

171  {
172  return new RewardLearnerType(
173  target, __learningThreshold, __similarityThreshold, mainVariables);
174  }
const double __learningThreshold
Definition: fmdpLearner.h:258
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:70
const double __similarityThreshold
Definition: fmdpLearner.h:259

◆ __instantiateRewardLearner() [3/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
RewardLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
Int2Type< ITILEARNER > 
)
inline

Initializes the learner.

Definition at line 177 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::addObservation(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::size(), and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::updateFMDP().

179  {
180  return new RewardLearnerType(target, __learningThreshold, mainVariables);
181  }
const double __learningThreshold
Definition: fmdpLearner.h:258
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:70
+ Here is the call graph for this function:

◆ __instantiateVarLearner() [1/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
VariableLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
const DiscreteVariable *  learnedVar 
)
inline

Initializes the learner.

Definition at line 129 of file fmdpLearner.h.

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize().

131  {
133  target, mainVariables, learnedVar, Int2Type< LearnerSelection >());
134  }
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:129
+ Here is the caller graph for this function:

◆ __instantiateVarLearner() [2/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
VariableLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
const DiscreteVariable *  learnedVar,
Int2Type< IMDDILEARNER > 
)
inline

Initializes the learner.

Definition at line 137 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold, and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__similarityThreshold.

140  {
141  return new VariableLearnerType(target,
144  mainVariables,
145  learnedVar);
146  }
LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type VariableLearnerType
Definition: fmdpLearner.h:65
const double __learningThreshold
Definition: fmdpLearner.h:258
const double __similarityThreshold
Definition: fmdpLearner.h:259

◆ __instantiateVarLearner() [3/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
VariableLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
const DiscreteVariable *  learnedVar,
Int2Type< ITILEARNER > 
)
inline

Initializes the learner.

Definition at line 149 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold.

152  {
153  return new VariableLearnerType(
154  target, __learningThreshold, mainVariables, learnedVar);
155  }
LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type VariableLearnerType
Definition: fmdpLearner.h:65
const double __learningThreshold
Definition: fmdpLearner.h:258

◆ addObservation()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
bool gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::addObservation ( Idx  actionId,
const Observation *  obs 
)
virtual

Gives to the learner a new transition.

Parameters
actionId: the action on which the transition was made
obs: the observed transition
Returns
true if learning this transition implies structural changes (can trigger a new planning)

Implements gum::ILearningStrategy.

Definition at line 158 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax, gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endVariables(), and gum::Observation::reward().

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner().

159  {
160  for (SequenceIteratorSafe< const DiscreteVariable* > varIter =
162  varIter != __fmdp->endVariables();
163  ++varIter) {
164  __actionLearners[actionId]
165  ->getWithDefault(*varIter, nullptr)
166  ->addObservation(newObs);
167  __actionLearners[actionId]->getWithDefault(*varIter, nullptr)->updateGraph();
168  }
169 
170  if (__actionReward) {
171  __actionRewardLearners[actionId]->addObservation(newObs);
172  __actionRewardLearners[actionId]->updateGraph();
173  } else {
174  __rewardLearner->addObservation(newObs);
175  __rewardLearner->updateGraph();
176  }
177 
178  __rmax =
179  __rmax < std::abs(newObs->reward()) ? std::abs(newObs->reward()) : __rmax;
180 
181  return false;
182  }
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:95
double __rmax
learnerSize
Definition: fmdpLearner.h:237
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:250
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:255
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:102
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initialize()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
void gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize ( FMDP< double > *  fmdp)
virtual

Initializes the learner.

Implements gum::ILearningStrategy.

Definition at line 97 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__modaMax, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax, gum::FMDP< GUM_SCALAR >::actionName(), gum::FMDP< GUM_SCALAR >::addReward(), gum::FMDP< GUM_SCALAR >::addRewardForAction(), gum::FMDP< GUM_SCALAR >::addTransitionForAction(), gum::FMDP< GUM_SCALAR >::beginActions(), gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endActions(), gum::FMDP< GUM_SCALAR >::endVariables(), gum::Set< Key, Alloc >::insert(), gum::FMDP< GUM_SCALAR >::main2prime(), and gum::MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy >::setTableName().

97  {
98  __fmdp = fmdp;
99 
100  __modaMax = 0;
101  __rmax = 0.0;
102 
103  Set< const DiscreteVariable* > mainVariables;
104  for (auto varIter = __fmdp->beginVariables();
105  varIter != __fmdp->endVariables();
106  ++varIter) {
107  mainVariables.insert(*varIter);
108  __modaMax = __modaMax < (*varIter)->domainSize() ? (*varIter)->domainSize()
109  : __modaMax;
110  }
111 
112  for (auto actionIter = __fmdp->beginActions();
113  actionIter != __fmdp->endActions();
114  ++actionIter) {
115  // Adding a Hashtable for the action
116  __actionLearners.insert(*actionIter, new VarLearnerTable());
117 
118  // Adding a learner for each variable
119  for (auto varIter = __fmdp->beginVariables();
120  varIter != __fmdp->endVariables();
121  ++varIter) {
123  varTrans->setTableName("ACTION : " + __fmdp->actionName(*actionIter)
124  + " - VARIABLE : " + (*varIter)->name());
125  __fmdp->addTransitionForAction(*actionIter, *varIter, varTrans);
126  __actionLearners[*actionIter]->insert(
127  (*varIter),
129  varTrans, mainVariables, __fmdp->main2prime(*varIter)));
130  }
131 
132  if (__actionReward) {
134  reward->setTableName("REWARD - ACTION : "
135  + __fmdp->actionName(*actionIter));
136  __fmdp->addRewardForAction(*actionIter, reward);
137  __actionRewardLearners.insert(
138  *actionIter, __instantiateRewardLearner(reward, mainVariables));
139  }
140  }
141 
142  if (!__actionReward) {
144  reward->setTableName("REWARD");
145  __fmdp->addReward(reward);
146  __rewardLearner = __instantiateRewardLearner(reward, mainVariables);
147  }
148  }
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Initializes the learner.
Definition: fmdpLearner.h:108
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:137
void addReward(const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp.h:228
const std::string & actionName(Idx actionId) const
Returns name of action given in parameter.
Definition: fmdp_tpl.h:347
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:95
double __rmax
learnerSize
Definition: fmdpLearner.h:237
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:250
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:162
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:73
void addTransitionForAction(Idx actionId, const DiscreteVariable *var, const MultiDimImplementation< GUM_SCALAR > *transition)
Adds a variable transition table to specified action.
Definition: fmdp_tpl.h:198
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:255
void addRewardForAction(Idx actionId, const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp_tpl.h:302
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associate to the given main variable.
Definition: fmdp.h:109
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:144
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:129
double __modaMax
learnerSize
Definition: fmdpLearner.h:243
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:102
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256
void setTableName(const std::string &name)
Sets the name of the table represented by this structure.
+ Here is the call graph for this function:

◆ modaMax()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
virtual double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::modaMax ( ) const
inline virtual

learnerSize

Returns

Implements gum::ILearningStrategy.

Definition at line 240 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__modaMax.

240 { return __modaMax; }
double __modaMax
learnerSize
Definition: fmdpLearner.h:243

◆ rMax()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
virtual double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::rMax ( ) const
inline virtual

learnerSize

Returns

Implements gum::ILearningStrategy.

Definition at line 234 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax.

234 { return __rmax; }
double __rmax
learnerSize
Definition: fmdpLearner.h:237

◆ size()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
Size gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::size ( )
virtual

learnerSize

Returns

Implements gum::ILearningStrategy.

Definition at line 192 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDP< GUM_SCALAR >::beginActions(), gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endActions(), and gum::FMDP< GUM_SCALAR >::endVariables().

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner().

192  {
193  Size s = 0;
194  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
195  actionIter != __fmdp->endActions();
196  ++actionIter) {
197  for (SequenceIteratorSafe< const DiscreteVariable* > varIter =
199  varIter != __fmdp->endVariables();
200  ++varIter)
201  s += __actionLearners[*actionIter]
202  ->getWithDefault(*varIter, nullptr)
203  ->size();
204  if (__actionReward) s += __actionRewardLearners[*actionIter]->size();
205  }
206 
207  if (!__actionReward) s += __rewardLearner->size();
208 
209  return s;
210  }
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:137
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:95
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:250
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:255
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:144
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:102
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ updateFMDP()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
void gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::updateFMDP ( )
virtual

Starts an update of the data structure in the associated FMDP.

Implements gum::ILearningStrategy.

Definition at line 221 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDP< GUM_SCALAR >::beginActions(), gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endActions(), and gum::FMDP< GUM_SCALAR >::endVariables().

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner().

221  {
222  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
223  actionIter != __fmdp->endActions();
224  ++actionIter) {
225  for (SequenceIteratorSafe< const DiscreteVariable* > varIter =
227  varIter != __fmdp->endVariables();
228  ++varIter)
229  __actionLearners[*actionIter]
230  ->getWithDefault(*varIter, nullptr)
231  ->updateFunctionGraph();
232  if (__actionReward)
233  __actionRewardLearners[*actionIter]->updateFunctionGraph();
234  }
235 
236  if (!__actionReward) __rewardLearner->updateFunctionGraph();
237  }
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:137
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:95
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:250
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:255
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:144
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:102
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ varLearner()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
const IVisitableGraphLearner* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::varLearner ( Idx  actionId,
const DiscreteVariable *  var 
) const
inline virtual

extractCount

Implements gum::ILearningStrategy.

Definition at line 229 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners.

230  {
231  return __actionLearners[actionId]->getWithDefault(var, nullptr);
232  }
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252

Member Data Documentation

◆ __actionLearners

◆ __actionReward

◆ __actionRewardLearners

◆ __fmdp

◆ __learningThreshold

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
const double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold
private

◆ __modaMax

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__modaMax
private

◆ __rewardLearner

◆ __rmax

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax
private

◆ __similarityThreshold

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
const double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__similarityThreshold
private

The documentation for this class was generated from the following files: