aGrUM  0.14.2
gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection > Class Template Reference

#include <agrum/FMDP/learning/fmdpLearner.h>

+ Inheritance diagram for gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >:
+ Collaboration diagram for gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >:

Public Member Functions

Constructor & destructor.
 FMDPLearner (double learningThreshold, bool actionReward, double similarityThreshold=0.05)
 Default constructor. More...
 
 ~FMDPLearner ()
 Default destructor. More...
 
Initialization
void initialize (FMDP< double > *fmdp)
 Initializes the learner. More...
 
MultiDimFunctionGraph< double > * __instantiateFunctionGraph ()
 Initializes the learner. More...
 
MultiDimFunctionGraph< double > * __instantiateFunctionGraph (Int2Type< IMDDILEARNER >)
 Initializes the learner. More...
 
MultiDimFunctionGraph< double > * __instantiateFunctionGraph (Int2Type< ITILEARNER >)
 Initializes the learner. More...
 
VariableLearnerType * __instantiateVarLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
 Initializes the learner. More...
 
VariableLearnerType * __instantiateVarLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< IMDDILEARNER >)
 Initializes the learner. More...
 
VariableLearnerType * __instantiateVarLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< ITILEARNER >)
 Initializes the learner. More...
 
RewardLearnerType * __instantiateRewardLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
 Initializes the learner. More...
 
RewardLearnerType * __instantiateRewardLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< IMDDILEARNER >)
 Initializes the learner. More...
 
RewardLearnerType * __instantiateRewardLearner (MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< ITILEARNER >)
 Initializes the learner. More...
 
Incremental methods
bool addObservation (Idx actionId, const Observation *obs)
 Gives the learner a new transition. More...
 
void updateFMDP ()
 Starts an update of the data structures in the associated FMDP. More...
 

Miscellaneous methods

double __rmax
 learnerSize More...
 
double __modaMax
 learnerSize More...
 
Size size ()
 learnerSize More...
 
const IVisitableGraphLearner * varLearner (Idx actionId, const DiscreteVariable *var) const
 extractCount More...
 
virtual double rMax () const
 learnerSize More...
 
virtual double modaMax () const
 learnerSize More...
 

Detailed Description

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
class gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >

Definition at line 57 of file fmdpLearner.h.

Member Typedef Documentation

◆ RewardLearnerType

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
typedef LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::RewardLearnerType
private

Definition at line 67 of file fmdpLearner.h.

◆ VariableLearnerType

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
typedef LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::VariableLearnerType
private

Definition at line 62 of file fmdpLearner.h.

◆ VarLearnerTable

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
typedef HashTable< const DiscreteVariable*, VariableLearnerType* > gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::VarLearnerTable
private

Definition at line 70 of file fmdpLearner.h.

Constructor & Destructor Documentation

◆ FMDPLearner()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::FMDPLearner ( double  learningThreshold,
bool  actionReward,
double  similarityThreshold = 0.05 
)

Default constructor.

Definition at line 45 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner.

47  :
48  __actionReward(actionReward),
50  GUM_CONSTRUCTOR(FMDPLearner);
51  __rewardLearner = nullptr;
52  }
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
const double __learningThreshold
Definition: fmdpLearner.h:255
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
const double __similarityThreshold
Definition: fmdpLearner.h:256

◆ ~FMDPLearner()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::~FMDPLearner ( )

Default destructor.

Definition at line 63 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner.

63  {
64  for (auto actionIter = __actionLearners.beginSafe();
65  actionIter != __actionLearners.endSafe();
66  ++actionIter) {
67  for (auto learnerIter = actionIter.val()->beginSafe();
68  learnerIter != actionIter.val()->endSafe();
69  ++learnerIter)
70  delete learnerIter.val();
71  delete actionIter.val();
72  if (__actionRewardLearners.exists(actionIter.key()))
73  delete __actionRewardLearners[actionIter.key()];
74  }
75 
76  if (__rewardLearner) delete __rewardLearner;
77 
78  GUM_DESTRUCTOR(FMDPLearner);
79  }
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253

Member Function Documentation

◆ __instantiateFunctionGraph() [1/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
MultiDimFunctionGraph< double >* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph ( )
inline

Initializes the learner.

Definition at line 105 of file fmdpLearner.h.

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize().

105  {
106  return __instantiateFunctionGraph(Int2Type< LearnerSelection >());
107  }
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Initializes the learner.
Definition: fmdpLearner.h:105
+ Here is the caller graph for this function:

◆ __instantiateFunctionGraph() [2/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
MultiDimFunctionGraph< double >* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph ( Int2Type< IMDDILEARNER > )
inline

Initializes the learner.

Definition at line 110 of file fmdpLearner.h.

References gum::MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy >::getReducedAndOrderedInstance().

110  {
113  }
static MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > * getReducedAndOrderedInstance()
Returns a reduced and ordered instance.
+ Here is the call graph for this function:

◆ __instantiateFunctionGraph() [3/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
MultiDimFunctionGraph< double >* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph ( Int2Type< ITILEARNER > )
inline

Initializes the learner.

Definition at line 116 of file fmdpLearner.h.

116  {
117  return MultiDimFunctionGraph< double,
118  ExactTerminalNodePolicy >::getTreeInstance();
119  }

◆ __instantiateRewardLearner() [1/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
RewardLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables 
)
inline

Initializes the learner.

Definition at line 159 of file fmdpLearner.h.

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize().

160  {
162  target, mainVariables, Int2Type< LearnerSelection >());
163  }
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:159
+ Here is the caller graph for this function:

◆ __instantiateRewardLearner() [2/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
RewardLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
Int2Type< IMDDILEARNER >  
)
inline

Initializes the learner.

Definition at line 166 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold, and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__similarityThreshold.

168  {
169  return new RewardLearnerType(
170  target, __learningThreshold, __similarityThreshold, mainVariables);
171  }
const double __learningThreshold
Definition: fmdpLearner.h:255
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:67
const double __similarityThreshold
Definition: fmdpLearner.h:256

◆ __instantiateRewardLearner() [3/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
RewardLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
Int2Type< ITILEARNER >  
)
inline

Initializes the learner.

Definition at line 174 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::addObservation(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::size(), and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::updateFMDP().

176  {
177  return new RewardLearnerType(target, __learningThreshold, mainVariables);
178  }
const double __learningThreshold
Definition: fmdpLearner.h:255
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:67
+ Here is the call graph for this function:

◆ __instantiateVarLearner() [1/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
VariableLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
const DiscreteVariable *  learnedVar 
)
inline

Initializes the learner.

Definition at line 126 of file fmdpLearner.h.

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize().

128  {
130  target, mainVariables, learnedVar, Int2Type< LearnerSelection >());
131  }
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:126
+ Here is the caller graph for this function:

◆ __instantiateVarLearner() [2/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
VariableLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
const DiscreteVariable *  learnedVar,
Int2Type< IMDDILEARNER >  
)
inline

Initializes the learner.

Definition at line 134 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold, and gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__similarityThreshold.

137  {
138  return new VariableLearnerType(target,
141  mainVariables,
142  learnedVar);
143  }
LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type VariableLearnerType
Definition: fmdpLearner.h:62
const double __learningThreshold
Definition: fmdpLearner.h:255
const double __similarityThreshold
Definition: fmdpLearner.h:256

◆ __instantiateVarLearner() [3/3]

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
VariableLearnerType* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner ( MultiDimFunctionGraph< double > *  target,
Set< const DiscreteVariable * > &  mainVariables,
const DiscreteVariable *  learnedVar,
Int2Type< ITILEARNER >  
)
inline

Initializes the learner.

Definition at line 146 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold.

149  {
150  return new VariableLearnerType(
151  target, __learningThreshold, mainVariables, learnedVar);
152  }
LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type VariableLearnerType
Definition: fmdpLearner.h:62
const double __learningThreshold
Definition: fmdpLearner.h:255

◆ addObservation()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
bool gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::addObservation ( Idx  actionId,
const Observation *  obs 
)
virtual

Gives the learner a new transition.

Parameters
actionId: the action on which the transition was made
obs: the observed transition
Returns
true if learning this transition implies structural changes (can trigger a new planning)

Implements gum::ILearningStrategy.

Definition at line 155 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax, gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endVariables(), and gum::Observation::reward().

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner().

156  {
157  for (SequenceIteratorSafe< const DiscreteVariable* > varIter =
159  varIter != __fmdp->endVariables();
160  ++varIter) {
161  __actionLearners[actionId]
162  ->getWithDefault(*varIter, nullptr)
163  ->addObservation(newObs);
164  __actionLearners[actionId]->getWithDefault(*varIter, nullptr)->updateGraph();
165  }
166 
167  if (__actionReward) {
168  __actionRewardLearners[actionId]->addObservation(newObs);
169  __actionRewardLearners[actionId]->updateGraph();
170  } else {
171  __rewardLearner->addObservation(newObs);
172  __rewardLearner->updateGraph();
173  }
174 
175  __rmax =
176  __rmax < std::abs(newObs->reward()) ? std::abs(newObs->reward()) : __rmax;
177 
178  return false;
179  }
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92
double __rmax
learnerSize
Definition: fmdpLearner.h:234
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initialize()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
void gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::initialize ( FMDP< double > *  fmdp)
virtual

Initializes the learner.

Implements gum::ILearningStrategy.

Definition at line 94 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateFunctionGraph(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateVarLearner(), gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__modaMax, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax, gum::FMDP< GUM_SCALAR >::actionName(), gum::FMDP< GUM_SCALAR >::addReward(), gum::FMDP< GUM_SCALAR >::addRewardForAction(), gum::FMDP< GUM_SCALAR >::addTransitionForAction(), gum::FMDP< GUM_SCALAR >::beginActions(), gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endActions(), gum::FMDP< GUM_SCALAR >::endVariables(), gum::Set< Key, Alloc >::insert(), gum::FMDP< GUM_SCALAR >::main2prime(), and gum::MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy >::setTableName().

94  {
95  __fmdp = fmdp;
96 
97  __modaMax = 0;
98  __rmax = 0.0;
99 
100  Set< const DiscreteVariable* > mainVariables;
101  for (auto varIter = __fmdp->beginVariables();
102  varIter != __fmdp->endVariables();
103  ++varIter) {
104  mainVariables.insert(*varIter);
105  __modaMax = __modaMax < (*varIter)->domainSize() ? (*varIter)->domainSize()
106  : __modaMax;
107  }
108 
109  for (auto actionIter = __fmdp->beginActions();
110  actionIter != __fmdp->endActions();
111  ++actionIter) {
112  // Adding a Hashtable for the action
113  __actionLearners.insert(*actionIter, new VarLearnerTable());
114 
115  // Adding a learner for each variable
116  for (auto varIter = __fmdp->beginVariables();
117  varIter != __fmdp->endVariables();
118  ++varIter) {
120  varTrans->setTableName("ACTION : " + __fmdp->actionName(*actionIter)
121  + " - VARIABLE : " + (*varIter)->name());
122  __fmdp->addTransitionForAction(*actionIter, *varIter, varTrans);
123  __actionLearners[*actionIter]->insert(
124  (*varIter),
126  varTrans, mainVariables, __fmdp->main2prime(*varIter)));
127  }
128 
129  if (__actionReward) {
131  reward->setTableName("REWARD - ACTION : "
132  + __fmdp->actionName(*actionIter));
133  __fmdp->addRewardForAction(*actionIter, reward);
134  __actionRewardLearners.insert(
135  *actionIter, __instantiateRewardLearner(reward, mainVariables));
136  }
137  }
138 
139  if (!__actionReward) {
141  reward->setTableName("REWARD");
142  __fmdp->addReward(reward);
143  __rewardLearner = __instantiateRewardLearner(reward, mainVariables);
144  }
145  }
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Initializes the learner.
Definition: fmdpLearner.h:105
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:134
void addReward(const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp.h:225
const std::string & actionName(Idx actionId) const
Returns name of action given in parameter.
Definition: fmdp_tpl.h:344
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92
double __rmax
learnerSize
Definition: fmdpLearner.h:234
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:159
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:70
void addTransitionForAction(Idx actionId, const DiscreteVariable *var, const MultiDimImplementation< GUM_SCALAR > *transition)
Adds a variable transition table to specified action.
Definition: fmdp_tpl.h:195
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
void addRewardForAction(Idx actionId, const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp_tpl.h:299
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associated with the given main variable.
Definition: fmdp.h:106
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:141
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:126
double __modaMax
learnerSize
Definition: fmdpLearner.h:240
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
void setTableName(const std::string &name)
Sets the name of the table represented by this structure.
+ Here is the call graph for this function:

◆ modaMax()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
virtual double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::modaMax ( ) const
inline virtual

learnerSize

Returns
the largest domain size found among the FMDP's variables when the learner was initialized

Implements gum::ILearningStrategy.

Definition at line 237 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__modaMax.

237 { return __modaMax; }
double __modaMax
learnerSize
Definition: fmdpLearner.h:240

◆ rMax()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
virtual double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::rMax ( ) const
inline virtual

learnerSize

Returns
the largest absolute reward value among the observations added since the learner was initialized

Implements gum::ILearningStrategy.

Definition at line 231 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax.

231 { return __rmax; }
double __rmax
learnerSize
Definition: fmdpLearner.h:234

◆ size()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
Size gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::size ( )
virtual

learnerSize

Returns
the sum of the sizes of every per-action variable learner plus the reward learner(s) in use

Implements gum::ILearningStrategy.

Definition at line 189 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDP< GUM_SCALAR >::beginActions(), gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endActions(), and gum::FMDP< GUM_SCALAR >::endVariables().

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner().

189  {
190  Size s = 0;
191  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
192  actionIter != __fmdp->endActions();
193  ++actionIter) {
194  for (SequenceIteratorSafe< const DiscreteVariable* > varIter =
196  varIter != __fmdp->endVariables();
197  ++varIter)
198  s += __actionLearners[*actionIter]
199  ->getWithDefault(*varIter, nullptr)
200  ->size();
201  if (__actionReward) s += __actionRewardLearners[*actionIter]->size();
202  }
203 
204  if (!__actionReward) s += __rewardLearner->size();
205 
206  return s;
207  }
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:134
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:141
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ updateFMDP()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
void gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::updateFMDP ( )
virtual

Starts an update of the data structures in the associated FMDP.

Implements gum::ILearningStrategy.

Definition at line 218 of file fmdpLearner_tpl.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionReward, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionRewardLearners, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__fmdp, gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rewardLearner, gum::FMDP< GUM_SCALAR >::beginActions(), gum::FMDP< GUM_SCALAR >::beginVariables(), gum::FMDP< GUM_SCALAR >::endActions(), and gum::FMDP< GUM_SCALAR >::endVariables().

Referenced by gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__instantiateRewardLearner().

218  {
219  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
220  actionIter != __fmdp->endActions();
221  ++actionIter) {
222  for (SequenceIteratorSafe< const DiscreteVariable* > varIter =
224  varIter != __fmdp->endVariables();
225  ++varIter)
226  __actionLearners[*actionIter]
227  ->getWithDefault(*varIter, nullptr)
228  ->updateFunctionGraph();
229  if (__actionReward)
230  __actionRewardLearners[*actionIter]->updateFunctionGraph();
231  }
232 
233  if (!__actionReward) __rewardLearner->updateFunctionGraph();
234  }
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:134
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:141
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ varLearner()

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
const IVisitableGraphLearner* gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::varLearner ( Idx  actionId,
const DiscreteVariable *  var 
) const
inline virtual

extractCount

Implements gum::ILearningStrategy.

Definition at line 226 of file fmdpLearner.h.

References gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__actionLearners.

227  {
228  return __actionLearners[actionId]->getWithDefault(var, nullptr);
229  }
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249

Member Data Documentation

◆ __actionLearners

◆ __actionReward

◆ __actionRewardLearners

◆ __fmdp

◆ __learningThreshold

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
const double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__learningThreshold
private

◆ __modaMax

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__modaMax
private

◆ __rewardLearner

◆ __rmax

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__rmax
private

◆ __similarityThreshold

template<TESTNAME VariableAttributeSelection, TESTNAME RewardAttributeSelection, LEARNERNAME LearnerSelection>
const double gum::FMDPLearner< VariableAttributeSelection, RewardAttributeSelection, LearnerSelection >::__similarityThreshold
private

The documentation for this class was generated from the following files: