aGrUM  0.16.0
fmdpLearner_tpl.h
Go to the documentation of this file.
1 
30 // =========================================================================
32 // =========================================================================
33 
34 namespace gum {
35 
36  // ==========================================================================
37  // Constructor & destructor.
38  // ==========================================================================
39 
40  // ###################################################################
41  // Default constructor
42  // ###################################################################
43  template < TESTNAME VariableAttributeSelection,
44  TESTNAME RewardAttributeSelection,
45  LEARNERNAME LearnerSelection >
46  FMDPLearner< VariableAttributeSelection,
47  RewardAttributeSelection,
48  LearnerSelection >::FMDPLearner(double lT,
49  bool actionReward,
50  double sT) :
51  __actionReward(actionReward),
52  __learningThreshold(lT), __similarityThreshold(sT) {
53  GUM_CONSTRUCTOR(FMDPLearner);
54  __rewardLearner = nullptr;
55  }
56 
57 
58  // ###################################################################
59  // Default destructor
60  // ###################################################################
61  template < TESTNAME VariableAttributeSelection,
62  TESTNAME RewardAttributeSelection,
63  LEARNERNAME LearnerSelection >
64  FMDPLearner< VariableAttributeSelection,
65  RewardAttributeSelection,
66  LearnerSelection >::~FMDPLearner() {
67  for (auto actionIter = __actionLearners.beginSafe();
68  actionIter != __actionLearners.endSafe();
69  ++actionIter) {
70  for (auto learnerIter = actionIter.val()->beginSafe();
71  learnerIter != actionIter.val()->endSafe();
72  ++learnerIter)
73  delete learnerIter.val();
74  delete actionIter.val();
75  if (__actionRewardLearners.exists(actionIter.key()))
76  delete __actionRewardLearners[actionIter.key()];
77  }
78 
79  if (__rewardLearner) delete __rewardLearner;
80 
81  GUM_DESTRUCTOR(FMDPLearner);
82  }
83 
84 
85  // ==========================================================================
86  //
87  // ==========================================================================
88 
89  // ###################################################################
90  // Attaches the learner to an FMDP and creates every sub-learner
91  // ###################################################################
    // For each action of the FMDP a table of learners (one per variable) is
    // inserted in __actionLearners, and the table learned for each variable
    // is registered in the FMDP as the transition of that variable under that
    // action. Depending on __actionReward, either one reward learner per
    // action (__actionRewardLearners) or a single one (__rewardLearner) is
    // created and its table registered in the FMDP as a reward.
    //
    // __modaMax and __rmax are reset to 0; __modaMax then receives the largest
    // domainSize() found among the variables of the FMDP.
    //
    // @param fmdp the FMDP receiving the transition and reward tables. The
    //        pointer is kept in __fmdp and dereferenced by the other methods
    //        of this file.
    //
    // NOTE(review): this listing skips source lines 122, 128, 133 and 143
    // (the numbering jumps). They presumably declare `varTrans` / `reward`
    // and open the __instantiateVarLearner(...) call -- confirm against the
    // original fmdpLearner_tpl.h before relying on this extract.
92  template < TESTNAME VariableAttributeSelection,
93  TESTNAME RewardAttributeSelection,
94  LEARNERNAME LearnerSelection >
95  void FMDPLearner< VariableAttributeSelection,
96  RewardAttributeSelection,
97  LearnerSelection >::initialize(FMDP< double >* fmdp) {
98  __fmdp = fmdp;
99 
100  __modaMax = 0;
101  __rmax = 0.0;
102 
     // Collect every variable of the FMDP and track the largest domain size.
103  Set< const DiscreteVariable* > mainVariables;
104  for (auto varIter = __fmdp->beginVariables();
105  varIter != __fmdp->endVariables();
106  ++varIter) {
107  mainVariables.insert(*varIter);
108  __modaMax = __modaMax < (*varIter)->domainSize() ? (*varIter)->domainSize()
109  : __modaMax;
110  }
111 
112  for (auto actionIter = __fmdp->beginActions();
113  actionIter != __fmdp->endActions();
114  ++actionIter) {
115  // Adding a Hashtable for the action
     // (allocated with new here; the destructor of this class deletes it)
116  __actionLearners.insert(*actionIter, new VarLearnerTable());
117 
118  // Adding a learner for each variable
119  for (auto varIter = __fmdp->beginVariables();
120  varIter != __fmdp->endVariables();
121  ++varIter) {
     // NOTE(review): source line 122 is absent from the listing; `varTrans`
     // is presumably created there -- confirm.
123  varTrans->setTableName("ACTION : " + __fmdp->actionName(*actionIter)
124  + " - VARIABLE : " + (*varIter)->name());
125  __fmdp->addTransitionForAction(*actionIter, *varIter, varTrans);
     // NOTE(review): source line 128 is absent; judging by the argument list
     // it presumably reads `__instantiateVarLearner(` -- confirm.
126  __actionLearners[*actionIter]->insert(
127  (*varIter),
129  varTrans, mainVariables, __fmdp->main2prime(*varIter)));
130  }
131 
     // Per-action reward: one table and one reward learner for this action.
132  if (__actionReward) {
     // NOTE(review): source line 133 is absent; `reward` is presumably
     // created there -- confirm.
134  reward->setTableName("REWARD - ACTION : "
135  + __fmdp->actionName(*actionIter));
136  __fmdp->addRewardForAction(*actionIter, reward);
137  __actionRewardLearners.insert(
138  *actionIter, __instantiateRewardLearner(reward, mainVariables));
139  }
140  }
141 
     // Shared reward: a single table and learner, independent of the action.
142  if (!__actionReward) {
     // NOTE(review): source line 143 is absent; `reward` is presumably
     // created there -- confirm.
144  reward->setTableName("REWARD");
145  __fmdp->addReward(reward);
146  __rewardLearner = __instantiateRewardLearner(reward, mainVariables);
147  }
148  }
149 
150  // ###################################################################
151  // Feeds one observation to the learners of the given action
152  // ###################################################################
     // The observation is handed to the learner of every variable registered
     // under actionId and to the reward learner (the per-action one when
     // __actionReward is set, the shared __rewardLearner otherwise). Each
     // learner has updateGraph() called right after receiving the observation.
     // __rmax is raised to the absolute value of the observed reward whenever
     // that value exceeds the current __rmax.
     //
     // @param actionId key used to look up the learners in __actionLearners
     //        and, when __actionReward is set, in __actionRewardLearners
     // @param newObs   observation forwarded to the learners; its reward() is
     //        read to update __rmax
     // @return always false
     //
     // NOTE(review): source lines 160-161 are absent from this listing; they
     // presumably hold the head of the `for` loop over the FMDP variables
     // that declares `varIter` -- confirm against the original file.
     // NOTE(review): the pointer returned by getWithDefault(*varIter, nullptr)
     // is dereferenced unchecked; this presumably relies on initialize()
     // having inserted a learner for every variable -- confirm.
153  template < TESTNAME VariableAttributeSelection,
154  TESTNAME RewardAttributeSelection,
155  LEARNERNAME LearnerSelection >
156  bool FMDPLearner< VariableAttributeSelection,
157  RewardAttributeSelection,
158  LearnerSelection >::addObservation(Idx actionId,
159  const Observation* newObs) {
162  varIter != __fmdp->endVariables();
163  ++varIter) {
164  __actionLearners[actionId]
165  ->getWithDefault(*varIter, nullptr)
166  ->addObservation(newObs);
167  __actionLearners[actionId]->getWithDefault(*varIter, nullptr)->updateGraph();
168  }
169 
     // Reward side: per-action learner or the single shared one.
170  if (__actionReward) {
171  __actionRewardLearners[actionId]->addObservation(newObs);
172  __actionRewardLearners[actionId]->updateGraph();
173  } else {
174  __rewardLearner->addObservation(newObs);
175  __rewardLearner->updateGraph();
176  }
177 
     // Keep the largest absolute reward seen so far.
178  __rmax =
179  __rmax < std::abs(newObs->reward()) ? std::abs(newObs->reward()) : __rmax;
180 
181  return false;
182  }
183 
184  // ###################################################################
185  // Total size of all the learners held by this object
186  // ###################################################################
     // Adds up size() of the learner of every variable under every action of
     // the FMDP, plus size() of the reward learner(s): one per action when
     // __actionReward is set, the shared __rewardLearner otherwise.
     //
     // @return the accumulated sum
     //
     // __fmdp is dereferenced here and, in this file, is only assigned by
     // initialize(); the method is therefore meant to run after initialize().
     //
     // NOTE(review): source lines 197-198 are absent from this listing; they
     // presumably hold the head of the inner `for` loop over the FMDP
     // variables that declares `varIter` -- confirm against the original file.
187  template < TESTNAME VariableAttributeSelection,
188  TESTNAME RewardAttributeSelection,
189  LEARNERNAME LearnerSelection >
190  Size FMDPLearner< VariableAttributeSelection,
191  RewardAttributeSelection,
192  LearnerSelection >::size() {
193  Size s = 0;
194  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
195  actionIter != __fmdp->endActions();
196  ++actionIter) {
199  varIter != __fmdp->endVariables();
200  ++varIter)
201  s += __actionLearners[*actionIter]
202  ->getWithDefault(*varIter, nullptr)
203  ->size();
     // Per-action reward learner, counted once per action.
204  if (__actionReward) s += __actionRewardLearners[*actionIter]->size();
205  }
206 
     // Shared reward learner, counted once overall.
207  if (!__actionReward) s += __rewardLearner->size();
208 
209  return s;
210  }
211 
212 
213  // ###################################################################
214  // Asks every learner to update its function graph
215  // ###################################################################
     // Calls updateFunctionGraph() on the learner of every variable under
     // every action of the FMDP, then on the reward learner(s): one per action
     // when __actionReward is set, the shared __rewardLearner otherwise.
     //
     // __fmdp is dereferenced here and, in this file, is only assigned by
     // initialize(); the method is therefore meant to run after initialize().
     //
     // NOTE(review): source lines 225-226 are absent from this listing; they
     // presumably hold the head of the inner `for` loop over the FMDP
     // variables that declares `varIter` -- confirm against the original file.
216  template < TESTNAME VariableAttributeSelection,
217  TESTNAME RewardAttributeSelection,
218  LEARNERNAME LearnerSelection >
219  void FMDPLearner< VariableAttributeSelection,
220  RewardAttributeSelection,
221  LearnerSelection >::updateFMDP() {
222  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
223  actionIter != __fmdp->endActions();
224  ++actionIter) {
227  varIter != __fmdp->endVariables();
228  ++varIter)
229  __actionLearners[*actionIter]
230  ->getWithDefault(*varIter, nullptr)
231  ->updateFunctionGraph();
     // Per-action reward learner.
232  if (__actionReward)
233  __actionRewardLearners[*actionIter]->updateFunctionGraph();
234  }
235 
     // Shared reward learner.
236  if (!__actionReward) __rewardLearner->updateFunctionGraph();
237  }
238 } // End of namespace gum
Safe iterators for Sequence.
Definition: sequence.h:1206
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Initializes the learner.
Definition: fmdpLearner.h:108
void initialize(FMDP< double > *fmdp)
Initializes the learner.
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:137
void addReward(const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp.h:228
const std::string & actionName(Idx actionId) const
Returns name of action given in parameter.
Definition: fmdp_tpl.h:347
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:252
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:95
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
~FMDPLearner()
Default destructor.
bool addObservation(Idx actionId, const Observation *obs)
Gives to the learner a new transition.
double __rmax
learnerSize
Definition: fmdpLearner.h:237
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:250
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:162
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:165
Size size()
learnerSize
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:73
void addTransitionForAction(Idx actionId, const DiscreteVariable *var, const MultiDimImplementation< GUM_SCALAR > *transition)
Adds a variable transition table to specified action.
Definition: fmdp_tpl.h:198
double reward() const
Returns the reward attached to this observation.
Definition: observation.h:119
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:255
void addRewardForAction(Idx actionId, const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp_tpl.h:302
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associate to the given main variable.
Definition: fmdp.h:109
void updateFMDP()
Starts an update of datastructure in the associated FMDP.
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:144
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:129
Size Idx
Type for indexes.
Definition: types.h:53
double __modaMax
learnerSize
Definition: fmdpLearner.h:243
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:102
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:256
void setTableName(const std::string &name)
Sets the name of the table represented by this structure.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:613