aGrUM  0.14.2
fmdpLearner_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 // =========================================================================
29 // =========================================================================
30 
31 namespace gum {
32 
33  // ==========================================================================
34  // Constructor & destructor.
35  // ==========================================================================
36 
37  // ###################################################################
38  // Default constructor
39  // ###################################################################
40  template < TESTNAME VariableAttributeSelection,
41  TESTNAME RewardAttributeSelection,
42  LEARNERNAME LearnerSelection >
43  FMDPLearner< VariableAttributeSelection,
44  RewardAttributeSelection,
45  LearnerSelection >::FMDPLearner(double lT,
46  bool actionReward,
47  double sT) :
48  __actionReward(actionReward),
49  __learningThreshold(lT), __similarityThreshold(sT) {
50  GUM_CONSTRUCTOR(FMDPLearner);
51  __rewardLearner = nullptr;
52  }
53 
54 
55  // ###################################################################
56  // Default destructor
57  // ###################################################################
58  template < TESTNAME VariableAttributeSelection,
59  TESTNAME RewardAttributeSelection,
60  LEARNERNAME LearnerSelection >
61  FMDPLearner< VariableAttributeSelection,
62  RewardAttributeSelection,
63  LearnerSelection >::~FMDPLearner() {
64  for (auto actionIter = __actionLearners.beginSafe();
65  actionIter != __actionLearners.endSafe();
66  ++actionIter) {
67  for (auto learnerIter = actionIter.val()->beginSafe();
68  learnerIter != actionIter.val()->endSafe();
69  ++learnerIter)
70  delete learnerIter.val();
71  delete actionIter.val();
72  if (__actionRewardLearners.exists(actionIter.key()))
73  delete __actionRewardLearners[actionIter.key()];
74  }
75 
76  if (__rewardLearner) delete __rewardLearner;
77 
78  GUM_DESTRUCTOR(FMDPLearner);
79  }
80 
81 
82  // ==========================================================================
83  //
84  // ==========================================================================
85 
86  // ###################################################################
87  //
88  // ###################################################################
89  template < TESTNAME VariableAttributeSelection,
90  TESTNAME RewardAttributeSelection,
91  LEARNERNAME LearnerSelection >
92  void FMDPLearner< VariableAttributeSelection,
93  RewardAttributeSelection,
94  LearnerSelection >::initialize(FMDP< double >* fmdp) {
95  __fmdp = fmdp;
96 
97  __modaMax = 0;
98  __rmax = 0.0;
99 
100  Set< const DiscreteVariable* > mainVariables;
101  for (auto varIter = __fmdp->beginVariables();
102  varIter != __fmdp->endVariables();
103  ++varIter) {
104  mainVariables.insert(*varIter);
105  __modaMax = __modaMax < (*varIter)->domainSize() ? (*varIter)->domainSize()
106  : __modaMax;
107  }
108 
109  for (auto actionIter = __fmdp->beginActions();
110  actionIter != __fmdp->endActions();
111  ++actionIter) {
112  // Adding a Hashtable for the action
113  __actionLearners.insert(*actionIter, new VarLearnerTable());
114 
115  // Adding a learner for each variable
116  for (auto varIter = __fmdp->beginVariables();
117  varIter != __fmdp->endVariables();
118  ++varIter) {
120  varTrans->setTableName("ACTION : " + __fmdp->actionName(*actionIter)
121  + " - VARIABLE : " + (*varIter)->name());
122  __fmdp->addTransitionForAction(*actionIter, *varIter, varTrans);
123  __actionLearners[*actionIter]->insert(
124  (*varIter),
126  varTrans, mainVariables, __fmdp->main2prime(*varIter)));
127  }
128 
129  if (__actionReward) {
131  reward->setTableName("REWARD - ACTION : "
132  + __fmdp->actionName(*actionIter));
133  __fmdp->addRewardForAction(*actionIter, reward);
134  __actionRewardLearners.insert(
135  *actionIter, __instantiateRewardLearner(reward, mainVariables));
136  }
137  }
138 
139  if (!__actionReward) {
141  reward->setTableName("REWARD");
142  __fmdp->addReward(reward);
143  __rewardLearner = __instantiateRewardLearner(reward, mainVariables);
144  }
145  }
146 
147  // ###################################################################
148  //
149  // ###################################################################
150  template < TESTNAME VariableAttributeSelection,
151  TESTNAME RewardAttributeSelection,
152  LEARNERNAME LearnerSelection >
153  bool FMDPLearner< VariableAttributeSelection,
154  RewardAttributeSelection,
155  LearnerSelection >::addObservation(Idx actionId,
156  const Observation* newObs) {
159  varIter != __fmdp->endVariables();
160  ++varIter) {
161  __actionLearners[actionId]
162  ->getWithDefault(*varIter, nullptr)
163  ->addObservation(newObs);
164  __actionLearners[actionId]->getWithDefault(*varIter, nullptr)->updateGraph();
165  }
166 
167  if (__actionReward) {
168  __actionRewardLearners[actionId]->addObservation(newObs);
169  __actionRewardLearners[actionId]->updateGraph();
170  } else {
171  __rewardLearner->addObservation(newObs);
172  __rewardLearner->updateGraph();
173  }
174 
175  __rmax =
176  __rmax < std::abs(newObs->reward()) ? std::abs(newObs->reward()) : __rmax;
177 
178  return false;
179  }
180 
181  // ###################################################################
182  //
183  // ###################################################################
184  template < TESTNAME VariableAttributeSelection,
185  TESTNAME RewardAttributeSelection,
186  LEARNERNAME LearnerSelection >
187  Size FMDPLearner< VariableAttributeSelection,
188  RewardAttributeSelection,
189  LearnerSelection >::size() {
190  Size s = 0;
191  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
192  actionIter != __fmdp->endActions();
193  ++actionIter) {
196  varIter != __fmdp->endVariables();
197  ++varIter)
198  s += __actionLearners[*actionIter]
199  ->getWithDefault(*varIter, nullptr)
200  ->size();
201  if (__actionReward) s += __actionRewardLearners[*actionIter]->size();
202  }
203 
204  if (!__actionReward) s += __rewardLearner->size();
205 
206  return s;
207  }
208 
209 
210  // ###################################################################
211  //
212  // ###################################################################
213  template < TESTNAME VariableAttributeSelection,
214  TESTNAME RewardAttributeSelection,
215  LEARNERNAME LearnerSelection >
216  void FMDPLearner< VariableAttributeSelection,
217  RewardAttributeSelection,
218  LearnerSelection >::updateFMDP() {
219  for (SequenceIteratorSafe< Idx > actionIter = __fmdp->beginActions();
220  actionIter != __fmdp->endActions();
221  ++actionIter) {
224  varIter != __fmdp->endVariables();
225  ++varIter)
226  __actionLearners[*actionIter]
227  ->getWithDefault(*varIter, nullptr)
228  ->updateFunctionGraph();
229  if (__actionReward)
230  __actionRewardLearners[*actionIter]->updateFunctionGraph();
231  }
232 
233  if (!__actionReward) __rewardLearner->updateFunctionGraph();
234  }
235 } // End of namespace gum
Safe iterators for Sequence.
Definition: sequence.h:1203
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
Instantiates a new MultiDimFunctionGraph to be filled by a learner.
Definition: fmdpLearner.h:105
void initialize(FMDP< double > *fmdp)
Initializes the learner.
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
Definition: fmdp.h:134
void addReward(const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp.h:225
const std::string & actionName(Idx actionId) const
Returns name of action given in parameter.
Definition: fmdp_tpl.h:344
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
~FMDPLearner()
Default destructor.
bool addObservation(Idx actionId, const Observation *obs)
Gives to the learner a new transition.
double __rmax
learnerSize
Definition: fmdpLearner.h:234
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:159
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:162
Size size()
learnerSize
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:70
void addTransitionForAction(Idx actionId, const DiscreteVariable *var, const MultiDimImplementation< GUM_SCALAR > *transition)
Adds a variable transition table to specified action.
Definition: fmdp_tpl.h:195
double reward() const
Returns the reward obtained in this observation.
Definition: observation.h:116
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
void addRewardForAction(Idx actionId, const MultiDimImplementation< GUM_SCALAR > *reward)
Adds a default variable reward.
Definition: fmdp_tpl.h:299
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associate to the given main variable.
Definition: fmdp.h:106
void updateFMDP()
Starts an update of datastructure in the associated FMDP.
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
Definition: fmdp.h:141
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:126
Size Idx
Type for indexes.
Definition: types.h:50
double __modaMax
learnerSize
Definition: fmdpLearner.h:240
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
void setTableName(const std::string &name)
Sets the name of the table represented by this structure.
Headers of the FMDPLearner class.
void insert(const Key &k)
Inserts a new element into the set.
Definition: set_tpl.h:610