aGrUM  0.16.0
adaptiveRMaxPlaner.cpp
/**
 * @file
 * @brief Implementation of the AdaptiveRMaxPlaner class.
 *
 * Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6)
 * {prenom.nom}_at_lip6.fr
 */
// =========================================================================
#include <queue>
#include <vector>
//#include <algorithm>
//#include <utility>
// =========================================================================
#include <agrum/core/math/math.h>
#include <agrum/core/functors.h>
// =========================================================================
#include <agrum/FMDP/SDyna/IDecisionStrategy.h>
#include <agrum/FMDP/SDyna/ILearningStrategy.h>
#include <agrum/FMDP/SDyna/IVisitableGraphLearner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>
#include <agrum/FMDP/simulation/statesCounter.h>
// =========================================================================
#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
// =========================================================================

// For shorter lines and hence more comprehensible code purposes only
#define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x)
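// The FMDP exposes its reward and transition tables as
// MultiDimImplementation< double >*, while the planner's operators work on
// MultiDimFunctionGraph< double >, hence the cast performed by RECASTED.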

namespace gum {

  /* ************************************************************************ */
  /* **                                                                    ** */
  /* **                     Constructors / Destructors                     ** */
  /* **                                                                    ** */
  /* ************************************************************************ */

  // ===========================================================================
  // Default constructor
  // ===========================================================================
  AdaptiveRMaxPlaner::AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
                                         double discountFactor,
                                         double epsilon,
                                         const ILearningStrategy* learner,
                                         bool verbose) :
      StructuredPlaner(opi, discountFactor, epsilon, verbose),
      IDecisionStrategy(), __fmdpLearner(learner), __initialized(false) {
    GUM_CONSTRUCTOR(AdaptiveRMaxPlaner);
  }

  // ===========================================================================
  // Default destructor
  // ===========================================================================
  AdaptiveRMaxPlaner::~AdaptiveRMaxPlaner() {
    GUM_DESTRUCTOR(AdaptiveRMaxPlaner);

    for (HashTableIteratorSafe< Idx, StatesCounter* > scIter =
            __counterTable.beginSafe();
         scIter != __counterTable.endSafe();
         ++scIter)
      delete scIter.val();
  }

  /* ************************************************************************ */
  /* **                                                                    ** */
  /* **                          Planning Methods                          ** */
  /* **                                                                    ** */
  /* ************************************************************************ */

  // ==========================================================================
  // Initializes the data structures needed for planning
  // ==========================================================================
  void AdaptiveRMaxPlaner::initialize(const FMDP< double >* fmdp) {
    if (!__initialized) {
      StructuredPlaner::initialize(fmdp);
      IDecisionStrategy::initialize(fmdp);
      for (auto actionIter = fmdp->beginActions();
           actionIter != fmdp->endActions();
           ++actionIter) {
        __counterTable.insert(*actionIter, new StatesCounter());
        __initializedTable.insert(*actionIter, false);
      }
      __initialized = true;
    }
  }
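  // The counters created above record, for every action, how many times each
  // state has been observed; __makeRMaxFunctionGraphs() later walks them to
  // decide which states are known well enough to trust the learned model.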

  // ===========================================================================
  // Performs a value iteration
  // ===========================================================================
  void AdaptiveRMaxPlaner::makePlanning(Idx nbStep) {
    __makeRMaxFunctionGraphs();

    StructuredPlaner::makePlanning(nbStep);

    __clearTables();
  }
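  // Planning therefore proceeds in three phases: the RMax and "known state"
  // graphs are rebuilt from the visit counters, a standard structured value
  // iteration is run with them, and the per-action tables are discarded.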

  /* ************************************************************************ */
  /* **                                                                    ** */
  /* **                      Value Iteration Methods                       ** */
  /* **                                                                    ** */
  /* ************************************************************************ */

  // ===========================================================================
  // Initializes the value function
  // ===========================================================================
  void AdaptiveRMaxPlaner::_initVFunction() {
    // Start from a null function and add every action's reward
    _vFunction->manager()->setRootNode(
       _vFunction->manager()->addTerminalNode(0.0));
    for (auto actionIter = _fmdp->beginActions();
         actionIter != _fmdp->endActions();
         ++actionIter)
      _vFunction = this->_operator->add(
         _vFunction, RECASTED(this->_fmdp->reward(*actionIter)), 1);
  }

  // ===========================================================================
  // Performs a single step of value iteration
  // ===========================================================================
  MultiDimFunctionGraph< double >* AdaptiveRMaxPlaner::_valueIteration() {
    // *************************************************************************
    // Loop reset
    MultiDimFunctionGraph< double >* newVFunction =
       _operator->getFunctionInstance();
    newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());

    // *************************************************************************
    // For each action
    std::vector< MultiDimFunctionGraph< double >* > qActionsSet;
    for (auto actionIter = _fmdp->beginActions();
         actionIter != _fmdp->endActions();
         ++actionIter) {
      MultiDimFunctionGraph< double >* qAction =
         _evalQaction(newVFunction, *actionIter);

      // ***********************************************************************
      // Next, we add the reward
      qAction = _addReward(qAction, *actionIter);

      qAction = this->_operator->maximize(
         __actionsRMaxTable[*actionIter],
         this->_operator->multiply(qAction, __actionsBoolTable[*actionIter], 1),
         2);
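      // RMax override: __actionsBoolTable maps well-observed states to 1 and
      // under-observed states to 0, while __actionsRMaxTable maps them to 0
      // and to the optimistic bound __rmax respectively. Multiplying and then
      // maximising thus keeps the learned Q-value where the model is trusted
      // and replaces it by __rmax everywhere else.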

      qActionsSet.push_back(qAction);
    }
    delete newVFunction;

    // *************************************************************************
    // The new value function is the maximum over all action values
    newVFunction = _maximiseQactions(qActionsSet);

    return newVFunction;
  }

  /* ************************************************************************ */
  /* **                                                                    ** */
  /* **                 Optimal Policy Evaluation Methods                  ** */
  /* **                                                                    ** */
  /* ************************************************************************ */

  // ===========================================================================
  // Evaluates the policy corresponding to the given value function
  // ===========================================================================
  void AdaptiveRMaxPlaner::_evalPolicy() {
    // *************************************************************************
    // Loop reset
    MultiDimFunctionGraph< double >* newVFunction =
       _operator->getFunctionInstance();
    newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());

    std::vector<
       MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* >
       argMaxQActionsSet;
    // *************************************************************************
    // For each action
    for (auto actionIter = _fmdp->beginActions();
         actionIter != _fmdp->endActions();
         ++actionIter) {
      MultiDimFunctionGraph< double >* qAction =
         this->_evalQaction(newVFunction, *actionIter);

      qAction = this->_addReward(qAction, *actionIter);

      qAction = this->_operator->maximize(
         __actionsRMaxTable[*actionIter],
         this->_operator->multiply(qAction, __actionsBoolTable[*actionIter], 1),
         2);

      argMaxQActionsSet.push_back(_makeArgMax(qAction, *actionIter));
    }
    delete newVFunction;

    // *************************************************************************
    // The optimal value function is the argmax over all action values ...
    MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >*
       argMaxVFunction = _argmaximiseQactions(argMaxQActionsSet);

    // *************************************************************************
    // ... from which the optimal policy is then extracted
    _extractOptimalPolicy(argMaxVFunction);
  }

  // ===========================================================================
  // Builds the RMax and "known state" function graphs for each action
  // ===========================================================================
  void AdaptiveRMaxPlaner::__makeRMaxFunctionGraphs() {
    __rThreshold =
       __fmdpLearner->modaMax() * 5 > 30 ? __fmdpLearner->modaMax() * 5 : 30;
    __rmax = __fmdpLearner->rMax() / (1.0 - this->_discountFactor);
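    // __rmax = rMax / (1 - gamma) is an upper bound on any discounted return:
    // it is the optimistic value granted to insufficiently visited states.
    // A state is considered sufficiently visited once it has been observed
    // __rThreshold = max(5 * modaMax, 30) times.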

    for (auto actionIter = this->fmdp()->beginActions();
         actionIter != this->fmdp()->endActions();
         ++actionIter) {
      std::vector< MultiDimFunctionGraph< double >* > rmaxs;
      std::vector< MultiDimFunctionGraph< double >* > boolQs;

      for (auto varIter = this->fmdp()->beginVariables();
           varIter != this->fmdp()->endVariables();
           ++varIter) {
        const IVisitableGraphLearner* visited = __counterTable[*actionIter];

        MultiDimFunctionGraph< double >* varRMax =
           this->_operator->getFunctionInstance();
        MultiDimFunctionGraph< double >* varBoolQ =
           this->_operator->getFunctionInstance();

        visited->insertSetOfVars(varRMax);
        visited->insertSetOfVars(varBoolQ);

        std::pair< NodeId, NodeId > rooty =
           __visitLearner(visited, visited->root(), varRMax, varBoolQ);
        varRMax->manager()->setRootNode(rooty.first);
        varRMax->manager()->reduce();
        varRMax->manager()->clean();
        varBoolQ->manager()->setRootNode(rooty.second);
        varBoolQ->manager()->reduce();
        varBoolQ->manager()->clean();

        rmaxs.push_back(varRMax);
        boolQs.push_back(varBoolQ);

        // std::cout << RECASTED(this->_fmdp->transition(*actionIter, *varIter))->toDot()
        //           << std::endl;
        // for (auto varIter2 = RECASTED(this->_fmdp->transition(*actionIter, *varIter))
        //                         ->variablesSequence().beginSafe();
        //      varIter2 != RECASTED(this->_fmdp->transition(*actionIter, *varIter))
        //                     ->variablesSequence().endSafe();
        //      ++varIter2)
        //   std::cout << (*varIter2)->name() << " | ";
        // std::cout << std::endl;

        // std::cout << varRMax->toDot() << std::endl;
        // for (auto varIter = varRMax->variablesSequence().beginSafe();
        //      varIter != varRMax->variablesSequence().endSafe();
        //      ++varIter)
        //   std::cout << (*varIter)->name() << " | ";
        // std::cout << std::endl;

        // std::cout << varBoolQ->toDot() << std::endl;
        // for (auto varIter = varBoolQ->variablesSequence().beginSafe();
        //      varIter != varBoolQ->variablesSequence().endSafe();
        //      ++varIter)
        //   std::cout << (*varIter)->name() << " | ";
        // std::cout << std::endl;
      }

      // std::cout << "Maximising" << std::endl;
      __actionsRMaxTable.insert(*actionIter, this->_maximiseQactions(rmaxs));
      __actionsBoolTable.insert(*actionIter, this->_minimiseFunctions(boolQs));
    }
  }

  // ===========================================================================
  // Recursively visits the learner's decision graph to build, for one action,
  // the RMax graph and the boolean "known state" graph
  // ===========================================================================
  std::pair< NodeId, NodeId >
     AdaptiveRMaxPlaner::__visitLearner(const IVisitableGraphLearner* visited,
                                        NodeId currentNodeId,
                                        MultiDimFunctionGraph< double >* rmax,
                                        MultiDimFunctionGraph< double >* boolQ) {
    std::pair< NodeId, NodeId > rep;
    if (visited->isTerminal(currentNodeId)) {
      rep.first = rmax->manager()->addTerminalNode(
         visited->nodeNbObservation(currentNodeId) < __rThreshold ? __rmax : 0.0);
      rep.second = boolQ->manager()->addTerminalNode(
         visited->nodeNbObservation(currentNodeId) < __rThreshold ? 0.0 : 1.0);
      return rep;
    }
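    // Leaves reached with fewer than __rThreshold observations receive the
    // optimistic value __rmax and a 0 mask; well-observed leaves receive 0 and
    // a 1 mask, so their learned Q-values are kept as they are.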

    NodeId* rmaxsons = static_cast< NodeId* >(SOA_ALLOCATE(
       sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));
    NodeId* bqsons = static_cast< NodeId* >(SOA_ALLOCATE(
       sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));

    for (Idx moda = 0; moda < visited->nodeVar(currentNodeId)->domainSize();
         ++moda) {
      std::pair< NodeId, NodeId > sonp = __visitLearner(
         visited, visited->nodeSon(currentNodeId, moda), rmax, boolQ);
      rmaxsons[moda] = sonp.first;
      bqsons[moda] = sonp.second;
    }

    rep.first =
       rmax->manager()->addInternalNode(visited->nodeVar(currentNodeId), rmaxsons);
    rep.second =
       boolQ->manager()->addInternalNode(visited->nodeVar(currentNodeId), bqsons);
    return rep;
  }

  // ===========================================================================
  // Frees the per-action RMax and boolean function graphs
  // ===========================================================================
  void AdaptiveRMaxPlaner::__clearTables() {
    for (auto actionIter = this->fmdp()->beginActions();
         actionIter != this->fmdp()->endActions();
         ++actionIter) {
      delete __actionsBoolTable[*actionIter];
      delete __actionsRMaxTable[*actionIter];
    }
    __actionsRMaxTable.clear();
    __actionsBoolTable.clear();
  }

}   // end of namespace gum