aGrUM  0.16.0
structuredPlaner_tpl.h
Go to the documentation of this file.
1 
30 // =========================================================================
31 #include <queue>
32 #include <vector>
33 //#include <algorithm>
34 //#include <utility>
35 // =========================================================================
36 #include <agrum/core/math/math.h>
37 #include <agrum/core/functors.h>
38 // =========================================================================
42 // =========================================================================
44 // =========================================================================
45 
47 #define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
48 
49 namespace gum {
50 
51 
52  /* **************************************************************************************************
53  * **/
54  /* ** **/
55  /* ** Constructors / Destructors **/
56  /* ** **/
57  /* **************************************************************************************************
58  * **/
59 
60  // ===========================================================================
61  // Default constructor
62  // ===========================================================================
63  template < typename GUM_SCALAR >
66  GUM_SCALAR discountFactor,
67  GUM_SCALAR epsilon,
68  bool verbose) :
69  _discountFactor(discountFactor),
70  _operator(opi), _verbose(verbose) {
71  GUM_CONSTRUCTOR(StructuredPlaner);
72 
73  __threshold = epsilon;
74  _vFunction = nullptr;
75  _optimalPolicy = nullptr;
76  }
77 
78  // ===========================================================================
79  // Default destructor
80  // ===========================================================================
81  template < typename GUM_SCALAR >
83  GUM_DESTRUCTOR(StructuredPlaner);
84 
85  if (_vFunction) { delete _vFunction; }
86 
87  if (_optimalPolicy) delete _optimalPolicy;
88 
89  delete _operator;
90  }
91 
92 
93  /* **************************************************************************************************
94  * **/
95  /* ** **/
96  /* ** Datastructure access methods **/
97  /* ** **/
98  /* **************************************************************************************************
99  * **/
100 
101  // ===========================================================================
102  // Initializes data structure needed for making the planning
103  // ===========================================================================
104  template < typename GUM_SCALAR >
106  // ************************************************************************
107  // Discarding the case where no \pi* have been computed
108  if (!_optimalPolicy || _optimalPolicy->root() == 0)
109  return "NO OPTIMAL POLICY CALCULATED YET";
110 
111  // ************************************************************************
112  // Initialisation
113 
114  // Declaration of the needed string stream
115  std::stringstream output;
116  std::stringstream terminalStream;
117  std::stringstream nonTerminalStream;
118  std::stringstream arcstream;
119 
120  // First line for the toDot
121  output << std::endl << "digraph \" OPTIMAL POLICY \" {" << std::endl;
122 
123  // Form line for the internal node stream en the terminal node stream
124  terminalStream << "node [shape = box];" << std::endl;
125  nonTerminalStream << "node [shape = ellipse];" << std::endl;
126 
127  // For somme clarity in the final string
128  std::string tab = "\t";
129 
130  // To know if we already checked a node or not
131  Set< NodeId > visited;
132 
133  // FIFO of nodes to visit
134  std::queue< NodeId > fifo;
135 
136  // Loading the FIFO
137  fifo.push(_optimalPolicy->root());
138  visited << _optimalPolicy->root();
139 
140 
141  // ************************************************************************
142  // Main loop
143  while (!fifo.empty()) {
144  // Node to visit
145  NodeId currentNodeId = fifo.front();
146  fifo.pop();
147 
148  // Checking if it is terminal
149  if (_optimalPolicy->isTerminalNode(currentNodeId)) {
150  // Get back the associated ActionSet
151  ActionSet ase = _optimalPolicy->nodeValue(currentNodeId);
152 
153  // Creating a line for this node
154  terminalStream << tab << currentNodeId << ";" << tab << currentNodeId
155  << " [label=\"" << currentNodeId << " - ";
156 
157  // Enumerating and adding to the line the associated optimal actions
158  for (SequenceIteratorSafe< Idx > valIter = ase.beginSafe();
159  valIter != ase.endSafe();
160  ++valIter)
161  terminalStream << _fmdp->actionName(*valIter) << " ";
162 
163  // Terminating line
164  terminalStream << "\"];" << std::endl;
165  continue;
166  }
167 
168  // Either wise
169  {
170  // Geting back the associated internal node
171  const InternalNode* currentNode = _optimalPolicy->node(currentNodeId);
172 
173  // Creating a line in internalnode stream for this node
174  nonTerminalStream << tab << currentNodeId << ";" << tab << currentNodeId
175  << " [label=\"" << currentNodeId << " - "
176  << currentNode->nodeVar()->name() << "\"];" << std::endl;
177 
178  // Going through the sons and agregating them according the the sons Ids
180  for (Idx sonIter = 0; sonIter < currentNode->nbSons(); ++sonIter) {
181  if (!visited.exists(currentNode->son(sonIter))) {
182  fifo.push(currentNode->son(sonIter));
183  visited << currentNode->son(sonIter);
184  }
185  if (!sonMap.exists(currentNode->son(sonIter)))
186  sonMap.insert(currentNode->son(sonIter), new LinkedList< Idx >());
187  sonMap[currentNode->son(sonIter)]->addLink(sonIter);
188  }
189 
190  // Adding to the arc stram
191  for (auto sonIter = sonMap.beginSafe(); sonIter != sonMap.endSafe();
192  ++sonIter) {
193  arcstream << tab << currentNodeId << " -> " << sonIter.key()
194  << " [label=\" ";
195  Link< Idx >* modaIter = sonIter.val()->list();
196  while (modaIter) {
197  arcstream << currentNode->nodeVar()->label(modaIter->element());
198  if (modaIter->nextLink()) arcstream << ", ";
199  modaIter = modaIter->nextLink();
200  }
201  arcstream << "\",color=\"#00ff00\"];" << std::endl;
202  delete sonIter.val();
203  }
204  }
205  }
206 
207  // Terminating
208  output << terminalStream.str() << std::endl
209  << nonTerminalStream.str() << std::endl
210  << arcstream.str() << std::endl
211  << "}" << std::endl;
212 
213  return output.str();
214  }
215 
216 
217  /* **************************************************************************************************
218  * **/
219  /* ** **/
220  /* ** Planning Methods **/
221  /* ** **/
222  /* **************************************************************************************************
223  * **/
224 
225  // ===========================================================================
226  // Initializes data structure needed for making the planning
227  // ===========================================================================
228  template < typename GUM_SCALAR >
230  _fmdp = fmdp;
231 
232  // Determination of the threshold value
233  __threshold *= (1 - _discountFactor) / (2 * _discountFactor);
234 
235  // Establishement of sequence of variable elemination
236  for (auto varIter = _fmdp->beginVariables(); varIter != _fmdp->endVariables();
237  ++varIter)
238  _elVarSeq << _fmdp->main2prime(*varIter);
239 
240  // Initialisation of the value function
241  _vFunction = _operator->getFunctionInstance();
242  _optimalPolicy = _operator->getAggregatorInstance();
243  __firstTime = true;
244  }
245 
246 
247  // ===========================================================================
248  // Performs a value iteration
249  // ===========================================================================
250  template < typename GUM_SCALAR >
252  if (__firstTime) {
253  this->_initVFunction();
254  __firstTime = false;
255  }
256 
257  // *****************************************************************************************
258  // Main loop
259  // *****************************************************************************************
260  Idx nbIte = 0;
261  GUM_SCALAR gap = __threshold + 1;
262  while ((gap > __threshold) && (nbIte < nbStep)) {
263  ++nbIte;
264 
265  MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = this->_valueIteration();
266 
267  // *****************************************************************************************
268  // Then we compare new value function and the old one
270  _operator->subtract(newVFunction, _vFunction);
271  gap = 0;
272 
273  for (deltaV->beginValues(); deltaV->hasValue(); deltaV->nextValue())
274  if (gap < fabs(deltaV->value())) gap = fabs(deltaV->value());
275  delete deltaV;
276 
277  if (_verbose)
278  std::cout << " ------------------- Fin itération n° " << nbIte << std::endl
279  << " Gap : " << gap << " - " << __threshold << std::endl;
280 
281  // *****************************************************************************************
282  // And eventually we update pointers for next loop
283  delete _vFunction;
284  _vFunction = newVFunction;
285  }
286 
287  // *****************************************************************************************
288  // Policy matching value function research
289  // *****************************************************************************************
290  this->_evalPolicy();
291  }
292 
293 
294  // ===========================================================================
295  // Performs a single step of value iteration
296  // ===========================================================================
297  template < typename GUM_SCALAR >
299  _vFunction->copy(*(RECAST(_fmdp->reward())));
300  }
301 
302  /* **************************************************************************************************
303  * **/
304  /* ** **/
305  /* ** Value Iteration Methods **/
306  /* ** **/
307  /* **************************************************************************************************
308  * **/
309 
310 
311  // ===========================================================================
312  // Performs a single step of value iteration
313  // ===========================================================================
314  template < typename GUM_SCALAR >
317  // *****************************************************************************************
318  // Loop reset
320  _operator->getFunctionInstance();
321  newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());
322 
323  // *****************************************************************************************
324  // For each action
325  std::vector< MultiDimFunctionGraph< GUM_SCALAR >* > qActionsSet;
326  for (auto actionIter = _fmdp->beginActions();
327  actionIter != _fmdp->endActions();
328  ++actionIter) {
330  this->_evalQaction(newVFunction, *actionIter);
331  qActionsSet.push_back(qAction);
332  }
333  delete newVFunction;
334 
335  // *****************************************************************************************
336  // Next to evaluate main value function, we take maximise over all action
337  // value, ...
338  newVFunction = this->_maximiseQactions(qActionsSet);
339 
340  // *******************************************************************************************
341  // Next, we eval the new function value
342  newVFunction = this->_addReward(newVFunction);
343 
344  return newVFunction;
345  }
346 
347 
348  // ===========================================================================
349  // Evals the q function for current fmdp action
350  // ===========================================================================
351  template < typename GUM_SCALAR >
354  const MultiDimFunctionGraph< GUM_SCALAR >* Vold, Idx actionId) {
355  // ******************************************************************************
356  // Initialisation :
357  // Creating a copy of last Vfunction to deduce from the new Qaction
358  // And finding the first var to eleminate (the one at the end)
359 
360  return _operator->regress(Vold, actionId, this->_fmdp, this->_elVarSeq);
361  }
362 
363 
364  // ===========================================================================
365  // Maximise the AAction to iobtain the vFunction
366  // ===========================================================================
367  template < typename GUM_SCALAR >
370  std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
371  MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
372  qActionsSet.pop_back();
373 
374  while (!qActionsSet.empty()) {
375  MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
376  qActionsSet.pop_back();
377  newVFunction = _operator->maximize(newVFunction, qAction);
378  }
379 
380  return newVFunction;
381  }
382 
383 
384  // ===========================================================================
385  // Maximise the AAction to iobtain the vFunction
386  // ===========================================================================
387  template < typename GUM_SCALAR >
390  std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
391  MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
392  qActionsSet.pop_back();
393 
394  while (!qActionsSet.empty()) {
395  MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
396  qActionsSet.pop_back();
397  newVFunction = _operator->minimize(newVFunction, qAction);
398  }
399 
400  return newVFunction;
401  }
402 
403 
404  // ===========================================================================
405  // Updates the value function by multiplying by discount and adding reward
406  // ===========================================================================
407  template < typename GUM_SCALAR >
409  MultiDimFunctionGraph< GUM_SCALAR >* Vold, Idx actionId) {
410  // *****************************************************************************************
411  // ... we multiply the result by the discount factor, ...
413  _operator->getFunctionInstance();
414  newVFunction->copyAndMultiplyByScalar(*Vold, this->_discountFactor);
415  delete Vold;
416 
417  // *****************************************************************************************
418  // ... and finally add reward
419  newVFunction = _operator->add(newVFunction, RECAST(_fmdp->reward(actionId)));
420 
421  return newVFunction;
422  }
423 
424 
425  /* **************************************************************************************************
426  * **/
427  /* ** **/
428  /* ** Optimal Policy Evaluation Methods **/
429  /* ** **/
430  /* **************************************************************************************************
431  * **/
432 
433  // ===========================================================================
434  // Evals the policy corresponding to the given value function
435  // ===========================================================================
436  template < typename GUM_SCALAR >
438  // *****************************************************************************************
439  // Loop reset
441  _operator->getFunctionInstance();
442  newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());
443 
444  std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
446  argMaxQActionsSet;
447  // *****************************************************************************************
448  // For each action
449  for (auto actionIter = _fmdp->beginActions();
450  actionIter != _fmdp->endActions();
451  ++actionIter) {
453  this->_evalQaction(newVFunction, *actionIter);
454 
455  qAction = this->_addReward(qAction);
456 
457  argMaxQActionsSet.push_back(_makeArgMax(qAction, *actionIter));
458  }
459  delete newVFunction;
460 
461 
462  // *****************************************************************************************
463  // Next to evaluate main value function, we take maximise over all action
464  // value, ...
466  argMaxVFunction = _argmaximiseQactions(argMaxQActionsSet);
467 
468  // *****************************************************************************************
469  // Next to evaluate main value function, we take maximise over all action
470  // value, ...
471  _extractOptimalPolicy(argMaxVFunction);
472  }
473 
474 
475  // ===========================================================================
476  // Creates a copy of given in parameter decision Graph and replaces leaves
477  // of that Graph by a pair containing value of the leaf and action to which
478  // is bind this Graph (given in parameter).
479  // ===========================================================================
480  template < typename GUM_SCALAR >
483  const MultiDimFunctionGraph< GUM_SCALAR >* qAction, Idx actionId) {
485  amcpy = _operator->getArgMaxFunctionInstance();
486 
487  // Insertion des nouvelles variables
489  qAction->variablesSequence().beginSafe();
490  varIter != qAction->variablesSequence().endSafe();
491  ++varIter)
492  amcpy->add(**varIter);
493 
495  amcpy->manager()->setRootNode(
496  __recurArgMaxCopy(qAction->root(), actionId, qAction, amcpy, src2dest));
497 
498  delete qAction;
499  return amcpy;
500  }
501 
502 
503  // ==========================================================================
504  // Recursion part for the createArgMaxCopy
505  // ==========================================================================
506  template < typename GUM_SCALAR >
508  NodeId currentNodeId,
509  Idx actionId,
512  argMaxCpy,
513  HashTable< NodeId, NodeId >& visitedNodes) {
514  if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
515 
516  NodeId nody;
517  if (src->isTerminalNode(currentNodeId)) {
518  ArgMaxSet< GUM_SCALAR, Idx > leaf(src->nodeValue(currentNodeId), actionId);
519  nody = argMaxCpy->manager()->addTerminalNode(leaf);
520  } else {
521  const InternalNode* currentNode = src->node(currentNodeId);
522  NodeId* sonsMap = static_cast< NodeId* >(
523  SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
524  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
525  sonsMap[moda] = __recurArgMaxCopy(
526  currentNode->son(moda), actionId, src, argMaxCpy, visitedNodes);
527  nody =
528  argMaxCpy->manager()->addInternalNode(currentNode->nodeVar(), sonsMap);
529  }
530  visitedNodes.insert(currentNodeId, nody);
531  return nody;
532  }
533 
534 
535  // ===========================================================================
536  // Performs argmax_a Q(s,a)
537  // ===========================================================================
538  template < typename GUM_SCALAR >
543  qActionsSet) {
545  newVFunction = qActionsSet.back();
546  qActionsSet.pop_back();
547 
548  while (!qActionsSet.empty()) {
550  qAction = qActionsSet.back();
551  qActionsSet.pop_back();
552  newVFunction = _operator->argmaximize(newVFunction, qAction);
553  }
554 
555  return newVFunction;
556  }
557 
558  // ===========================================================================
559  // Creates a copy of given in parameter decision Graph and replaces leaves
560  // of that Graph by a pair containing value of the leaf and action to which
561  // is bind this Graph (given in parameter).
562  // ===========================================================================
563  template < typename GUM_SCALAR >
567  argMaxOptimalValueFunction) {
568  _optimalPolicy->clear();
569 
570  // Insertion des nouvelles variables
572  argMaxOptimalValueFunction->variablesSequence().beginSafe();
573  varIter != argMaxOptimalValueFunction->variablesSequence().endSafe();
574  ++varIter)
575  _optimalPolicy->add(**varIter);
576 
578  _optimalPolicy->manager()->setRootNode(__recurExtractOptPol(
579  argMaxOptimalValueFunction->root(), argMaxOptimalValueFunction, src2dest));
580 
581  delete argMaxOptimalValueFunction;
582  }
583 
584 
585  // ==========================================================================
586  // Recursion part for the createArgMaxCopy
587  // ==========================================================================
588  template < typename GUM_SCALAR >
590  NodeId currentNodeId,
592  SetTerminalNodePolicy >* argMaxOptVFunc,
593  HashTable< NodeId, NodeId >& visitedNodes) {
594  if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
595 
596  NodeId nody;
597  if (argMaxOptVFunc->isTerminalNode(currentNodeId)) {
598  ActionSet leaf;
599  __transferActionIds(argMaxOptVFunc->nodeValue(currentNodeId), leaf);
600  nody = _optimalPolicy->manager()->addTerminalNode(leaf);
601  } else {
602  const InternalNode* currentNode = argMaxOptVFunc->node(currentNodeId);
603  NodeId* sonsMap = static_cast< NodeId* >(
604  SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
605  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
606  sonsMap[moda] = __recurExtractOptPol(
607  currentNode->son(moda), argMaxOptVFunc, visitedNodes);
608  nody = _optimalPolicy->manager()->addInternalNode(currentNode->nodeVar(),
609  sonsMap);
610  }
611  visitedNodes.insert(currentNodeId, nody);
612  return nody;
613  }
614 
615  // ==========================================================================
616  // Extract from an ArgMaxSet the associated ActionSet
617  // ==========================================================================
618  template < typename GUM_SCALAR >
620  const ArgMaxSet< GUM_SCALAR, Idx >& src, ActionSet& dest) {
621  for (auto idi = src.beginSafe(); idi != src.endSafe(); ++idi)
622  dest += *idi;
623  }
624 
625 
626 } // end of namespace gum
SequenceIteratorSafe< GUM_SCALAR_SEQ > endSafe() const
Iterator end.
Definition: argMaxSet.h:115
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
<agrum/FMDP/planning/structuredPlaner.h>
void nextValue() const
Increments the constant safe iterator.
bool isTerminalNode(const NodeId &node) const
Indicates if given node is terminal or not.
void setRootNode(const NodeId &root)
Sets root node of decision diagram.
void beginValues() const
Initializes the constant safe iterator on terminal nodes.
A class to store the optimal actions.
Definition: actionSet.h:88
void copyAndMultiplyByScalar(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, GUM_SCALAR gamma)
Copies src diagrams and multiply every value by the given scalar.
const iterator_safe & endSafe() noexcept
Returns the safe iterator pointing to the end of the hashtable.
const InternalNode * node(NodeId n) const
Returns internalNode structure associated to that nodeId.
const DiscreteVariable * nodeVar() const
Returns the node variable.
Idx nbSons() const
Returns the number of sons.
#define RECAST(x)
For shorter line and hence more comprehensive code purposes only.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies src diagrams structure into this diagrams.
<agrum/FMDP/SDyna/IOperatorStrategy.h>
NodeId son(Idx modality) const
Returns the son at a given index.
bool exists(const Key &key) const
Checks whether there exists an element with a given key in the hashtable.
const Key & key(const Key &key) const
Returns a reference on a given key.
This class is used to implement factored decision process.
Definition: fmdp.h:57
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The class for generic Hash Tables.
Definition: hashTable.h:679
Class to handle efficiently argMaxSet.
Definition: argMaxSet.h:57
SequenceIteratorSafe< Idx > endSafe() const
Iterator end.
Definition: actionSet.h:152
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual Size domainSize() const =0
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
virtual void add(const DiscreteVariable &v)
Adds a new var to the variables of the multidimensional matrix.
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:607
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const GUM_SCALAR & nodeValue(NodeId n) const
Returns value associated to given node.
virtual std::string label(Idx i) const =0
get the indice-th label. This method is pure virtual.
Structure used to represent a node internal structure.
Definition: internalNode.h:102
bool hasValue() const
Indicates if constant safe iterator has reach end of terminal nodes list.
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associate to the given main variable.
Definition: fmdp.h:109
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const NodeId & root() const
Returns the id of the root node from the diagram.
SequenceIteratorSafe< GUM_SCALAR_SEQ > beginSafe() const
Iterator beginning.
Definition: argMaxSet.h:108
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
SequenceIteratorSafe< Idx > beginSafe() const
Iterator beginning.
Definition: actionSet.h:145
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
virtual const Sequence< const DiscreteVariable *> & variablesSequence() const override
Returns a const ref to the sequence of DiscreteVariable*.
Chain list allocated using the SmallObjectAllocator.
Definition: link.h:134
iterator_safe beginSafe()
Returns the safe iterator pointing to the beginning of the hashtable.
Size Idx
Type for indexes.
Definition: types.h:53
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a const reference to the manager of this diagram.
const GUM_SCALAR & value() const
Returns the value of the current terminal nodes pointed by the constant safe iterator.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
const std::string & name() const
returns the name of the variable
Size NodeId
Type for node ids.
Definition: graphElements.h:98
#define SOA_ALLOCATE(x)