aGrUM  0.14.2
structuredPlaner_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Pierre-Henri WUILLEMIN et Christophe GONZALES *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 // =========================================================================
28 #include <queue>
29 #include <vector>
30 //#include <algorithm>
31 //#include <utility>
32 // =========================================================================
33 #include <agrum/core/math/math.h>
34 #include <agrum/core/functors.h>
35 // =========================================================================
39 // =========================================================================
41 // =========================================================================
42 
44 #define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
45 
46 namespace gum {
47 
48 
49  /* **************************************************************************************************
50  * **/
51  /* ** **/
52  /* ** Constructors / Destructors **/
53  /* ** **/
54  /* **************************************************************************************************
55  * **/
56 
57  // ===========================================================================
58  // Default constructor
59  // ===========================================================================
60  template < typename GUM_SCALAR >
63  GUM_SCALAR discountFactor,
64  GUM_SCALAR epsilon,
65  bool verbose) :
66  _discountFactor(discountFactor),
67  _operator(opi), _verbose(verbose) {
68  GUM_CONSTRUCTOR(StructuredPlaner);
69 
70  __threshold = epsilon;
71  _vFunction = nullptr;
72  _optimalPolicy = nullptr;
73  }
74 
75  // ===========================================================================
76  // Default destructor
77  // ===========================================================================
78  template < typename GUM_SCALAR >
80  GUM_DESTRUCTOR(StructuredPlaner);
81 
82  if (_vFunction) { delete _vFunction; }
83 
84  if (_optimalPolicy) delete _optimalPolicy;
85 
86  delete _operator;
87  }
88 
89 
90  /* **************************************************************************************************
91  * **/
92  /* ** **/
93  /* ** Datastructure access methods **/
94  /* ** **/
95  /* **************************************************************************************************
96  * **/
97 
98  // ===========================================================================
99  // Initializes data structure needed for making the planning
100  // ===========================================================================
101  template < typename GUM_SCALAR >
103  // ************************************************************************
104  // Discarding the case where no \pi* have been computed
105  if (!_optimalPolicy || _optimalPolicy->root() == 0)
106  return "NO OPTIMAL POLICY CALCULATED YET";
107 
108  // ************************************************************************
109  // Initialisation
110 
111  // Declaration of the needed string stream
112  std::stringstream output;
113  std::stringstream terminalStream;
114  std::stringstream nonTerminalStream;
115  std::stringstream arcstream;
116 
117  // First line for the toDot
118  output << std::endl << "digraph \" OPTIMAL POLICY \" {" << std::endl;
119 
120  // Form line for the internal node stream en the terminal node stream
121  terminalStream << "node [shape = box];" << std::endl;
122  nonTerminalStream << "node [shape = ellipse];" << std::endl;
123 
124  // For somme clarity in the final string
125  std::string tab = "\t";
126 
127  // To know if we already checked a node or not
128  Set< NodeId > visited;
129 
130  // FIFO of nodes to visit
131  std::queue< NodeId > fifo;
132 
133  // Loading the FIFO
134  fifo.push(_optimalPolicy->root());
135  visited << _optimalPolicy->root();
136 
137 
138  // ************************************************************************
139  // Main loop
140  while (!fifo.empty()) {
141  // Node to visit
142  NodeId currentNodeId = fifo.front();
143  fifo.pop();
144 
145  // Checking if it is terminal
146  if (_optimalPolicy->isTerminalNode(currentNodeId)) {
147  // Get back the associated ActionSet
148  ActionSet ase = _optimalPolicy->nodeValue(currentNodeId);
149 
150  // Creating a line for this node
151  terminalStream << tab << currentNodeId << ";" << tab << currentNodeId
152  << " [label=\"" << currentNodeId << " - ";
153 
154  // Enumerating and adding to the line the associated optimal actions
155  for (SequenceIteratorSafe< Idx > valIter = ase.beginSafe();
156  valIter != ase.endSafe();
157  ++valIter)
158  terminalStream << _fmdp->actionName(*valIter) << " ";
159 
160  // Terminating line
161  terminalStream << "\"];" << std::endl;
162  continue;
163  }
164 
165  // Either wise
166  {
167  // Geting back the associated internal node
168  const InternalNode* currentNode = _optimalPolicy->node(currentNodeId);
169 
170  // Creating a line in internalnode stream for this node
171  nonTerminalStream << tab << currentNodeId << ";" << tab << currentNodeId
172  << " [label=\"" << currentNodeId << " - "
173  << currentNode->nodeVar()->name() << "\"];" << std::endl;
174 
175  // Going through the sons and agregating them according the the sons Ids
177  for (Idx sonIter = 0; sonIter < currentNode->nbSons(); ++sonIter) {
178  if (!visited.exists(currentNode->son(sonIter))) {
179  fifo.push(currentNode->son(sonIter));
180  visited << currentNode->son(sonIter);
181  }
182  if (!sonMap.exists(currentNode->son(sonIter)))
183  sonMap.insert(currentNode->son(sonIter), new LinkedList< Idx >());
184  sonMap[currentNode->son(sonIter)]->addLink(sonIter);
185  }
186 
187  // Adding to the arc stram
188  for (auto sonIter = sonMap.beginSafe(); sonIter != sonMap.endSafe();
189  ++sonIter) {
190  arcstream << tab << currentNodeId << " -> " << sonIter.key()
191  << " [label=\" ";
192  Link< Idx >* modaIter = sonIter.val()->list();
193  while (modaIter) {
194  arcstream << currentNode->nodeVar()->label(modaIter->element());
195  if (modaIter->nextLink()) arcstream << ", ";
196  modaIter = modaIter->nextLink();
197  }
198  arcstream << "\",color=\"#00ff00\"];" << std::endl;
199  delete sonIter.val();
200  }
201  }
202  }
203 
204  // Terminating
205  output << terminalStream.str() << std::endl
206  << nonTerminalStream.str() << std::endl
207  << arcstream.str() << std::endl
208  << "}" << std::endl;
209 
210  return output.str();
211  }
212 
213 
214  /* **************************************************************************************************
215  * **/
216  /* ** **/
217  /* ** Planning Methods **/
218  /* ** **/
219  /* **************************************************************************************************
220  * **/
221 
222  // ===========================================================================
223  // Initializes data structure needed for making the planning
224  // ===========================================================================
225  template < typename GUM_SCALAR >
227  _fmdp = fmdp;
228 
229  // Determination of the threshold value
230  __threshold *= (1 - _discountFactor) / (2 * _discountFactor);
231 
232  // Establishement of sequence of variable elemination
233  for (auto varIter = _fmdp->beginVariables(); varIter != _fmdp->endVariables();
234  ++varIter)
235  _elVarSeq << _fmdp->main2prime(*varIter);
236 
237  // Initialisation of the value function
238  _vFunction = _operator->getFunctionInstance();
239  _optimalPolicy = _operator->getAggregatorInstance();
240  __firstTime = true;
241  }
242 
243 
244  // ===========================================================================
245  // Performs a value iteration
246  // ===========================================================================
247  template < typename GUM_SCALAR >
249  if (__firstTime) {
250  this->_initVFunction();
251  __firstTime = false;
252  }
253 
254  // *****************************************************************************************
255  // Main loop
256  // *****************************************************************************************
257  Idx nbIte = 0;
258  GUM_SCALAR gap = __threshold + 1;
259  while ((gap > __threshold) && (nbIte < nbStep)) {
260  ++nbIte;
261 
262  MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = this->_valueIteration();
263 
264  // *****************************************************************************************
265  // Then we compare new value function and the old one
267  _operator->subtract(newVFunction, _vFunction);
268  gap = 0;
269 
270  for (deltaV->beginValues(); deltaV->hasValue(); deltaV->nextValue())
271  if (gap < fabs(deltaV->value())) gap = fabs(deltaV->value());
272  delete deltaV;
273 
274  if (_verbose)
275  std::cout << " ------------------- Fin itération n° " << nbIte << std::endl
276  << " Gap : " << gap << " - " << __threshold << std::endl;
277 
278  // *****************************************************************************************
279  // And eventually we update pointers for next loop
280  delete _vFunction;
281  _vFunction = newVFunction;
282  }
283 
284  // *****************************************************************************************
285  // Policy matching value function research
286  // *****************************************************************************************
287  this->_evalPolicy();
288  }
289 
290 
291  // ===========================================================================
292  // Performs a single step of value iteration
293  // ===========================================================================
294  template < typename GUM_SCALAR >
296  _vFunction->copy(*(RECAST(_fmdp->reward())));
297  }
298 
299  /* **************************************************************************************************
300  * **/
301  /* ** **/
302  /* ** Value Iteration Methods **/
303  /* ** **/
304  /* **************************************************************************************************
305  * **/
306 
307 
308  // ===========================================================================
309  // Performs a single step of value iteration
310  // ===========================================================================
311  template < typename GUM_SCALAR >
314  // *****************************************************************************************
315  // Loop reset
317  _operator->getFunctionInstance();
318  newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());
319 
320  // *****************************************************************************************
321  // For each action
322  std::vector< MultiDimFunctionGraph< GUM_SCALAR >* > qActionsSet;
323  for (auto actionIter = _fmdp->beginActions();
324  actionIter != _fmdp->endActions();
325  ++actionIter) {
327  this->_evalQaction(newVFunction, *actionIter);
328  qActionsSet.push_back(qAction);
329  }
330  delete newVFunction;
331 
332  // *****************************************************************************************
333  // Next to evaluate main value function, we take maximise over all action
334  // value, ...
335  newVFunction = this->_maximiseQactions(qActionsSet);
336 
337  // *******************************************************************************************
338  // Next, we eval the new function value
339  newVFunction = this->_addReward(newVFunction);
340 
341  return newVFunction;
342  }
343 
344 
345  // ===========================================================================
346  // Evals the q function for current fmdp action
347  // ===========================================================================
348  template < typename GUM_SCALAR >
351  const MultiDimFunctionGraph< GUM_SCALAR >* Vold, Idx actionId) {
352  // ******************************************************************************
353  // Initialisation :
354  // Creating a copy of last Vfunction to deduce from the new Qaction
355  // And finding the first var to eleminate (the one at the end)
356 
357  return _operator->regress(Vold, actionId, this->_fmdp, this->_elVarSeq);
358  }
359 
360 
361  // ===========================================================================
362  // Maximise the AAction to iobtain the vFunction
363  // ===========================================================================
364  template < typename GUM_SCALAR >
367  std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
368  MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
369  qActionsSet.pop_back();
370 
371  while (!qActionsSet.empty()) {
372  MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
373  qActionsSet.pop_back();
374  newVFunction = _operator->maximize(newVFunction, qAction);
375  }
376 
377  return newVFunction;
378  }
379 
380 
381  // ===========================================================================
382  // Maximise the AAction to iobtain the vFunction
383  // ===========================================================================
384  template < typename GUM_SCALAR >
387  std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >& qActionsSet) {
388  MultiDimFunctionGraph< GUM_SCALAR >* newVFunction = qActionsSet.back();
389  qActionsSet.pop_back();
390 
391  while (!qActionsSet.empty()) {
392  MultiDimFunctionGraph< GUM_SCALAR >* qAction = qActionsSet.back();
393  qActionsSet.pop_back();
394  newVFunction = _operator->minimize(newVFunction, qAction);
395  }
396 
397  return newVFunction;
398  }
399 
400 
401  // ===========================================================================
402  // Updates the value function by multiplying by discount and adding reward
403  // ===========================================================================
404  template < typename GUM_SCALAR >
406  MultiDimFunctionGraph< GUM_SCALAR >* Vold, Idx actionId) {
407  // *****************************************************************************************
408  // ... we multiply the result by the discount factor, ...
410  _operator->getFunctionInstance();
411  newVFunction->copyAndMultiplyByScalar(*Vold, this->_discountFactor);
412  delete Vold;
413 
414  // *****************************************************************************************
415  // ... and finally add reward
416  newVFunction = _operator->add(newVFunction, RECAST(_fmdp->reward(actionId)));
417 
418  return newVFunction;
419  }
420 
421 
422  /* **************************************************************************************************
423  * **/
424  /* ** **/
425  /* ** Optimal Policy Evaluation Methods **/
426  /* ** **/
427  /* **************************************************************************************************
428  * **/
429 
430  // ===========================================================================
431  // Evals the policy corresponding to the given value function
432  // ===========================================================================
433  template < typename GUM_SCALAR >
435  // *****************************************************************************************
436  // Loop reset
438  _operator->getFunctionInstance();
439  newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());
440 
441  std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
443  argMaxQActionsSet;
444  // *****************************************************************************************
445  // For each action
446  for (auto actionIter = _fmdp->beginActions();
447  actionIter != _fmdp->endActions();
448  ++actionIter) {
450  this->_evalQaction(newVFunction, *actionIter);
451 
452  qAction = this->_addReward(qAction);
453 
454  argMaxQActionsSet.push_back(_makeArgMax(qAction, *actionIter));
455  }
456  delete newVFunction;
457 
458 
459  // *****************************************************************************************
460  // Next to evaluate main value function, we take maximise over all action
461  // value, ...
463  argMaxVFunction = _argmaximiseQactions(argMaxQActionsSet);
464 
465  // *****************************************************************************************
466  // Next to evaluate main value function, we take maximise over all action
467  // value, ...
468  _extractOptimalPolicy(argMaxVFunction);
469  }
470 
471 
472  // ===========================================================================
473  // Creates a copy of given in parameter decision Graph and replaces leaves
474  // of that Graph by a pair containing value of the leaf and action to which
475  // is bind this Graph (given in parameter).
476  // ===========================================================================
477  template < typename GUM_SCALAR >
480  const MultiDimFunctionGraph< GUM_SCALAR >* qAction, Idx actionId) {
482  amcpy = _operator->getArgMaxFunctionInstance();
483 
484  // Insertion des nouvelles variables
486  qAction->variablesSequence().beginSafe();
487  varIter != qAction->variablesSequence().endSafe();
488  ++varIter)
489  amcpy->add(**varIter);
490 
492  amcpy->manager()->setRootNode(
493  __recurArgMaxCopy(qAction->root(), actionId, qAction, amcpy, src2dest));
494 
495  delete qAction;
496  return amcpy;
497  }
498 
499 
500  // ==========================================================================
501  // Recursion part for the createArgMaxCopy
502  // ==========================================================================
503  template < typename GUM_SCALAR >
505  NodeId currentNodeId,
506  Idx actionId,
509  argMaxCpy,
510  HashTable< NodeId, NodeId >& visitedNodes) {
511  if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
512 
513  NodeId nody;
514  if (src->isTerminalNode(currentNodeId)) {
515  ArgMaxSet< GUM_SCALAR, Idx > leaf(src->nodeValue(currentNodeId), actionId);
516  nody = argMaxCpy->manager()->addTerminalNode(leaf);
517  } else {
518  const InternalNode* currentNode = src->node(currentNodeId);
519  NodeId* sonsMap = static_cast< NodeId* >(
520  SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
521  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
522  sonsMap[moda] = __recurArgMaxCopy(
523  currentNode->son(moda), actionId, src, argMaxCpy, visitedNodes);
524  nody =
525  argMaxCpy->manager()->addInternalNode(currentNode->nodeVar(), sonsMap);
526  }
527  visitedNodes.insert(currentNodeId, nody);
528  return nody;
529  }
530 
531 
532  // ===========================================================================
533  // Performs argmax_a Q(s,a)
534  // ===========================================================================
535  template < typename GUM_SCALAR >
540  qActionsSet) {
542  newVFunction = qActionsSet.back();
543  qActionsSet.pop_back();
544 
545  while (!qActionsSet.empty()) {
547  qAction = qActionsSet.back();
548  qActionsSet.pop_back();
549  newVFunction = _operator->argmaximize(newVFunction, qAction);
550  }
551 
552  return newVFunction;
553  }
554 
555  // ===========================================================================
556  // Creates a copy of given in parameter decision Graph and replaces leaves
557  // of that Graph by a pair containing value of the leaf and action to which
558  // is bind this Graph (given in parameter).
559  // ===========================================================================
560  template < typename GUM_SCALAR >
564  argMaxOptimalValueFunction) {
565  _optimalPolicy->clear();
566 
567  // Insertion des nouvelles variables
569  argMaxOptimalValueFunction->variablesSequence().beginSafe();
570  varIter != argMaxOptimalValueFunction->variablesSequence().endSafe();
571  ++varIter)
572  _optimalPolicy->add(**varIter);
573 
575  _optimalPolicy->manager()->setRootNode(__recurExtractOptPol(
576  argMaxOptimalValueFunction->root(), argMaxOptimalValueFunction, src2dest));
577 
578  delete argMaxOptimalValueFunction;
579  }
580 
581 
582  // ==========================================================================
583  // Recursion part for the createArgMaxCopy
584  // ==========================================================================
585  template < typename GUM_SCALAR >
587  NodeId currentNodeId,
589  SetTerminalNodePolicy >* argMaxOptVFunc,
590  HashTable< NodeId, NodeId >& visitedNodes) {
591  if (visitedNodes.exists(currentNodeId)) return visitedNodes[currentNodeId];
592 
593  NodeId nody;
594  if (argMaxOptVFunc->isTerminalNode(currentNodeId)) {
595  ActionSet leaf;
596  __transferActionIds(argMaxOptVFunc->nodeValue(currentNodeId), leaf);
597  nody = _optimalPolicy->manager()->addTerminalNode(leaf);
598  } else {
599  const InternalNode* currentNode = argMaxOptVFunc->node(currentNodeId);
600  NodeId* sonsMap = static_cast< NodeId* >(
601  SOA_ALLOCATE(sizeof(NodeId) * currentNode->nodeVar()->domainSize()));
602  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
603  sonsMap[moda] = __recurExtractOptPol(
604  currentNode->son(moda), argMaxOptVFunc, visitedNodes);
605  nody = _optimalPolicy->manager()->addInternalNode(currentNode->nodeVar(),
606  sonsMap);
607  }
608  visitedNodes.insert(currentNodeId, nody);
609  return nody;
610  }
611 
612  // ==========================================================================
613  // Extract from an ArgMaxSet the associated ActionSet
614  // ==========================================================================
615  template < typename GUM_SCALAR >
617  const ArgMaxSet< GUM_SCALAR, Idx >& src, ActionSet& dest) {
618  for (auto idi = src.beginSafe(); idi != src.endSafe(); ++idi)
619  dest += *idi;
620  }
621 
622 
623 } // end of namespace gum
SequenceIteratorSafe< GUM_SCALAR_SEQ > endSafe() const
Iterator end.
Definition: argMaxSet.h:113
Useful macros for maths.
<agrum/FMDP/planning/structuredPlaner.h>
void nextValue() const
Increments the constant safe iterator.
bool isTerminalNode(const NodeId &node) const
Indicates if given node is terminal or not.
void setRootNode(const NodeId &root)
Sets root node of decision diagram.
void beginValues() const
Initializes the constant safe iterator on terminal nodes.
A class to store the optimal actions.
Definition: actionSet.h:85
void copyAndMultiplyByScalar(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, GUM_SCALAR gamma)
Copies src diagrams and multiply every value by the given scalar.
const iterator_safe & endSafe() noexcept
Returns the safe iterator pointing to the end of the hashtable.
const InternalNode * node(NodeId n) const
Returns internalNode structure associated to that nodeId.
const DiscreteVariable * nodeVar() const
Returns the node variable.
Idx nbSons() const
Returns the number of sons.
#define RECAST(x)
For shorter line and hence more comprehensive code purposes only.
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies src diagrams structure into this diagrams.
<agrum/FMDP/SDyna/IOperatorStrategy.h>
NodeId son(Idx modality) const
Returns the son at a given index.
bool exists(const Key &key) const
Checks whether there exists an element with a given key in the hashtable.
const Key & key(const Key &key) const
Returns a reference on a given key.
This class is used to implement factored decision process.
Definition: fmdp.h:54
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
This files contains several function objects that are not (yet) defined in the STL.
The class for generic Hash Tables.
Definition: hashTable.h:676
Class to handle efficiently argMaxSet.
Definition: argMaxSet.h:55
SequenceIteratorSafe< Idx > endSafe() const
Iterator end.
Definition: actionSet.h:149
Headers of the StructuredPlaner planer class.
virtual Size domainSize() const =0
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
virtual void add(const DiscreteVariable &v)
Adds a new var to the variables of the multidimensional matrix.
bool exists(const Key &k) const
Indicates whether a given elements belong to the set.
Definition: set_tpl.h:604
Header of the Potential class.
const GUM_SCALAR & nodeValue(NodeId n) const
Returns value associated to given node.
virtual std::string label(Idx i) const =0
get the indice-th label. This method is pure virtual.
Structure used to represent a node internal structure.
Definition: internalNode.h:100
bool hasValue() const
Indicates if constant safe iterator has reach end of terminal nodes list.
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associate to the given main variable.
Definition: fmdp.h:106
Header files of gum::Instantiation.
const NodeId & root() const
Returns the id of the root node from the diagram.
SequenceIteratorSafe< GUM_SCALAR_SEQ > beginSafe() const
Iterator beginning.
Definition: argMaxSet.h:106
Headers of MultiDimFunctionGraph.
SequenceIteratorSafe< Idx > beginSafe() const
Iterator beginning.
Definition: actionSet.h:142
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
virtual const Sequence< const DiscreteVariable *> & variablesSequence() const override
Returns a const ref to the sequence of DiscreteVariable*.
Chain list allocated using the SmallObjectAllocator.
Definition: link.h:131
iterator_safe beginSafe()
Returns the safe iterator pointing to the beginning of the hashtable.
Size Idx
Type for indexes.
Definition: types.h:50
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a const reference to the manager of this diagram.
const GUM_SCALAR & value() const
Returns the value of the current terminal nodes pointed by the constant safe iterator.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
const std::string & name() const
returns the name of the variable
Size NodeId
Type for node ids.
Definition: graphElements.h:97
#define SOA_ALLOCATE(x)