aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
structuredPlaner_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Template implementation of FMDP/planning/StructuredPlaner.h classes.
25  *
26  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(@AMU)
28  */
29 
30 // =========================================================================
31 #include <queue>
32 #include <vector>
33 //#include <algorithm>
34 //#include <utility>
35 // =========================================================================
36 #include <agrum/tools/core/math/math_utils.h>
37 #include <agrum/tools/core/functors.h>
38 // =========================================================================
39 #include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h>
40 #include <agrum/tools/multidim/instantiation.h>
41 #include <agrum/tools/multidim/potential.h>
42 // =========================================================================
43 #include <agrum/FMDP/planning/structuredPlaner.h>
44 // =========================================================================
45 
46 /// For shorter line and hence more comprehensive code purposes only
47 #define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
48 
49 namespace gum {
50 
51 
52  /* **************************************************************************************************
53  * **/
54  /* ** **/
55  /* ** Constructors / Destructors **/
56  /* ** **/
57  /* **************************************************************************************************
58  * **/
59 
60  // ===========================================================================
61  // Default constructor
62  // ===========================================================================
63  template < typename GUM_SCALAR >
64  INLINE StructuredPlaner< GUM_SCALAR >::StructuredPlaner(IOperatorStrategy< GUM_SCALAR >* opi,
67  bool verbose) :
71 
73  vFunction_ = nullptr;
74  optimalPolicy_ = nullptr;
75  }
76 
77  // ===========================================================================
78  // Default destructor
79  // ===========================================================================
80  template < typename GUM_SCALAR >
83 
84  if (vFunction_) { delete vFunction_; }
85 
86  if (optimalPolicy_) delete optimalPolicy_;
87 
88  delete operator_;
89  }
90 
91 
92  /* **************************************************************************************************
93  * **/
94  /* ** **/
95  /* ** Datastructure access methods **/
96  /* ** **/
97  /* **************************************************************************************************
98  * **/
99 
100  // ===========================================================================
101  // Initializes data structure needed for making the planning
102  // ===========================================================================
103  template < typename GUM_SCALAR >
105  // ************************************************************************
106  // Discarding the case where no \pi* have been computed
107  if (!optimalPolicy_ || optimalPolicy_->root() == 0) return "NO OPTIMAL POLICY CALCULATED YET";
108 
109  // ************************************************************************
110  // Initialisation
111 
112  // Declaration of the needed string stream
117 
118  // First line for the toDot
119  output << std::endl << "digraph \" OPTIMAL POLICY \" {" << std::endl;
120 
121  // Form line for the internal node stream en the terminal node stream
122  terminalStream << "node [shape = box];" << std::endl;
123  nonTerminalStream << "node [shape = ellipse];" << std::endl;
124 
125  // For somme clarity in the final string
126  std::string tab = "\t";
127 
128  // To know if we already checked a node or not
129  Set< NodeId > visited;
130 
131  // FIFO of nodes to visit
132  std::queue< NodeId > fifo;
133 
134  // Loading the FIFO
137 
138 
139  // ************************************************************************
140  // Main loop
141  while (!fifo.empty()) {
142  // Node to visit
144  fifo.pop();
145 
146  // Checking if it is terminal
148  // Get back the associated ActionSet
150 
151  // Creating a line for this node
152  terminalStream << tab << currentNodeId << ";" << tab << currentNodeId << " [label=\""
153  << currentNodeId << " - ";
154 
155  // Enumerating and adding to the line the associated optimal actions
157  ++valIter)
158  terminalStream << fmdp_->actionName(*valIter) << " ";
159 
160  // Terminating line
161  terminalStream << "\"];" << std::endl;
162  continue;
163  }
164 
165  // Either wise
166  {
167  // Geting back the associated internal node
169 
170  // Creating a line in internalnode stream for this node
171  nonTerminalStream << tab << currentNodeId << ";" << tab << currentNodeId << " [label=\""
172  << currentNodeId << " - " << currentNode->nodeVar()->name() << "\"];"
173  << std::endl;
174 
175  // Going through the sons and agregating them according the the sons Ids
177  for (Idx sonIter = 0; sonIter < currentNode->nbSons(); ++sonIter) {
181  }
185  }
186 
187  // Adding to the arc stram
188  for (auto sonIter = sonMap.beginSafe(); sonIter != sonMap.endSafe(); ++sonIter) {
189  arcstream << tab << currentNodeId << " -> " << sonIter.key() << " [label=\" ";
190  Link< Idx >* modaIter = sonIter.val()->list();
191  while (modaIter) {
193  if (modaIter->nextLink()) arcstream << ", ";
195  }
196  arcstream << "\",color=\"#00ff00\"];" << std::endl;
197  delete sonIter.val();
198  }
199  }
200  }
201 
202  // Terminating
203  output << terminalStream.str() << std::endl
204  << nonTerminalStream.str() << std::endl
205  << arcstream.str() << std::endl
206  << "}" << std::endl;
207 
208  return output.str();
209  }
210 
211 
212  /* **************************************************************************************************
213  * **/
214  /* ** **/
215  /* ** Planning Methods **/
216  /* ** **/
217  /* **************************************************************************************************
218  * **/
219 
220  // ===========================================================================
221  // Initializes data structure needed for making the planning
222  // ===========================================================================
223  template < typename GUM_SCALAR >
225  fmdp_ = fmdp;
226 
227  // Determination of the threshold value
229 
230  // Establishement of sequence of variable elemination
231  for (auto varIter = fmdp_->beginVariables(); varIter != fmdp_->endVariables(); ++varIter)
233 
234  // Initialisation of the value function
237  _firstTime_ = true;
238  }
239 
240 
241  // ===========================================================================
242  // Performs a value iteration
243  // ===========================================================================
244  template < typename GUM_SCALAR >
246  if (_firstTime_) {
247  this->initVFunction_();
248  _firstTime_ = false;
249  }
250 
251  // *****************************************************************************************
252  // Main loop
253  // *****************************************************************************************
254  Idx nbIte = 0;
255  GUM_SCALAR gap = _threshold_ + 1;
256  while ((gap > _threshold_) && (nbIte < nbStep)) {
257  ++nbIte;
258 
260 
261  // *****************************************************************************************
262  // Then we compare new value function and the old one
264  gap = 0;
265 
267  if (gap < fabs(deltaV->value())) gap = fabs(deltaV->value());
268  delete deltaV;
269 
270  if (verbose_)
271  std::cout << " ------------------- Fin itération n° " << nbIte << std::endl
272  << " Gap : " << gap << " - " << _threshold_ << std::endl;
273 
274  // *****************************************************************************************
275  // And eventually we update pointers for next loop
276  delete vFunction_;
278  }
279 
280  // *****************************************************************************************
281  // Policy matching value function research
282  // *****************************************************************************************
283  this->evalPolicy_();
284  }
285 
286 
287  // ===========================================================================
288  // Performs a single step of value iteration
289  // ===========================================================================
290  template < typename GUM_SCALAR >
292  vFunction_->copy(*(RECAST(fmdp_->reward())));
293  }
294 
295  /* **************************************************************************************************
296  * **/
297  /* ** **/
298  /* ** Value Iteration Methods **/
299  /* ** **/
300  /* **************************************************************************************************
301  * **/
302 
303 
304  // ===========================================================================
305  // Performs a single step of value iteration
306  // ===========================================================================
307  template < typename GUM_SCALAR >
309  // *****************************************************************************************
310  // Loop reset
313 
314  // *****************************************************************************************
315  // For each action
320  }
321  delete newVFunction;
322 
323  // *****************************************************************************************
324  // Next to evaluate main value function, we take maximise over all action
325  // value, ...
327 
328  // *******************************************************************************************
329  // Next, we eval the new function value
331 
332  return newVFunction;
333  }
334 
335 
336  // ===========================================================================
337  // Evals the q function for current fmdp action
338  // ===========================================================================
339  template < typename GUM_SCALAR >
342  Idx actionId) {
343  // ******************************************************************************
344  // Initialisation :
345  // Creating a copy of last Vfunction to deduce from the new Qaction
346  // And finding the first var to eleminate (the one at the end)
347 
348  return operator_->regress(Vold, actionId, this->fmdp_, this->elVarSeq_);
349  }
350 
351 
352  // ===========================================================================
353  // Maximise the AAction to iobtain the vFunction
354  // ===========================================================================
355  template < typename GUM_SCALAR >
360 
361  while (!qActionsSet.empty()) {
365  }
366 
367  return newVFunction;
368  }
369 
370 
371  // ===========================================================================
372  // Maximise the AAction to iobtain the vFunction
373  // ===========================================================================
374  template < typename GUM_SCALAR >
379 
380  while (!qActionsSet.empty()) {
384  }
385 
386  return newVFunction;
387  }
388 
389 
390  // ===========================================================================
391  // Updates the value function by multiplying by discount and adding reward
392  // ===========================================================================
393  template < typename GUM_SCALAR >
396  Idx actionId) {
397  // *****************************************************************************************
398  // ... we multiply the result by the discount factor, ...
401  delete Vold;
402 
403  // *****************************************************************************************
404  // ... and finally add reward
406 
407  return newVFunction;
408  }
409 
410 
411  /* **************************************************************************************************
412  * **/
413  /* ** **/
414  /* ** Optimal Policy Evaluation Methods **/
415  /* ** **/
416  /* **************************************************************************************************
417  * **/
418 
419  // ===========================================================================
420  // Evals the policy corresponding to the given value function
421  // ===========================================================================
422  template < typename GUM_SCALAR >
424  // *****************************************************************************************
425  // Loop reset
428 
431  // *****************************************************************************************
432  // For each action
435 
436  qAction = this->addReward_(qAction);
437 
439  }
440  delete newVFunction;
441 
442 
443  // *****************************************************************************************
444  // Next to evaluate main value function, we take maximise over all action
445  // value, ...
448 
449  // *****************************************************************************************
450  // Next to evaluate main value function, we take maximise over all action
451  // value, ...
453  }
454 
455 
456  // ===========================================================================
457  // Creates a copy of given in parameter decision Graph and replaces leaves
458  // of that Graph by a pair containing value of the leaf and action to which
459  // is bind this Graph (given in parameter).
460  // ===========================================================================
461  template < typename GUM_SCALAR >
464  Idx actionId) {
467 
468  // Insertion des nouvelles variables
472  ++varIter)
473  amcpy->add(**varIter);
474 
478 
479  delete qAction;
480  return amcpy;
481  }
482 
483 
484  // ==========================================================================
485  // Recursion part for the createArgMaxCopy
486  // ==========================================================================
487  template < typename GUM_SCALAR >
490  Idx actionId,
495 
496  NodeId nody;
500  } else {
502  NodeId* sonsMap = static_cast< NodeId* >(
504  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
505  sonsMap[moda]
508  }
510  return nody;
511  }
512 
513 
514  // ===========================================================================
515  // Performs argmax_a Q(s,a)
516  // ===========================================================================
517  template < typename GUM_SCALAR >
523  = qActionsSet.back();
525 
526  while (!qActionsSet.empty()) {
528  = qActionsSet.back();
531  }
532 
533  return newVFunction;
534  }
535 
536  // ===========================================================================
537  // Creates a copy of given in parameter decision Graph and replaces leaves
538  // of that Graph by a pair containing value of the leaf and action to which
539  // is bind this Graph (given in parameter).
540  // ===========================================================================
541  template < typename GUM_SCALAR >
546 
547  // Insertion des nouvelles variables
551  ++varIter)
553 
557  src2dest));
558 
560  }
561 
562 
563  // ==========================================================================
564  // Recursion part for the createArgMaxCopy
565  // ==========================================================================
566  template < typename GUM_SCALAR >
573 
574  NodeId nody;
576  ActionSet leaf;
579  } else {
581  NodeId* sonsMap = static_cast< NodeId* >(
583  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
586  }
588  return nody;
589  }
590 
591  // ==========================================================================
592  // Extract from an ArgMaxSet the associated ActionSet
593  // ==========================================================================
594  template < typename GUM_SCALAR >
596  ActionSet& dest) {
597  for (auto idi = src.beginSafe(); idi != src.endSafe(); ++idi)
598  dest += *idi;
599  }
600 
601 
602 } // end of namespace gum
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
#define RECAST(x)
Definition: fmdp_tpl.h:36