aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
structuredPlaner_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Template implementation of FMDP/planning/StructuredPlaner.h classes.
25  *
26  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(@AMU)
28  */
29 
30 // =========================================================================
31 #include <queue>
32 #include <vector>
33 //#include <algorithm>
34 //#include <utility>
35 // =========================================================================
36 #include <agrum/tools/core/math/math_utils.h>
37 #include <agrum/tools/core/functors.h>
38 // =========================================================================
39 #include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h>
40 #include <agrum/tools/multidim/instantiation.h>
41 #include <agrum/tools/multidim/potential.h>
42 // =========================================================================
43 #include <agrum/FMDP/planning/structuredPlaner.h>
44 // =========================================================================
45 
46 /// For shorter line and hence more comprehensive code purposes only
47 #define RECAST(x) reinterpret_cast< const MultiDimFunctionGraph< GUM_SCALAR >* >(x)
48 
49 namespace gum {
50 
51 
52  /* **************************************************************************************************
53  * **/
54  /* ** **/
55  /* ** Constructors / Destructors **/
56  /* ** **/
57  /* **************************************************************************************************
58  * **/
59 
60  // ===========================================================================
61  // Default constructor
62  // ===========================================================================
63  template < typename GUM_SCALAR >
64  INLINE StructuredPlaner< GUM_SCALAR >::StructuredPlaner(
68  bool verbose) :
72 
74  vFunction_ = nullptr;
75  optimalPolicy_ = nullptr;
76  }
77 
78  // ===========================================================================
79  // Default destructor
80  // ===========================================================================
81  template < typename GUM_SCALAR >
84 
85  if (vFunction_) { delete vFunction_; }
86 
87  if (optimalPolicy_) delete optimalPolicy_;
88 
89  delete operator_;
90  }
91 
92 
93  /* **************************************************************************************************
94  * **/
95  /* ** **/
96  /* ** Datastructure access methods **/
97  /* ** **/
98  /* **************************************************************************************************
99  * **/
100 
101  // ===========================================================================
102  // Initializes data structure needed for making the planning
103  // ===========================================================================
104  template < typename GUM_SCALAR >
106  // ************************************************************************
107  // Discarding the case where no \pi* have been computed
108  if (!optimalPolicy_ || optimalPolicy_->root() == 0)
109  return "NO OPTIMAL POLICY CALCULATED YET";
110 
111  // ************************************************************************
112  // Initialisation
113 
114  // Declaration of the needed string stream
119 
120  // First line for the toDot
121  output << std::endl << "digraph \" OPTIMAL POLICY \" {" << std::endl;
122 
123  // Form line for the internal node stream en the terminal node stream
124  terminalStream << "node [shape = box];" << std::endl;
125  nonTerminalStream << "node [shape = ellipse];" << std::endl;
126 
127  // For somme clarity in the final string
128  std::string tab = "\t";
129 
130  // To know if we already checked a node or not
131  Set< NodeId > visited;
132 
133  // FIFO of nodes to visit
134  std::queue< NodeId > fifo;
135 
136  // Loading the FIFO
139 
140 
141  // ************************************************************************
142  // Main loop
143  while (!fifo.empty()) {
144  // Node to visit
146  fifo.pop();
147 
148  // Checking if it is terminal
150  // Get back the associated ActionSet
152 
153  // Creating a line for this node
155  << " [label=\"" << currentNodeId << " - ";
156 
157  // Enumerating and adding to the line the associated optimal actions
159  valIter != ase.endSafe();
160  ++valIter)
161  terminalStream << fmdp_->actionName(*valIter) << " ";
162 
163  // Terminating line
164  terminalStream << "\"];" << std::endl;
165  continue;
166  }
167 
168  // Either wise
169  {
170  // Geting back the associated internal node
172 
173  // Creating a line in internalnode stream for this node
175  << " [label=\"" << currentNodeId << " - "
176  << currentNode->nodeVar()->name() << "\"];" << std::endl;
177 
178  // Going through the sons and agregating them according the the sons Ids
180  for (Idx sonIter = 0; sonIter < currentNode->nbSons(); ++sonIter) {
184  }
188  }
189 
190  // Adding to the arc stram
191  for (auto sonIter = sonMap.beginSafe(); sonIter != sonMap.endSafe();
192  ++sonIter) {
193  arcstream << tab << currentNodeId << " -> " << sonIter.key()
194  << " [label=\" ";
195  Link< Idx >* modaIter = sonIter.val()->list();
196  while (modaIter) {
198  if (modaIter->nextLink()) arcstream << ", ";
200  }
201  arcstream << "\",color=\"#00ff00\"];" << std::endl;
202  delete sonIter.val();
203  }
204  }
205  }
206 
207  // Terminating
208  output << terminalStream.str() << std::endl
209  << nonTerminalStream.str() << std::endl
210  << arcstream.str() << std::endl
211  << "}" << std::endl;
212 
213  return output.str();
214  }
215 
216 
217  /* **************************************************************************************************
218  * **/
219  /* ** **/
220  /* ** Planning Methods **/
221  /* ** **/
222  /* **************************************************************************************************
223  * **/
224 
225  // ===========================================================================
226  // Initializes data structure needed for making the planning
227  // ===========================================================================
228  template < typename GUM_SCALAR >
230  fmdp_ = fmdp;
231 
232  // Determination of the threshold value
234 
235  // Establishement of sequence of variable elemination
236  for (auto varIter = fmdp_->beginVariables(); varIter != fmdp_->endVariables();
237  ++varIter)
239 
240  // Initialisation of the value function
243  firstTime__ = true;
244  }
245 
246 
247  // ===========================================================================
248  // Performs a value iteration
249  // ===========================================================================
250  template < typename GUM_SCALAR >
252  if (firstTime__) {
253  this->initVFunction_();
254  firstTime__ = false;
255  }
256 
257  // *****************************************************************************************
258  // Main loop
259  // *****************************************************************************************
260  Idx nbIte = 0;
261  GUM_SCALAR gap = threshold__ + 1;
262  while ((gap > threshold__) && (nbIte < nbStep)) {
263  ++nbIte;
264 
266 
267  // *****************************************************************************************
268  // Then we compare new value function and the old one
271  gap = 0;
272 
274  if (gap < fabs(deltaV->value())) gap = fabs(deltaV->value());
275  delete deltaV;
276 
277  if (verbose_)
278  std::cout << " ------------------- Fin itération n° " << nbIte << std::endl
279  << " Gap : " << gap << " - " << threshold__ << std::endl;
280 
281  // *****************************************************************************************
282  // And eventually we update pointers for next loop
283  delete vFunction_;
285  }
286 
287  // *****************************************************************************************
288  // Policy matching value function research
289  // *****************************************************************************************
290  this->evalPolicy_();
291  }
292 
293 
294  // ===========================================================================
295  // Performs a single step of value iteration
296  // ===========================================================================
297  template < typename GUM_SCALAR >
299  vFunction_->copy(*(RECAST(fmdp_->reward())));
300  }
301 
302  /* **************************************************************************************************
303  * **/
304  /* ** **/
305  /* ** Value Iteration Methods **/
306  /* ** **/
307  /* **************************************************************************************************
308  * **/
309 
310 
311  // ===========================================================================
312  // Performs a single step of value iteration
313  // ===========================================================================
314  template < typename GUM_SCALAR >
317  // *****************************************************************************************
318  // Loop reset
322 
323  // *****************************************************************************************
324  // For each action
326  for (auto actionIter = fmdp_->beginActions();
328  ++actionIter) {
332  }
333  delete newVFunction;
334 
335  // *****************************************************************************************
336  // Next to evaluate main value function, we take maximise over all action
337  // value, ...
339 
340  // *******************************************************************************************
341  // Next, we eval the new function value
343 
344  return newVFunction;
345  }
346 
347 
348  // ===========================================================================
349  // Evals the q function for current fmdp action
350  // ===========================================================================
351  template < typename GUM_SCALAR >
355  Idx actionId) {
356  // ******************************************************************************
357  // Initialisation :
358  // Creating a copy of last Vfunction to deduce from the new Qaction
359  // And finding the first var to eleminate (the one at the end)
360 
361  return operator_->regress(Vold, actionId, this->fmdp_, this->elVarSeq_);
362  }
363 
364 
365  // ===========================================================================
366  // Maximise the AAction to iobtain the vFunction
367  // ===========================================================================
368  template < typename GUM_SCALAR >
374 
375  while (!qActionsSet.empty()) {
379  }
380 
381  return newVFunction;
382  }
383 
384 
385  // ===========================================================================
386  // Maximise the AAction to iobtain the vFunction
387  // ===========================================================================
388  template < typename GUM_SCALAR >
394 
395  while (!qActionsSet.empty()) {
399  }
400 
401  return newVFunction;
402  }
403 
404 
405  // ===========================================================================
406  // Updates the value function by multiplying by discount and adding reward
407  // ===========================================================================
408  template < typename GUM_SCALAR >
411  Idx actionId) {
412  // *****************************************************************************************
413  // ... we multiply the result by the discount factor, ...
417  delete Vold;
418 
419  // *****************************************************************************************
420  // ... and finally add reward
422 
423  return newVFunction;
424  }
425 
426 
427  /* **************************************************************************************************
428  * **/
429  /* ** **/
430  /* ** Optimal Policy Evaluation Methods **/
431  /* ** **/
432  /* **************************************************************************************************
433  * **/
434 
435  // ===========================================================================
436  // Evals the policy corresponding to the given value function
437  // ===========================================================================
438  template < typename GUM_SCALAR >
440  // *****************************************************************************************
441  // Loop reset
445 
449  // *****************************************************************************************
450  // For each action
451  for (auto actionIter = fmdp_->beginActions();
453  ++actionIter) {
456 
457  qAction = this->addReward_(qAction);
458 
460  }
461  delete newVFunction;
462 
463 
464  // *****************************************************************************************
465  // Next to evaluate main value function, we take maximise over all action
466  // value, ...
470 
471  // *****************************************************************************************
472  // Next to evaluate main value function, we take maximise over all action
473  // value, ...
475  }
476 
477 
478  // ===========================================================================
479  // Creates a copy of given in parameter decision Graph and replaces leaves
480  // of that Graph by a pair containing value of the leaf and action to which
481  // is bind this Graph (given in parameter).
482  // ===========================================================================
483  template < typename GUM_SCALAR >
487  Idx actionId) {
489  amcpy
491 
492  // Insertion des nouvelles variables
496  ++varIter)
497  amcpy->add(**varIter);
498 
502 
503  delete qAction;
504  return amcpy;
505  }
506 
507 
508  // ==========================================================================
509  // Recursion part for the createArgMaxCopy
510  // ==========================================================================
511  template < typename GUM_SCALAR >
514  Idx actionId,
517  argMaxCpy,
520 
521  NodeId nody;
525  } else {
527  NodeId* sonsMap = static_cast< NodeId* >(
529  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
531  actionId,
532  src,
533  argMaxCpy,
534  visitedNodes);
535  nody
537  }
539  return nody;
540  }
541 
542 
543  // ===========================================================================
544  // Performs argmax_a Q(s,a)
545  // ===========================================================================
546  template < typename GUM_SCALAR >
551  qActionsSet) {
554  = qActionsSet.back();
556 
557  while (!qActionsSet.empty()) {
559  qAction
560  = qActionsSet.back();
563  }
564 
565  return newVFunction;
566  }
567 
568  // ===========================================================================
569  // Creates a copy of given in parameter decision Graph and replaces leaves
570  // of that Graph by a pair containing value of the leaf and action to which
571  // is bind this Graph (given in parameter).
572  // ===========================================================================
573  template < typename GUM_SCALAR >
579 
580  // Insertion des nouvelles variables
584  ++varIter)
586 
591  src2dest));
592 
594  }
595 
596 
597  // ==========================================================================
598  // Recursion part for the createArgMaxCopy
599  // ==========================================================================
600  template < typename GUM_SCALAR >
607 
608  NodeId nody;
610  ActionSet leaf;
613  } else {
615  NodeId* sonsMap = static_cast< NodeId* >(
617  for (Idx moda = 0; moda < currentNode->nodeVar()->domainSize(); ++moda)
620  visitedNodes);
622  sonsMap);
623  }
625  return nody;
626  }
627 
628  // ==========================================================================
629  // Extract from an ArgMaxSet the associated ActionSet
630  // ==========================================================================
631  template < typename GUM_SCALAR >
633  const ArgMaxSet< GUM_SCALAR, Idx >& src,
634  ActionSet& dest) {
635  for (auto idi = src.beginSafe(); idi != src.endSafe(); ++idi)
636  dest += *idi;
637  }
638 
639 
640 } // end of namespace gum
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
#define RECAST(x)
Definition: fmdp_tpl.h:36