aGrUM  0.14.2
adaptiveRMaxPlaner.cpp
/***************************************************************************
 *   Copyright (C) 2005 by Pierre-Henri WUILLEMIN and Christophe GONZALES  *
 *   {prenom.nom}_at_lip6.fr                                               *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
// =========================================================================
#include <queue>
#include <vector>
//#include <algorithm>
//#include <utility>
// =========================================================================
#include <agrum/core/math/math.h>
#include <agrum/core/functors.h>
#include <agrum/core/smallobjectallocator/smallObjectAllocator.h>
// =========================================================================
#include <agrum/multidim/instantiation.h>
#include <agrum/multidim/potential.h>
#include <agrum/multidim/implementations/multiDimFunctionGraph.h>
// =========================================================================
#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
// =========================================================================

/// For shorter lines and hence more readable code only
#define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x)

namespace gum {

  /* ************************************************************************* */
  /* **                                                                     ** */
  /* **                     Constructors / Destructors                      ** */
  /* **                                                                     ** */
  /* ************************************************************************* */

  // ===========================================================================
  // Default constructor
  // ===========================================================================
  AdaptiveRMaxPlaner::AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
                                         double                   discountFactor,
                                         double                   epsilon,
                                         const ILearningStrategy* learner,
                                         bool                     verbose) :
      StructuredPlaner(opi, discountFactor, epsilon, verbose),
      IDecisionStrategy(), __fmdpLearner(learner), __initialized(false) {
    GUM_CONSTRUCTOR(AdaptiveRMaxPlaner);
  }

  // ===========================================================================
  // Default destructor
  // ===========================================================================
  AdaptiveRMaxPlaner::~AdaptiveRMaxPlaner() {
    GUM_DESTRUCTOR(AdaptiveRMaxPlaner);

    for (HashTableIteratorSafe< Idx, StatesCounter* > scIter =
            __counterTable.beginSafe();
         scIter != __counterTable.endSafe();
         ++scIter)
      delete scIter.val();
  }

  /* ************************************************************************* */
  /* **                                                                     ** */
  /* **                          Planning Methods                           ** */
  /* **                                                                     ** */
  /* ************************************************************************* */

  // ==========================================================================
  // Initializes the data structures needed for planning
  // ==========================================================================
  void AdaptiveRMaxPlaner::initialize(const FMDP< double >* fmdp) {
    if (!__initialized) {
      StructuredPlaner::initialize(fmdp);
      IDecisionStrategy::initialize(fmdp);
      for (auto actionIter = fmdp->beginActions();
           actionIter != fmdp->endActions();
           ++actionIter) {
        __counterTable.insert(*actionIter, new StatesCounter());
        __initializedTable.insert(*actionIter, false);
      }
      __initialized = true;
    }
  }

  // ===========================================================================
  // Performs a value iteration
  // ===========================================================================
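  // Builds the RMax and mask function graphs from the learner's current
  // visit counts, delegates the actual value iteration to
  // StructuredPlaner::makePlanning, then frees those temporary structures.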
  void AdaptiveRMaxPlaner::makePlanning(Idx nbStep) {
    __makeRMaxFunctionGraphs();

    StructuredPlaner::makePlanning(nbStep);

    __clearTables();
  }

  /* ************************************************************************* */
  /* **                                                                     ** */
  /* **                       Value Iteration Methods                       ** */
  /* **                                                                     ** */
  /* ************************************************************************* */

  // ===========================================================================
  // Initializes the value function
  // ===========================================================================
  void AdaptiveRMaxPlaner::_initVFunction() {
    _vFunction->manager()->setRootNode(
       _vFunction->manager()->addTerminalNode(0.0));
    for (auto actionIter = _fmdp->beginActions();
         actionIter != _fmdp->endActions();
         ++actionIter)
      _vFunction = this->_operator->add(
         _vFunction, RECASTED(this->_fmdp->reward(*actionIter)), 1);
  }

  // ===========================================================================
  // Performs a single step of value iteration
  // ===========================================================================
  MultiDimFunctionGraph< double >* AdaptiveRMaxPlaner::_valueIteration() {
    // *************************************************************************
    // Loop reset
    MultiDimFunctionGraph< double >* newVFunction =
       _operator->getFunctionInstance();
    newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());

    // *************************************************************************
    // For each action
    std::vector< MultiDimFunctionGraph< double >* > qActionsSet;
    for (auto actionIter = _fmdp->beginActions();
         actionIter != _fmdp->endActions();
         ++actionIter) {
      MultiDimFunctionGraph< double >* qAction =
         _evalQaction(newVFunction, *actionIter);

      // ***********************************************************************
      // Next, we add the reward
      qAction = _addReward(qAction, *actionIter);

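      // RMax trick: __actionsBoolTable[a] is 1 over "known" states and 0
      // elsewhere, while __actionsRMaxTable[a] is 0 over known states and
      // __rmax elsewhere. Multiplying then maximizing thus keeps Q(s,a)
      // where the model is trusted and substitutes the optimistic __rmax
      // value where it is not.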
      qAction = this->_operator->maximize(
         __actionsRMaxTable[*actionIter],
         this->_operator->multiply(qAction, __actionsBoolTable[*actionIter], 1),
         2);

      qActionsSet.push_back(qAction);
    }
    delete newVFunction;

    // *************************************************************************
    // Next, to evaluate the main value function, we maximise over all action
    // values, ...
    newVFunction = _maximiseQactions(qActionsSet);

    return newVFunction;
  }

  /* ************************************************************************* */
  /* **                                                                     ** */
  /* **                  Optimal Policy Evaluation Methods                  ** */
  /* **                                                                     ** */
  /* ************************************************************************* */

  // ===========================================================================
  // Evaluates the policy corresponding to the given value function
  // ===========================================================================
  void AdaptiveRMaxPlaner::_evalPolicy() {
    // *************************************************************************
    // Loop reset
    MultiDimFunctionGraph< double >* newVFunction =
       _operator->getFunctionInstance();
    newVFunction->copyAndReassign(*_vFunction, _fmdp->mapMainPrime());

    std::vector<
       MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* >
       argMaxQActionsSet;
    // *************************************************************************
    // For each action
    for (auto actionIter = _fmdp->beginActions();
         actionIter != _fmdp->endActions();
         ++actionIter) {
      MultiDimFunctionGraph< double >* qAction =
         this->_evalQaction(newVFunction, *actionIter);

      qAction = this->_addReward(qAction, *actionIter);

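      // Same RMax masking as in _valueIteration: under-explored state-action
      // pairs keep the optimistic value, so the argmax favors exploring them.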
      qAction = this->_operator->maximize(
         __actionsRMaxTable[*actionIter],
         this->_operator->multiply(qAction, __actionsBoolTable[*actionIter], 1),
         2);

      argMaxQActionsSet.push_back(_makeArgMax(qAction, *actionIter));
    }
    delete newVFunction;

    // *************************************************************************
    // Next, to evaluate the main value function, we take the argmax over all
    // action values, ...
    MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >*
       argMaxVFunction = _argmaximiseQactions(argMaxQActionsSet);

    // *************************************************************************
    // ... and then extract the optimal policy from that argmax.
    _extractOptimalPolicy(argMaxVFunction);
  }

  // ===========================================================================
  // Builds, for each action, the RMax reward graph and the "known state"
  // boolean mask from the learner's visit counts
  // ===========================================================================
  void AdaptiveRMaxPlaner::__makeRMaxFunctionGraphs() {
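    // A state is considered "known" for an action once observed at least
    // __rThreshold times (5 * modaMax(), floored at 30). Under-observed
    // states will be given the optimistic value __rmax = rMax / (1 - gamma),
    // an upper bound on the achievable discounted return.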
    __rThreshold =
       __fmdpLearner->modaMax() * 5 > 30 ? __fmdpLearner->modaMax() * 5 : 30;
    __rmax = __fmdpLearner->rMax() / (1.0 - this->_discountFactor);

    for (auto actionIter = this->fmdp()->beginActions();
         actionIter != this->fmdp()->endActions();
         ++actionIter) {
      std::vector< MultiDimFunctionGraph< double >* > rmaxs;
      std::vector< MultiDimFunctionGraph< double >* > boolQs;

      for (auto varIter = this->fmdp()->beginVariables();
           varIter != this->fmdp()->endVariables();
           ++varIter) {
        const IVisitableGraphLearner* visited = __counterTable[*actionIter];

        MultiDimFunctionGraph< double >* varRMax =
           _operator->getFunctionInstance();
        MultiDimFunctionGraph< double >* varBoolQ =
           _operator->getFunctionInstance();

        visited->insertSetOfVars(varRMax);
        visited->insertSetOfVars(varBoolQ);

        std::pair< NodeId, NodeId > rooty =
           __visitLearner(visited, visited->root(), varRMax, varBoolQ);
        varRMax->manager()->setRootNode(rooty.first);
        varRMax->manager()->reduce();
        varRMax->manager()->clean();
        varBoolQ->manager()->setRootNode(rooty.second);
        varBoolQ->manager()->reduce();
        varBoolQ->manager()->clean();

        rmaxs.push_back(varRMax);
        boolQs.push_back(varBoolQ);
      }

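      // Combine the per-variable diagrams: keep the largest RMax bonus and
      // take the minimum (logical AND) of the boolean masks.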
      __actionsRMaxTable.insert(*actionIter, this->_maximiseQactions(rmaxs));
      __actionsBoolTable.insert(*actionIter, this->_minimiseFunctions(boolQs));
    }
  }

  // ===========================================================================
  // Recursively mirrors the learner's visit-count tree into the rmax and
  // boolQ function graphs
  // ===========================================================================
  std::pair< NodeId, NodeId >
     AdaptiveRMaxPlaner::__visitLearner(const IVisitableGraphLearner* visited,
                                        NodeId currentNodeId,
                                        MultiDimFunctionGraph< double >* rmax,
                                        MultiDimFunctionGraph< double >* boolQ) {
    std::pair< NodeId, NodeId > rep;
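    // Terminal node: a leaf visited fewer than __rThreshold times gets the
    // optimistic reward __rmax and a mask value of 0; a well-visited leaf
    // gets 0 and a mask value of 1.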
    if (visited->isTerminal(currentNodeId)) {
      rep.first = rmax->manager()->addTerminalNode(
         visited->nodeNbObservation(currentNodeId) < __rThreshold ? __rmax : 0.0);
      rep.second = boolQ->manager()->addTerminalNode(
         visited->nodeNbObservation(currentNodeId) < __rThreshold ? 0.0 : 1.0);
      return rep;
    }

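    // Internal node: recurse on every modality of the tested variable and
    // rebuild the same node in both the rmax and boolQ graphs.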
    NodeId* rmaxsons = static_cast< NodeId* >(SOA_ALLOCATE(
       sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));
    NodeId* bqsons = static_cast< NodeId* >(SOA_ALLOCATE(
       sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));

    for (Idx moda = 0; moda < visited->nodeVar(currentNodeId)->domainSize();
         ++moda) {
      std::pair< NodeId, NodeId > sonp = __visitLearner(
         visited, visited->nodeSon(currentNodeId, moda), rmax, boolQ);
      rmaxsons[moda] = sonp.first;
      bqsons[moda] = sonp.second;
    }

    rep.first =
       rmax->manager()->addInternalNode(visited->nodeVar(currentNodeId), rmaxsons);
    rep.second =
       boolQ->manager()->addInternalNode(visited->nodeVar(currentNodeId), bqsons);
    return rep;
  }

  // ===========================================================================
  // Frees the RMax and mask tables once planning is done
  // ===========================================================================
  void AdaptiveRMaxPlaner::__clearTables() {
    for (auto actionIter = this->fmdp()->beginActions();
         actionIter != this->fmdp()->endActions();
         ++actionIter) {
      delete __actionsBoolTable[*actionIter];
      delete __actionsRMaxTable[*actionIter];
    }
    __actionsRMaxTable.clear();
    __actionsBoolTable.clear();
  }

}   // end of namespace gum