aGrUM  0.14.2
structuredPlaner.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 // =========================================================================
28 #ifndef GUM_STRUCTURED_PLANNING_H
29 #define GUM_STRUCTURED_PLANNING_H
30 // =========================================================================
31 #include <thread>
32 // =========================================================================
33 #include <agrum/core/argMaxSet.h>
34 #include <agrum/core/functors.h>
35 #include <agrum/core/inline.h>
37 // =========================================================================
40 // =========================================================================
42 #include <agrum/FMDP/fmdp.h>
47 // =========================================================================
48 
49 namespace gum {
50 
66  template < typename GUM_SCALAR >
67  class StructuredPlaner : public IPlanningStrategy< GUM_SCALAR > {
68  // ###################################################################
70  // ###################################################################
72  public:
73  // ==========================================================================
75  // ==========================================================================
77  spumddInstance(GUM_SCALAR discountFactor = 0.9,
78  GUM_SCALAR epsilon = 0.00001,
79  bool verbose = true) {
82  discountFactor,
83  epsilon,
84  verbose);
85  }
86 
87  // ==========================================================================
89  // ==========================================================================
91  sviInstance(GUM_SCALAR discountFactor = 0.9,
92  GUM_SCALAR epsilon = 0.00001,
93  bool verbose = true) {
96  discountFactor,
97  epsilon,
98  verbose);
99  }
100 
102 
103  // ###################################################################
105  // ###################################################################
107  protected:
108  // ==========================================================================
110  // ==========================================================================
112  GUM_SCALAR discountFactor,
113  GUM_SCALAR epsilon,
114  bool verbose);
115 
116  // ==========================================================================
118  // ==========================================================================
119  public:
120  virtual ~StructuredPlaner();
121 
123 
124  // ###################################################################
126  // ###################################################################
128 
129  public:
130  // ==========================================================================
133  // ==========================================================================
// Accessor: returns the const pointer stored in _fmdp, i.e. the Factored
// Markov Decision Process on which planning is performed.
134  INLINE const FMDP< GUM_SCALAR >* fmdp() { return _fmdp; }
135 
136  // ==========================================================================
138  // ==========================================================================
140  return _vFunction;
141  }
142 
143  // ==========================================================================
145  // ==========================================================================
// Returns the current size of the value function computed so far:
// _vFunction->realSize() when _vFunction is set, 0 while it is still nullptr.
146  virtual Size vFunctionSize() {
147  return _vFunction != nullptr ? _vFunction->realSize() : 0;
148  }
149 
150  // ==========================================================================
152  // ==========================================================================
155  return _optimalPolicy;
156  }
157 
158  // ==========================================================================
160  // ==========================================================================
162  return _optimalPolicy != nullptr ? _optimalPolicy->realSize() : 0;
163  }
164 
165  // ==========================================================================
169  // ==========================================================================
170  std::string optimalPolicy2String();
171 
173 
174 
175  // ###################################################################
177  // ###################################################################
179 
180  public:
181  // ==========================================================================
187  // ==========================================================================
188  virtual void initialize(const FMDP< GUM_SCALAR >* fmdp);
189 
190 
191  // ==========================================================================
200  // ==========================================================================
201  virtual void makePlanning(Idx nbStep = 1000000);
202 
204 
205 
206  // ###################################################################
208  // ###################################################################
210 
211  protected:
212  // ==========================================================================
214  // ==========================================================================
215  virtual void _initVFunction();
216 
217  // ==========================================================================
219  // ==========================================================================
221 
222  // ==========================================================================
224  // ==========================================================================
227 
228  // ==========================================================================
231  // ==========================================================================
234 
235  // ==========================================================================
238  // ==========================================================================
241 
242  // ==========================================================================
245  // ==========================================================================
247  _addReward(MultiDimFunctionGraph< GUM_SCALAR >* function, Idx actionId = 0);
248 
250 
251 
252  // ###################################################################
254  // ###################################################################
256 
257  protected:
258  // ==========================================================================
260  // ==========================================================================
261  virtual void _evalPolicy();
262 
263  // ==========================================================================
274  // ==========================================================================
277  Idx actionId);
278 
279  private:
280  // ==========================================================================
282  // ==========================================================================
284  Idx,
289 
290  protected:
291  // ==========================================================================
294  // ==========================================================================
299  SetTerminalNodePolicy >* >&);
300 
301  // ==========================================================================
306  // ==========================================================================
309  SetTerminalNodePolicy >* optimalValueFunction);
310 
311  private:
312  // ==========================================================================
314  // ==========================================================================
316  NodeId,
320 
321  // ==========================================================================
323  // ==========================================================================
325 
326 
328 
329  protected:
330  // ==========================================================================
334  // ==========================================================================
336 
337  // ==========================================================================
339  // ==========================================================================
341 
342  // ==========================================================================
349  // ==========================================================================
351 
352  // ==========================================================================
354  // ==========================================================================
356 
357  // ==========================================================================
359  // ==========================================================================
360  GUM_SCALAR _discountFactor;
361 
363 
364  // ==========================================================================
367  // ==========================================================================
368  bool _verbose;
369 
370 
371  private:
372  // ==========================================================================
375  // ==========================================================================
376  GUM_SCALAR __threshold;
378  };
379 
380 } /* namespace gum */
381 
382 
384 
385 #endif // GUM_STRUCTURED_PLANNING_H
static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
Headers of the ITerminalNodePolicy.
<agrum/FMDP/planning/structuredPlaner.h>
virtual ~StructuredPlaner()
Default destructor.
Headers of gum::SmallObjectAllocator.
A class to store the optimal actions.
Definition: actionSet.h:85
virtual Size realSize() const
Returns the real number of parameters used for this table.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * _argmaximiseQactions(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a)
Headers of the Operator Strategy interface.
aGrUM's inline/outline selection
NodeId __recurExtractOptPol(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for _extractOptimalPolicy.
GUM_SCALAR _discountFactor
Discount Factor used for infinite horizon planning.
virtual void _evalPolicy()
Perform the required tasks to extract an optimal policy.
IOperatorStrategy< GUM_SCALAR > * _operator
<agrum/FMDP/SDyna/IOperatorStrategy.h>
bool _verbose
Boolean used to indicate whether or not iteration information should be displayed on terminal...
virtual MultiDimFunctionGraph< GUM_SCALAR > * _valueIteration()
Performs a single step of value iteration.
void _extractOptimalPolicy(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extracts pi*(s). This function mainly consists in extracti...
This class is used to implement factored decision process.
Definition: fmdp.h:54
<agrum/FMDP/planning/mddOperatorStrategy.h>
void __transferActionIds(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
const FMDP< GUM_SCALAR > * _fmdp
The Factored Markov Decision Process describing our planning situation (NB : this one must have funct...
This file contains several function objects that are not (yet) defined in the STL.
Class to handle efficiently argMaxSet.
Definition: argMaxSet.h:55
Headers of the MDDOperatorStrategy planer class.
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:162
Class for implementation of factored markov decision process.
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
<agrum/FMDP/planning/treeOperatorStrategy.h>
virtual void _initVFunction()
Initializes the value function.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _evalQaction(const MultiDimFunctionGraph< GUM_SCALAR > *, Idx)
Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration.
GUM_SCALAR __threshold
The threshold value. Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*...
Set< const DiscreteVariable *> _elVarSeq
A Set to eliminate primed variables.
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * _optimalPolicy
The associated optimal policy.
virtual Size vFunctionSize()
Returns the current size of the vFunction computed so far.
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * _makeArgMax(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of the given Qaction that can be exploited by an argmax.
Headers of MultiDimFunctionGraph.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
Headers of the Planning Strategy interface.
virtual Size optimalPolicySize()
Returns the current size of the optimalPolicy computed so far.
NodeId __recurArgMaxCopy(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
Template implementation of FMDP/planning/StructuredPlaner.h classes.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _addReward(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
Headers of the TreeOperatorStrategy planer class.
Headers of the MDDOperatorStrategy planer class.
This file contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
Definition: types.h:50
<agrum/FMDP/SDyna/IPlanningStrategy.h>
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
virtual MultiDimFunctionGraph< GUM_SCALAR > * _minimiseFunctions(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
Size NodeId
Type for node ids.
Definition: graphElements.h:97
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
MultiDimFunctionGraph< GUM_SCALAR > * _vFunction
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _maximiseQactions(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a)
INLINE const MultiDimFunctionGraph< GUM_SCALAR > * vFunction()
Returns a const ptr on the value function computed so far.
INLINE const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy()
Returns the best policy obtained so far.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.