aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
structuredPlaner.h
/**
 *
 * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 * info_at_agrum_dot_org
 *
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */


/**
 * @file
 * @brief Headers of the StructuredPlaner planner class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */

// =========================================================================
#ifndef GUM_STRUCTURED_PLANNING_H
#define GUM_STRUCTURED_PLANNING_H
// =========================================================================
#include <thread>
// =========================================================================
#include <agrum/tools/core/argMaxSet.h>
#include <agrum/tools/core/functors.h>
#include <agrum/tools/core/inline.h>
#include <agrum/tools/core/smallobjectallocator/smallObjectAllocator.h>
// =========================================================================
#include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h>
#include <agrum/tools/multidim/utils/FunctionGraphUtilities/terminalNodePolicies/SetTerminalNodePolicy.h>
// =========================================================================
#include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/planning/IOperatorStrategy.h>
#include <agrum/FMDP/planning/actionSet.h>
#include <agrum/FMDP/planning/mddOperatorStrategy.h>
#include <agrum/FMDP/planning/treeOperatorStrategy.h>
// =========================================================================

namespace gum {

  /**
   * @class StructuredPlaner structuredPlaner.h
   * <agrum/FMDP/planning/structuredPlaner.h>
   * @brief A class to find an optimal policy for a given FMDP.
   * @ingroup fmdp_group
   *
   * Performs structured value iteration planning.
   *
   * The pure virtual functions regress_, maximize_, argmaximize_, add_ and
   * subtract_ are a priori the ones to be redefined according to the data
   * structure used (MDDs, DTs, BNs, ...).
   *
   */
  template < typename GUM_SCALAR >
  class StructuredPlaner: public IPlanningStrategy< GUM_SCALAR > {
    // ###################################################################
    /// @name Planner instantiation methods
    // ###################################################################
    /// @{
    public:
    // ==========================================================================
    /// Returns an instance of this planner that relies on multi-valued
    /// decision diagrams (MDDOperatorStrategy)
    // ==========================================================================
    static StructuredPlaner< GUM_SCALAR >* spumddInstance(GUM_SCALAR discountFactor = 0.9,
                                                          GUM_SCALAR epsilon        = 0.00001,
                                                          bool       verbose        = true) {
      return new StructuredPlaner< GUM_SCALAR >(new MDDOperatorStrategy< GUM_SCALAR >(),
                                                discountFactor,
                                                epsilon,
                                                verbose);
    }

    // ==========================================================================
    /// Returns an instance of this planner that relies on decision trees
    /// (TreeOperatorStrategy)
    // ==========================================================================
    static StructuredPlaner< GUM_SCALAR >* sviInstance(GUM_SCALAR discountFactor = 0.9,
                                                       GUM_SCALAR epsilon        = 0.00001,
                                                       bool       verbose        = true) {
      return new StructuredPlaner< GUM_SCALAR >(new TreeOperatorStrategy< GUM_SCALAR >(),
                                                discountFactor,
                                                epsilon,
                                                verbose);
    }
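
    // Example: a minimal usage sketch, assuming an FMDP< double > named myFmdp
    // has been built elsewhere (myFmdp is hypothetical, for illustration only):
    //
    //   gum::StructuredPlaner< double >* planer
    //      = gum::StructuredPlaner< double >::spumddInstance(0.9, 0.00001, true);
    //   planer->initialize(&myFmdp);   // bind the planner to the FMDP
    //   planer->makePlanning(10000);   // stops at convergence or after 10000 steps
    //   std::cout << planer->optimalPolicy2String() << std::endl;
    //   delete planer;                 // factory methods return heap-allocated instances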

    /// @}

    // ###################################################################
    /// @name Constructor & destructor.
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Default constructor
    // ==========================================================================
    StructuredPlaner(IOperatorStrategy< GUM_SCALAR >* opi,
                     GUM_SCALAR                       discountFactor,
                     GUM_SCALAR                       epsilon,
                     bool                             verbose);

    // ==========================================================================
    /// Default destructor
    // ==========================================================================
    public:
    virtual ~StructuredPlaner();

    /// @}

    // ###################################################################
    /// @name Datastructure access methods
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /// Returns a const ptr on the Factored Markov Decision Process on which
    /// we're planning
    // ==========================================================================
    INLINE const FMDP< GUM_SCALAR >* fmdp() { return fmdp_; }

    // ==========================================================================
    /// Returns a const ptr on the value function computed so far
    // ==========================================================================
    INLINE const MultiDimFunctionGraph< GUM_SCALAR >* vFunction() { return vFunction_; }

    // ==========================================================================
    /// Returns the current size of the value function computed so far
    // ==========================================================================
    virtual Size vFunctionSize() { return vFunction_ != nullptr ? vFunction_->realSize() : 0; }

    // ==========================================================================
    /// Returns the best policy obtained so far
    // ==========================================================================
    INLINE const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy >* optimalPolicy() {
      return optimalPolicy_;
    }

    // ==========================================================================
    /// Returns the current size of the optimal policy computed so far
    // ==========================================================================
    virtual Size optimalPolicySize() {
      return optimalPolicy_ != nullptr ? optimalPolicy_->realSize() : 0;
    }

    // ==========================================================================
    /// Provides a better toDot for the optimal policy where the leaves bear the
    /// action name instead of its id
    // ==========================================================================
    std::string optimalPolicy2String();

    /// @}


    // ###################################################################
    /// @name Planning Methods
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /**
     * Initializes the data structures needed for planning
     * @warning Not calling this method before the first call to makePlanning
     * will surely and definitely result in a crash
     */
    // ==========================================================================
    virtual void initialize(const FMDP< GUM_SCALAR >* fmdp);


    // ==========================================================================
    /**
     * Performs value iteration
     *
     * @param nbStep : enables you to specify how many value iteration steps
     * you wish to perform.
     * makePlanning will then stop either when the optimal value function is
     * reached or when nbStep steps have been performed
     */
    // ==========================================================================
    virtual void makePlanning(Idx nbStep = 1000000);

    /// @}


    // ###################################################################
    /// @name Value Iteration Methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Initializes the value function
    // ==========================================================================
    virtual void initVFunction_();

    // ==========================================================================
    /// Performs a single step of value iteration
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >* valueIteration_();

    // ==========================================================================
    /// Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       evalQaction_(const MultiDimFunctionGraph< GUM_SCALAR >*, Idx);

    // ==========================================================================
    /// Performs max_a Q(s,a)
    /// @warning Also performs the deallocation of the QActions
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >&);

    // ==========================================================================
    /// Performs min_i F_i
    /// @warning Also performs the deallocation of the F_i
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >&);

    // ==========================================================================
    /// Performs the R(s) + gamma . function computation
    /// @warning function is deleted, a new one is returned
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       addReward_(MultiDimFunctionGraph< GUM_SCALAR >* function, Idx actionId = 0);

    /// @}


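    // Taken together, the hooks above implement one dynamic programming backup
    // (a sketch of the recurrence the comments above describe, with gamma the
    // discount factor):
    //
    //    Q^t(s,a) = R(s) + gamma . sum_{s'} P(s'|s,a) . V^{t-1}(s')
    //    V^t(s)   = max_a Q^t(s,a)
    //
    // evalQaction_ computes the P(s'|s,a).V^{t-1}(s') term, addReward_ applies
    // R(s) + gamma . (...), and maximiseQactions_ performs the maximisation
    // over actions, all on function graphs rather than on flat tables.
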
    // ###################################################################
    /// @name Optimal policy extraction methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Performs the required tasks to extract an optimal policy
    // ==========================================================================
    virtual void evalPolicy_();

    // ==========================================================================
    /**
     * Creates a copy of the given Qaction that can be exploited by an argmax.
     * Hence, this step consists in replacing each leaf by an ArgMaxSet
     * containing the value of the leaf and the actionId of the Qaction
     *
     * @param Qaction : the function graph we want to transform
     * @param actionId : the action id associated to that graph
     *
     * @warning deletes the original Qaction, returns its conversion
     */
    // ==========================================================================
    virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >*
       makeArgMax_(const MultiDimFunctionGraph< GUM_SCALAR >* Qaction, Idx actionId);

    private:
    // ==========================================================================
    /// Recursion part for the createArgMaxCopy
    // ==========================================================================
    NodeId _recurArgMaxCopy_(NodeId,
                             Idx,
                             const MultiDimFunctionGraph< GUM_SCALAR >*,
                             MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >*,
                             HashTable< NodeId, NodeId >&);

    protected:
    // ==========================================================================
    /// Performs argmax_a Q(s,a)
    /// @warning Also performs the deallocation of the QActions
    // ==========================================================================
    virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >*
       argmaximiseQactions_(
          std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >* >&);

    // ==========================================================================
    /// From V*(s) = argmax_a Q*(s,a), this function extracts pi*(s).
    /// It mainly consists in extracting, from each ArgMaxSet present at the
    /// leaves, the associated ActionSet
    /// @warning deallocates the argmax optimal value function
    // ==========================================================================
    void extractOptimalPolicy_(
       const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >*
          optimalValueFunction);

    private:
    // ==========================================================================
    /// Recursion part for extractOptimalPolicy_
    // ==========================================================================
    NodeId _recurExtractOptPol_(
       NodeId,
       const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >*,
       HashTable< NodeId, NodeId >&);

    // ==========================================================================
    /// Extracts from an ArgMaxSet the associated ActionSet
    // ==========================================================================
    void _transferActionIds_(const ArgMaxSet< GUM_SCALAR, Idx >&, ActionSet&);


    /// @}

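    // Illustration of the extraction above (with hypothetical values): a Qaction
    // leaf holding 3.7 for actionId 2 becomes an ArgMaxSet leaf {(3.7, 2)};
    // argmaximiseQactions_ merges these graphs so that each leaf keeps the set
    // of actions reaching the maximal value, and _transferActionIds_ turns each
    // ArgMaxSet into the ActionSet stored in optimalPolicy_.
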
    protected:
    // ==========================================================================
    /// The Factored Markov Decision Process describing our planning situation
    /// (NB : this one must have function graphs as transition and reward
    /// functions)
    // ==========================================================================
    const FMDP< GUM_SCALAR >* fmdp_;

    // ==========================================================================
    /// The Value Function computed iteratively
    // ==========================================================================
    MultiDimFunctionGraph< GUM_SCALAR >* vFunction_;

    // ==========================================================================
    /// The associated optimal policy
    /// @warning Leaves are ActionSets which contain the ids of the best actions.
    /// While this is sufficient for exploitation, to be understood by a human
    /// some translation from the fmdp_ is required. optimalPolicy2String does
    /// this job.
    // ==========================================================================
    MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy >* optimalPolicy_;

    // ==========================================================================
    /// A Set to eliminate primed variables
    // ==========================================================================
    Set< const DiscreteVariable* > elVarSeq_;

    // ==========================================================================
    /// Discount factor used for infinite horizon planning
    // ==========================================================================
    GUM_SCALAR discountFactor_;

    /// The operator strategy (MDD or tree based) used by this planner
    IOperatorStrategy< GUM_SCALAR >* operator_;

    // ==========================================================================
    /// Boolean used to indicate whether or not iteration information should be
    /// displayed in the terminal
    // ==========================================================================
    bool verbose_;


    private:
    // ==========================================================================
    /// The threshold value
    /// Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*
    // ==========================================================================
    GUM_SCALAR _threshold_;
  };

} /* namespace gum */


#include <agrum/FMDP/planning/structuredPlaner_tpl.h>

#endif   // GUM_STRUCTURED_PLANNING_H