aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
structuredPlaner.h
/**
 *
 * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 * info_at_agrum_dot_org
 *
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */


/**
 * @file
 * @brief Headers of the StructuredPlaner planner class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */

// =========================================================================
#ifndef GUM_STRUCTURED_PLANNING_H
#define GUM_STRUCTURED_PLANNING_H
// =========================================================================
#include <thread>
// =========================================================================
#include <agrum/tools/core/argMaxSet.h>
#include <agrum/tools/core/functors.h>
#include <agrum/tools/core/inline.h>
#include <agrum/tools/core/smallobjectallocator/smallObjectAllocator.h>
// =========================================================================
#include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h>
#include <agrum/tools/multidim/utils/FunctionGraphUtilities/terminalNodePolicies/SetTerminalNodePolicy.h>
// =========================================================================
#include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/planning/IOperatorStrategy.h>
#include <agrum/FMDP/planning/actionSet.h>
#include <agrum/FMDP/planning/mddOperatorStrategy.h>
#include <agrum/FMDP/planning/treeOperatorStrategy.h>
// =========================================================================

namespace gum {

  /**
   * @class StructuredPlaner structuredPlaner.h
   * <agrum/FMDP/planning/structuredPlaner.h>
   * @brief A class to find an optimal policy for a given FMDP.
   * @ingroup fmdp_group
   *
   * Performs structured value iteration planning.
   *
   * Pure virtual functions:
   * regress_, maximize_, argmaximize_, add_ and subtract_
   * are a priori the ones to be re-specified according to the data
   * structure used (MDDs, DTs, BNs, ...).
   *
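   * A minimal usage sketch (the @c fmdp pointer stands for a previously
   * built FMDP with function graphs as transition and reward functions;
   * the numeric parameters are illustrative):
   * @code
   * gum::StructuredPlaner< double >* planer
   *    = gum::StructuredPlaner< double >::spumddInstance(0.9, 0.00001, false);
   * planer->initialize(fmdp);     // mandatory before any call to makePlanning
   * planer->makePlanning(10000);  // value iteration, at most 10000 steps
   * const auto* policy = planer->optimalPolicy();
   * delete planer;
   * @endcode
   *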
   */
  template < typename GUM_SCALAR >
  class StructuredPlaner: public IPlanningStrategy< GUM_SCALAR > {
    // ###################################################################
    /// @name Planner instantiation
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /// Returns an instance performing its operations on multi-valued
    /// decision diagrams (MDDs)
    // ==========================================================================
    static StructuredPlaner< GUM_SCALAR >*
       spumddInstance(GUM_SCALAR discountFactor = 0.9,
                      GUM_SCALAR epsilon        = 0.00001,
                      bool       verbose        = true) {
      return new StructuredPlaner< GUM_SCALAR >(
         new MDDOperatorStrategy< GUM_SCALAR >(),
         discountFactor,
         epsilon,
         verbose);
    }

    // ==========================================================================
    /// Returns an instance performing its operations on decision trees
    /// (structured value iteration)
    // ==========================================================================
    static StructuredPlaner< GUM_SCALAR >*
       sviInstance(GUM_SCALAR discountFactor = 0.9,
                   GUM_SCALAR epsilon        = 0.00001,
                   bool       verbose        = true) {
      return new StructuredPlaner< GUM_SCALAR >(
         new TreeOperatorStrategy< GUM_SCALAR >(),
         discountFactor,
         epsilon,
         verbose);
    }

    /// @}

    // ###################################################################
    /// @name Constructor & destructor.
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Default constructor
    // ==========================================================================
    StructuredPlaner(IOperatorStrategy< GUM_SCALAR >* opi,
                     GUM_SCALAR                       discountFactor,
                     GUM_SCALAR                       epsilon,
                     bool                             verbose);

    // ==========================================================================
    /// Default destructor
    // ==========================================================================
    public:
    virtual ~StructuredPlaner();

    /// @}

    // ###################################################################
    /// @name Data structure access methods
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /// Returns a const ptr on the Factored Markov Decision Process on which
    /// we're planning
    // ==========================================================================
    INLINE const FMDP< GUM_SCALAR >* fmdp() { return fmdp_; }

    // ==========================================================================
    /// Returns a const ptr on the value function computed so far
    // ==========================================================================
    INLINE const MultiDimFunctionGraph< GUM_SCALAR >* vFunction() {
      return vFunction_;
    }

    // ==========================================================================
    /// Returns the current size of the vFunction computed so far
    // ==========================================================================
    virtual Size vFunctionSize() {
      return vFunction_ != nullptr ? vFunction_->realSize() : 0;
    }

    // ==========================================================================
    /// Returns the best policy obtained so far
    // ==========================================================================
    INLINE const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy >*
       optimalPolicy() {
      return optimalPolicy_;
    }

    // ==========================================================================
    /// Returns the current size of the optimalPolicy computed so far
    // ==========================================================================
    virtual Size optimalPolicySize() {
      return optimalPolicy_ != nullptr ? optimalPolicy_->realSize() : 0;
    }

    // ==========================================================================
    /// Provides a better toDot for the optimal policy, where the leaves show
    /// the action names instead of their ids
    // ==========================================================================
    std::string optimalPolicy2String();

    /// @}


    // ###################################################################
    /// @name Planning Methods
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /**
     * Initializes the data structures needed for planning
     * @warning Calling makePlanning without having called this method first
     * will result in a crash
     */
    // ==========================================================================
    virtual void initialize(const FMDP< GUM_SCALAR >* fmdp);


    // ==========================================================================
    /**
     * Performs a value iteration
     *
     * @param nbStep : enables you to specify how many value iteration steps
     * you wish to perform.
     * makePlanning will then stop either when the optimal value function is
     * reached or when nbStep iterations have been performed
     *
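     * Each iteration applies the classical Bellman backup, computed here on
     * function graphs by the protected methods below (a flat-form sketch of
     * what they implement together):
     * @f[ V^{t}(s) = R(s) + \gamma \max_a \sum_{s'} P(s'|s,a)\, V^{t-1}(s') @f]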
     */
    // ==========================================================================
    virtual void makePlanning(Idx nbStep = 1000000);

    /// @}


    // ###################################################################
    /// @name Value Iteration Methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Initializes the value function
    // ==========================================================================
    virtual void initVFunction_();

    // ==========================================================================
    /// Performs a single step of value iteration
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >* valueIteration_();

    // ==========================================================================
    /// Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       evalQaction_(const MultiDimFunctionGraph< GUM_SCALAR >*, Idx);

    // ==========================================================================
    /// Performs max_a Q(s,a)
    /// @warning Also performs the deallocation of the QActions
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >&);

    // ==========================================================================
    /// Performs min_i F_i
    /// @warning Also performs the deallocation of the F_i
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR >* >&);

    // ==========================================================================
    /// Performs the R(s) + gamma . function operation
    /// @warning the given function is deleted; a new one is returned
    // ==========================================================================
    virtual MultiDimFunctionGraph< GUM_SCALAR >*
       addReward_(MultiDimFunctionGraph< GUM_SCALAR >* function,
                  Idx                                  actionId = 0);

    /// @}


    // ###################################################################
    /// @name Optimal policy extraction methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Performs the required tasks to extract an optimal policy
    // ==========================================================================
    virtual void evalPolicy_();

    // ==========================================================================
    /**
     * Creates a copy of the given Qaction that can be exploited by an argmax.
     * Hence, this step consists in replacing each leaf by an ArgMaxSet
     * containing the value of the leaf and the actionId of the Qaction
     *
     * @param Qaction : the function graph we want to transform
     * @param actionId : the action id associated to that graph
     *
     * @warning deletes the original Qaction and returns its conversion
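     *
     * For instance, a Qaction leaf holding the value 5.3 for actionId 2
     * becomes a leaf holding the ArgMaxSet { (5.3, 2) };
     * argmaximiseQactions_ can then merge such sets across actions while
     * keeping the maximal value.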
     */
    // ==========================================================================
    MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy >*
       makeArgMax__(const MultiDimFunctionGraph< GUM_SCALAR >* Qaction,
                    Idx                                        actionId);

    private:
    // ==========================================================================
    /// Recursion part for the createArgMaxCopy
    // ==========================================================================
    NodeId recurArgMaxCopy__(
       NodeId,
       Idx,
       const MultiDimFunctionGraph< GUM_SCALAR >*,
       MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
                              SetTerminalNodePolicy >*,
       HashTable< NodeId, NodeId >&);

    protected:
    // ==========================================================================
    /// Performs argmax_a Q(s,a)
    /// @warning Also performs the deallocation of the QActions
    // ==========================================================================
    virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
                                   SetTerminalNodePolicy >*
       argmaximiseQactions_(
          std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
                                              SetTerminalNodePolicy >* >&);

    // ==========================================================================
    /// From V*(s) = argmax_a Q*(s,a), this function extracts pi*(s).
    /// It mainly consists in extracting, from each ArgMaxSet present at the
    /// leaves, the associated ActionSet
    /// @warning deallocates the argmax optimal value function
    // ==========================================================================
    void extractOptimalPolicy_(
       const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
                                    SetTerminalNodePolicy >* optimalValueFunction);

    private:
    // ==========================================================================
    /// Recursion part for extractOptimalPolicy_
    // ==========================================================================
    NodeId recurExtractOptPol__(
       NodeId,
       const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >,
                                    SetTerminalNodePolicy >*,
       HashTable< NodeId, NodeId >&);

    // ==========================================================================
    /// Extracts from an ArgMaxSet the associated ActionSet
    // ==========================================================================
    void transferActionIds__(const ArgMaxSet< GUM_SCALAR, Idx >&, ActionSet&);


    /// @}

    protected:
    // ==========================================================================
    /// The Factored Markov Decision Process describing our planning situation
    /// (NB: this one must have function graphs as transition and reward
    /// functions)
    // ==========================================================================
    const FMDP< GUM_SCALAR >* fmdp_;

    // ==========================================================================
    /// The Value Function computed iteratively
    // ==========================================================================
    MultiDimFunctionGraph< GUM_SCALAR >* vFunction_;

    // ==========================================================================
    /// The associated optimal policy
    /// @warning Leaves are ActionSets which contain the ids of the best
    /// actions. While this is sufficient for exploitation, some translation
    /// from the fmdp_ is required for it to be understood by a human;
    /// optimalPolicy2String does this job.
    // ==========================================================================
    MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy >* optimalPolicy_;

    // ==========================================================================
    /// A Set used to eliminate primed variables
    // ==========================================================================
    Set< const DiscreteVariable* > elVarSeq_;

    // ==========================================================================
    /// Discount factor used for infinite horizon planning
    // ==========================================================================
    GUM_SCALAR discountFactor_;

    // ==========================================================================
    /// The operator strategy (MDD- or tree-based) used to perform the
    /// function graph operations
    // ==========================================================================
    IOperatorStrategy< GUM_SCALAR >* operator_;

    // ==========================================================================
    /// Boolean used to indicate whether or not iteration information should
    /// be displayed in the terminal
    // ==========================================================================
    bool verbose_;


    private:
    // ==========================================================================
    /// The threshold value.
    /// Whenever |V^{n} - V^{n+1}| < threshold, we consider that V ~ V*.
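    ///
    /// For background (the classical contraction bound, not necessarily the
    /// exact rule used here): since
    /// @f$ \|V^{n+1} - V^*\|_\infty \leq \frac{\gamma}{1-\gamma}\,\|V^{n+1} - V^{n}\|_\infty @f$,
    /// choosing a threshold of @f$ \varepsilon(1-\gamma)/\gamma @f$ guarantees
    /// @f$ \|V^{n+1} - V^*\|_\infty < \varepsilon @f$.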
    // ==========================================================================
    GUM_SCALAR threshold__;
  };

} /* namespace gum */


#include <agrum/FMDP/planning/structuredPlaner_tpl.h>

#endif   // GUM_STRUCTURED_PLANNING_H