aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
sdyna.h
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of the SDyna abstract class.
25  *
26  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(@AMU)
28  */
29 
30 // =========================================================================
31 #ifndef GUM_SDYNA_H
32 #define GUM_SDYNA_H
33 // =========================================================================
34 #include <agrum/agrum.h>
35 #include <agrum/tools/multidim/instantiation.h>
36 // =========================================================================
37 #include <agrum/tools/variables/discreteVariable.h>
38 // =========================================================================
39 #include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
40 #include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
41 #include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
42 #include <agrum/FMDP/decision/E_GreedyDecider.h>
43 #include <agrum/FMDP/decision/lazyDecider.h>
44 #include <agrum/FMDP/decision/randomDecider.h>
45 #include <agrum/FMDP/decision/statisticalLazyDecider.h>
46 #include <agrum/FMDP/fmdp.h>
47 #include <agrum/FMDP/learning/fmdpLearner.h>
48 #include <agrum/FMDP/learning/observation.h>
49 #include <agrum/FMDP/planning/actionSet.h>
50 #include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
51 #include <agrum/FMDP/planning/structuredPlaner.h>
52 // =========================================================================
53 
54 namespace gum {
55 
56  /**
57  * @class SDYNA
58  * @headerfile sdyna.h <agrum/FMDP/SDyna/sdyna.h>
 59  * @brief The general SDyna architecture abstract class.
60  * @ingroup fmdp_group
61  *
62  * The general SDyna architecture abstract class.
 63  * Instantiations of the SDyna architecture should inherit from this class.
64  *
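 * A minimal usage sketch (not part of the original documentation; the variable
 * pointer someVar, the states someState / reachedState and the reward value are
 * placeholders to be provided by the surrounding application):
 * @code
 * gum::SDYNA* agent = gum::SDYNA::spitiInstance();   // pick one of the factories below
 * agent->addVariable(someVar);                       // declare every state variable
 * agent->addAction(1, "someAction");                 // declare every available action
 * agent->initialize(someState);                      // start the explore/learn/exploit loop
 * while (running) {
 *   gum::Idx action = agent->takeAction();           // ask the architecture what to do
 *   // ... perform the action on the real system, observe reachedState and reward ...
 *   agent->feedback(reachedState, reward);           // learn from the observed transition
 * }
 * delete agent;
 * @endcode
 *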
65  */
66  class SDYNA {
 67  // ###################################################################
 68  /// @name Factory methods
 69  // ###################################################################
 70  /// @{
71  public:
72  // ==========================================================================
 73  /// @brief Builds a SPITI instance: an FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >, a structured value iteration planer and an E_GreedyDecider.
74  // ==========================================================================
75  static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
76  double discountFactor = 0.9,
77  double epsilon = 1,
78  Idx observationPhaseLenght = 100,
79  Idx nbValueIterationStep = 10) {
80  bool actionReward = false;
81  ILearningStrategy* ls
82  = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold,
83  actionReward);
84  IPlanningStrategy< double >* ps
85  = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
86  IDecisionStrategy* ds = new E_GreedyDecider();
87  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
88  }
89 
90  // ==========================================================================
 91  /// @brief Builds a SPIMDDI instance: an FMDPLearner< GTEST, GTEST, IMDDILEARNER >, a SPUMDD planer and an E_GreedyDecider.
92  // ==========================================================================
93  static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
94  double similarityThreshold = 0.3,
95  double discountFactor = 0.9,
96  double epsilon = 1,
97  Idx observationPhaseLenght = 100,
98  Idx nbValueIterationStep = 10) {
99  bool actionReward = false;
100  ILearningStrategy* ls
101  = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
102  actionReward,
103  similarityThreshold);
104  IPlanningStrategy< double >* ps
105  = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon, false);
106  IDecisionStrategy* ds = new E_GreedyDecider();
107  return new SDYNA(ls,
108  ps,
109  ds,
110  observationPhaseLenght,
111  nbValueIterationStep,
112  actionReward,
113  false);
114  }
115 
116  // ==========================================================================
 117  /// @brief Builds an RMax instance over MDDs: an FMDPLearner< GTEST, GTEST, IMDDILEARNER > and an AdaptiveRMaxPlaner acting as both planer and decider.
118  // ==========================================================================
119  static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
120  double similarityThreshold = 0.3,
121  double discountFactor = 0.9,
122  double epsilon = 1,
123  Idx observationPhaseLenght = 100,
124  Idx nbValueIterationStep = 10) {
125  bool actionReward = true;
126  ILearningStrategy* ls
127  = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
128  actionReward,
129  similarityThreshold);
130  AdaptiveRMaxPlaner* rm
131  = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(ls, discountFactor, epsilon);
132  IPlanningStrategy< double >* ps = rm;
133  IDecisionStrategy* ds = rm;
134  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
135  }
136 
137  // ==========================================================================
 138  /// @brief Builds an RMax instance over trees: an FMDPLearner< GTEST, GTEST, ITILEARNER > and an AdaptiveRMaxPlaner acting as both planer and decider.
139  // ==========================================================================
140  static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
141  double discountFactor = 0.9,
142  double epsilon = 1,
143  Idx observationPhaseLenght = 100,
144  Idx nbValueIterationStep = 10) {
145  bool actionReward = true;
146  ILearningStrategy* ls
147  = new FMDPLearner< GTEST, GTEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
148  AdaptiveRMaxPlaner* rm = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
149  IPlanningStrategy< double >* ps = rm;
150  IDecisionStrategy* ds = rm;
151  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
152  }
153 
154  // ==========================================================================
 155  /// @brief Builds a random-exploration instance over MDDs: an FMDPLearner< GTEST, GTEST, IMDDILEARNER >, a SPUMDD planer and a RandomDecider.
156  // ==========================================================================
157  static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
158  double similarityThreshold = 0.3,
159  double discountFactor = 0.9,
160  double epsilon = 1,
161  Idx observationPhaseLenght = 100,
162  Idx nbValueIterationStep = 10) {
163  bool actionReward = true;
164  ILearningStrategy* ls
165  = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
166  actionReward,
167  similarityThreshold);
168  IPlanningStrategy< double >* ps
169  = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
170  IDecisionStrategy* ds = new RandomDecider();
171  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
172  }
173 
174  // ==========================================================================
 175  /// @brief Builds a random-exploration instance over trees: an FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >, a structured value iteration planer and a RandomDecider.
176  // ==========================================================================
177  static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
178  double discountFactor = 0.9,
179  double epsilon = 1,
180  Idx observationPhaseLenght = 100,
181  Idx nbValueIterationStep = 10) {
182  bool actionReward = true;
183  ILearningStrategy* ls
184  = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold,
185  actionReward);
186  IPlanningStrategy< double >* ps
187  = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
188  IDecisionStrategy* ds = new RandomDecider();
189  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
190  }
191 
192 
193  /// @}
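 // Note (added example, not in the original header): the factories above share a common set of
 // tuning parameters (some additionally take a similarityThreshold); the values below are
 // purely illustrative.
 //
 //   SDYNA* agent = SDYNA::RMaxTreeInstance(0.95,   // attributeSelectionThreshold
 //                                          0.9,    // discountFactor
 //                                          0.1,    // epsilon
 //                                          50,     // observationPhaseLenght
 //                                          20);    // nbValueIterationStep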
194 
195  // ###################################################################
196  /// @name Constructor & destructor.
197  // ###################################################################
198  /// @{
199 
200  // ==========================================================================
201  /**
 202  * Constructor.
 203  *
 204  * Builds an SDyna architecture from the given learning, planning and decision strategies.
205  */
206  // ==========================================================================
207  private:
208  SDYNA(ILearningStrategy* learner,
209  IPlanningStrategy< double >* planer,
210  IDecisionStrategy* decider,
211  Idx observationPhaseLenght,
212  Idx nbValueIterationStep,
213  bool actionReward,
214  bool verbose = true);
215 
216  // ==========================================================================
217  /// Destructor
218  // ==========================================================================
219  public:
220  ~SDYNA();
221 
222  /// @}
223 
224 
225  // ###################################################################
226  /// @name Problem specification methods
227  // ###################################################################
228  /// @{
229  public:
230  // ==========================================================================
231  /**
232  * Inserts a new action in the SDyna instance.
 233  * @warning Has no effect until the initialize method is called
 234  * @param actionId : an id identifying the action
 235  * @param actionName : its human-readable name
236  */
237  // ==========================================================================
238  void addAction(const Idx actionId, const std::string& actionName) {
239  fmdp_->addAction(actionId, actionName);
240  }
241 
242  // ==========================================================================
243  /**
244  * Inserts a new variable in the SDyna instance.
 245  * @warning Has no effect until the initialize method is called
 246  * @param var : the variable to be added.
 247  * Note that the variable may or may not have all its modalities given;
 248  * if not, they will be discovered by the SDyna architecture during the
 249  * process.
250  */
251  // ==========================================================================
252  void addVariable(const DiscreteVariable* var) { fmdp_->addVariable(var); }
253 
254  /// @}
255 
256 
257  // ###################################################################
258  /// @name Initialization
259  // ###################################################################
260  /// @{
261  public:
262  // ==========================================================================
263  /**
264  * Initializes the Sdyna instance.
 265  * Initializes the SDyna instance.
266  // ==========================================================================
267  void initialize();
268 
269  // ==========================================================================
270  /**
 271  * Initializes the SDyna instance at the given state.
272  * @param initialState : the state of the studied system from which we will
273  * begin the explore, learn and exploit process
274  */
275  // ==========================================================================
276  void initialize(const Instantiation& initialState);
277 
278  /// @}
279 
280 
281  // ###################################################################
282  /// @name Incremental methods
283  // ###################################################################
284  /// @{
285  public:
286  // ==========================================================================
287  /**
288  * Sets last state visited to the given state.
 289  * During the learning process, we will consider that we were in this state
 290  * before the transition.
291  * @param currentState : the state
292  */
293  // ==========================================================================
294  void setCurrentState(const Instantiation& currentState) { lastState_ = currentState; }
295 
296  // ==========================================================================
297  /**
 298  * @return the id of the action the SDyna instance wishes to be
 299  * performed
 300  * @param curState the state in which we currently are
 301  */
 302  // ==========================================================================
 303  Idx takeAction(const Instantiation& curState);
 304 
305  // ==========================================================================
306  /**
 307  * @return the id of the action the SDyna instance wishes to be performed
308  */
309  // ==========================================================================
310  Idx takeAction();
311 
312  // ==========================================================================
313  /**
 314  * Performs a feedback on the last transition.
 315  * In other words, learns from that transition.
316  * @param originalState : the state we were in before the transition
317  * @param reachedState : the state we reached after
318  * @param performedAction : the action we performed
319  * @param obtainedReward : the reward we obtained
320  */
321  // ==========================================================================
322  void feedback(const Instantiation& originalState,
323  const Instantiation& reachedState,
324  Idx performedAction,
325  double obtainedReward);
326 
327  // ==========================================================================
328  /**
 329  * Performs a feedback on the last transition.
 330  * In other words, learns from that transition.
331  * @param reachedState : the state reached after the transition
332  * @param obtainedReward : the reward obtained during the transition
 333  * @warning Uses the original state and the performed action stored in cache.
 334  * If you want to specify the original state and the performed action
 335  * explicitly, use the other feedback overload.
336  */
337  // ==========================================================================
338  void feedback(const Instantiation& reachedState, double obtainedReward);
339 
340  // ==========================================================================
341  /**
 342  * Starts a new planning phase.
 343  * @param nbStep : the maximal number of value iteration steps performed
 344  * during this planning phase
345  */
346  // ==========================================================================
347  void makePlanning(Idx nbStep);
348 
349  /// @}
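 // Added sketch (not in the original header): driving a transition explicitly instead of
 // relying on the cached state and action; currentState, nextState and reward are placeholders.
 //
 //   agent->setCurrentState(currentState);                   // tell SDyna where we are
 //   Idx act = agent->takeAction(currentState);              // query the decision strategy
 //   agent->feedback(currentState, nextState, act, reward);  // learn from the observed transition
 //   agent->makePlanning(10);                                 // optionally trigger a planning phase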
350 
351 
352  public:
353  // ==========================================================================
354  /**
 355  * Returns a description of the learned model and policy.
 356  * @return a string describing the learned FMDP and the associated
 357  * optimal policy, both given in DOT language.
 358  *
359  */
360  // ==========================================================================
361  std::string toString();
362 
363  std::string optimalPolicy2String() { return _planer_->optimalPolicy2String(); }
364 
365 
366  // ###################################################################
367  /// @name Size methods
 368  /// @brief Accessors to the sizes of the different data structures, for
 369  /// performance evaluation purposes only
370  // ###################################################################
371  /// @{
372  public:
373  // ==========================================================================
374  /**
375  * @brief learnerSize
 376  * @return the size of the data structures used by the learner
377  */
378  // ==========================================================================
379  Size learnerSize() { return _learner_->size(); }
380 
381  // ==========================================================================
382  /**
383  * @brief modelSize
 384  * @return the size of the learned FMDP
385  */
386  // ==========================================================================
387  Size modelSize() { return fmdp_->size(); }
388 
389  // ==========================================================================
390  /**
391  * @brief valueFunctionSize
 392  * @return the size of the value function computed by the planer
393  */
394  // ==========================================================================
395  Size valueFunctionSize() { return _planer_->vFunctionSize(); }
396 
397  // ==========================================================================
398  /**
399  * @brief optimalPolicySize
 400  * @return the size of the optimal policy computed by the planer
401  */
402  // ==========================================================================
403  Size optimalPolicySize() { return _planer_->optimalPolicySize(); }
404 
405  /// @}
406 
407 
408  protected:
409  /// The learnt Markovian Decision Process
410  FMDP< double >* fmdp_;
411 
 412  /// The state in which the system is before we perform a new action
 413  Instantiation lastState_;
 414 
 415  private:
 416  /// The learner used to learn the FMDP
 417  ILearningStrategy* _learner_;
 418 
 419  /// The planer used to plan an optimal strategy
 420  IPlanningStrategy< double >* _planer_;
 421 
 422  /// The decider
 423  IDecisionStrategy* _decider_;
 424 
 425 
 426  /// The number of observations to make before calling the planer again
 427  Idx _observationPhaseLenght_;
 428 
 429  /// The total number of observations made so far
 430  Idx _nbObservation_;
 431 
 432  /// The number of value iteration steps we perform
 433  Idx _nbValueIterationStep_;
 434 
 435  /// The last performed action
 436  Idx _lastAction_;
 437 
 438  /// Since SDYNA made these observations, it has to delete them on quitting
 439  Set< Observation* > _bin_;
 440 
 441  bool _actionReward_;
 442 
443  bool verbose_;
444  };
445 
446 
447 } /* namespace gum */
448 
449 
450 #endif // GUM_SDYNA_H