aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
sdyna.h
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of the SDyna abstract class.
25  *
26  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(@AMU)
28  */
29 
30 // =========================================================================
31 #ifndef GUM_SDYNA_H
32 #define GUM_SDYNA_H
33 // =========================================================================
34 #include <agrum/agrum.h>
35 #include <agrum/tools/multidim/instantiation.h>
36 // =========================================================================
37 #include <agrum/tools/variables/discreteVariable.h>
38 // =========================================================================
39 #include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
40 #include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
41 #include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
42 #include <agrum/FMDP/decision/E_GreedyDecider.h>
43 #include <agrum/FMDP/decision/lazyDecider.h>
44 #include <agrum/FMDP/decision/randomDecider.h>
45 #include <agrum/FMDP/decision/statisticalLazyDecider.h>
46 #include <agrum/FMDP/fmdp.h>
47 #include <agrum/FMDP/learning/fmdpLearner.h>
48 #include <agrum/FMDP/learning/observation.h>
49 #include <agrum/FMDP/planning/actionSet.h>
50 #include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
51 #include <agrum/FMDP/planning/structuredPlaner.h>
52 // =========================================================================
53 
54 namespace gum {
55 
56  /**
57  * @class SDYNA
58  * @headerfile sdyna.h <agrum/FMDP/SDyna/sdyna.h>
59  * @brief The general SDyna architecture abstract class.
60  * @ingroup fmdp_group
61  *
62  * The general SDyna architecture abstract class.
63  * Instances of the SDyna architecture should inherit from this class.
64  *
65  */
66  class SDYNA {
67  // ###################################################################
68  /// @name Factory methods
69  // ###################################################################
70  /// @{
71  public:
72  // ==========================================================================
73  /// Builds the SPITI instance: ITI learning (chi2 test), structured value iteration planning, epsilon-greedy decisions.
74  // ==========================================================================
75  static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
76  double discountFactor = 0.9,
77  double epsilon = 1,
78  Idx observationPhaseLenght = 100,
79  Idx nbValueIterationStep = 10) {
80  bool actionReward = false;
81  ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
82  attributeSelectionThreshold,
83  actionReward);
84  IPlanningStrategy< double >* ps
85  = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
86  IDecisionStrategy* ds = new E_GreedyDecider();
87  return new SDYNA(ls,
88  ps,
89  ds,
90  observationPhaseLenght,
91  nbValueIterationStep,
92  actionReward);
93  }
94 
95  // ==========================================================================
96  /// Builds the SPIMDDI instance: IMDDI learning (G-test), SPUMDD planning, epsilon-greedy decisions.
97  // ==========================================================================
98  static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
99  double similarityThreshold = 0.3,
100  double discountFactor = 0.9,
101  double epsilon = 1,
102  Idx observationPhaseLenght = 100,
103  Idx nbValueIterationStep = 10) {
104  bool actionReward = false;
105  ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
106  attributeSelectionThreshold,
107  actionReward,
108  similarityThreshold);
109  IPlanningStrategy< double >* ps
110  = StructuredPlaner< double >::spumddInstance(discountFactor,
111  epsilon,
112  false);
113  IDecisionStrategy* ds = new E_GreedyDecider();
114  return new SDYNA(ls,
115  ps,
116  ds,
117  observationPhaseLenght,
118  nbValueIterationStep,
119  actionReward,
120  false);
121  }
122 
123  // ==========================================================================
124  /// Builds an RMax instance over MDDs: IMDDI learning (G-test), adaptive RMax planning and decisions.
125  // ==========================================================================
126  static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
127  double similarityThreshold = 0.3,
128  double discountFactor = 0.9,
129  double epsilon = 1,
130  Idx observationPhaseLenght = 100,
131  Idx nbValueIterationStep = 10) {
132  bool actionReward = true;
133  ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
134  attributeSelectionThreshold,
135  actionReward,
136  similarityThreshold);
137  AdaptiveRMaxPlaner* rm
138  = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(ls,
139  discountFactor,
140  epsilon);
141  IPlanningStrategy< double >* ps = rm;
142  IDecisionStrategy* ds = rm;
143  return new SDYNA(ls,
144  ps,
145  ds,
146  observationPhaseLenght,
147  nbValueIterationStep,
148  actionReward);
149  }
150 
151  // ==========================================================================
152  /// Builds an RMax instance over trees: ITI learning (G-test), adaptive RMax planning and decisions.
153  // ==========================================================================
154  static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
155  double discountFactor = 0.9,
156  double epsilon = 1,
157  Idx observationPhaseLenght = 100,
158  Idx nbValueIterationStep = 10) {
159  bool actionReward = true;
160  ILearningStrategy* ls
161  = new FMDPLearner< GTEST, GTEST, ITILEARNER >(attributeSelectionThreshold,
162  actionReward);
163  AdaptiveRMaxPlaner* rm
164  = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
165  IPlanningStrategy< double >* ps = rm;
166  IDecisionStrategy* ds = rm;
167  return new SDYNA(ls,
168  ps,
169  ds,
170  observationPhaseLenght,
171  nbValueIterationStep,
172  actionReward);
173  }
174 
175  // ==========================================================================
176  /// Builds an instance with IMDDI learning (G-test), SPUMDD planning and random decisions.
177  // ==========================================================================
178  static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
179  double similarityThreshold = 0.3,
180  double discountFactor = 0.9,
181  double epsilon = 1,
182  Idx observationPhaseLenght = 100,
183  Idx nbValueIterationStep = 10) {
184  bool actionReward = true;
185  ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
186  attributeSelectionThreshold,
187  actionReward,
188  similarityThreshold);
189  IPlanningStrategy< double >* ps
190  = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
191  IDecisionStrategy* ds = new RandomDecider();
192  return new SDYNA(ls,
193  ps,
194  ds,
195  observationPhaseLenght,
196  nbValueIterationStep,
197  actionReward);
198  }
199 
200  // ==========================================================================
201  /// Builds an instance with ITI learning (chi2 test), SVI planning and random decisions.
202  // ==========================================================================
203  static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
204  double discountFactor = 0.9,
205  double epsilon = 1,
206  Idx observationPhaseLenght = 100,
207  Idx nbValueIterationStep = 10) {
208  bool actionReward = true;
209  ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
210  attributeSelectionThreshold,
211  actionReward);
212  IPlanningStrategy< double >* ps
213  = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
214  IDecisionStrategy* ds = new RandomDecider();
215  return new SDYNA(ls,
216  ps,
217  ds,
218  observationPhaseLenght,
219  nbValueIterationStep,
220  actionReward);
221  }
222 
223 
224  /// @}
225 
226  // ###################################################################
227  /// @name Constructor & destructor.
228  // ###################################################################
229  /// @{
230 
231  // ==========================================================================
232  /**
233  * Constructor.
234  *
235  * Builds an SDyna architecture from the given learning, planning and decision strategies.
236  */
237  // ==========================================================================
238  private:
239  SDYNA(ILearningStrategy* learner,
240  IPlanningStrategy< double >* planer,
241  IDecisionStrategy* decider,
242  Idx observationPhaseLenght,
243  Idx nbValueIterationStep,
244  bool actionReward,
245  bool verbose = true);
246 
247  // ==========================================================================
248  /// Destructor
249  // ==========================================================================
250  public:
251  ~SDYNA();
252 
253  /// @}
254 
255 
256  // ###################################################################
257  /// @name Problem specification methods
258  // ###################################################################
259  /// @{
260  public:
261  // ==========================================================================
262  /**
263  * Inserts a new action in the SDyna instance.
264  * @warning Without effect until method initialize is called
265  * @param actionId : an id to identify the action
266  * @param actionName : its human-readable name
267  */
268  // ==========================================================================
269  void addAction(const Idx actionId, const std::string& actionName) {
270  fmdp_->addAction(actionId, actionName);
271  }
272 
273  // ==========================================================================
274  /**
275  * Inserts a new variable in the SDyna instance.
276  * @warning Without effect until method initialize is called
277  * @param var : the variable to be added.
278  * Note that the variable may or may not have all its modalities given.
279  * If not, they will be discovered by the SDyna architecture during the
280  * process.
281  */
282  // ==========================================================================
283  void addVariable(const DiscreteVariable* var) { fmdp_->addVariable(var); }
284 
285  /// @}
286 
287 
288  // ###################################################################
289  /// @name Initialization
290  // ###################################################################
291  /// @{
292  public:
293  // ==========================================================================
294  /**
295  * Initializes the Sdyna instance.
296  */
297  // ==========================================================================
298  void initialize();
299 
300  // ==========================================================================
301  /**
302  * Initializes the Sdyna instance at given state.
303  * @param initialState : the state of the studied system from which we will
304  * begin the explore, learn and exploit process
305  */
306  // ==========================================================================
307  void initialize(const Instantiation& initialState);
308 
309  /// @}
310 
311 
312  // ###################################################################
313  /// @name Incremental methods
314  // ###################################################################
315  /// @{
316  public:
317  // ==========================================================================
318  /**
319  * Sets last state visited to the given state.
320  * During the learning process, we will consider that we were in this state
321  * before the transition.
322  * @param currentState : the state
323  */
324  // ==========================================================================
325  void setCurrentState(const Instantiation& currentState) {
326  lastState_ = currentState;
327  }
328 
329  // ==========================================================================
330  /**
331  * @return actionId the id of the action the SDyna instance wishes to be
332  * performed
333  * @param curState the state in which we currently are
334  */
335  // ==========================================================================
336  Idx takeAction(const Instantiation& curState);
337 
338  // ==========================================================================
339  /**
340  * @return the id of the action the SDyna instance wishes to be performed
341  */
342  // ==========================================================================
343  Idx takeAction();
344 
345  // ==========================================================================
346  /**
347  * Performs a feedback on the last transition.
348  * That is, learn from the transition.
349  * @param originalState : the state we were in before the transition
350  * @param reachedState : the state we reached after
351  * @param performedAction : the action we performed
352  * @param obtainedReward : the reward we obtained
353  */
354  // ==========================================================================
355  void feedback(const Instantiation& originalState,
356  const Instantiation& reachedState,
357  Idx performedAction,
358  double obtainedReward);
359 
360  // ==========================================================================
361  /**
362  * Performs a feedback on the last transition.
363  * That is, learn from the transition.
364  * @param reachedState : the state reached after the transition
365  * @param obtainedReward : the reward obtained during the transition
366  * @warning Uses the originalState__ and performedAction__ stored in cache.
367  * If you want to specify the original state and the performed action, use
368  * the other overload of feedback.
369  */
370  // ==========================================================================
371  void feedback(const Instantiation& reachedState, double obtainedReward);
372 
373  // ==========================================================================
374  /**
375  * Starts a new planning phase.
376  * @param nbStep : the maximal number of value iteration steps performed
377  * during this planning phase
378  */
379  // ==========================================================================
380  void makePlanning(Idx nbStep);
381 
382  /// @}
383 
384 
385  public:
386  // ==========================================================================
387  /**
388  * Describes the learned FMDP and the associated optimal policy.
389  * @return a string describing the learned FMDP and the associated
390  * optimal policy.
391  * Both are given in DOT language.
392  */
393  // ==========================================================================
394  std::string toString();
395 
396  std::string optimalPolicy2String() { return planer__->optimalPolicy2String(); }
397 
398 
399  // ###################################################################
400  /// @name Size methods
401  /// @brief just to get the size of the different data structures, for
402  /// performance evaluation purposes only
403  // ###################################################################
404  /// @{
405  public:
406  // ==========================================================================
407  /**
408  * @brief learnerSize
409  * @return the size of the data structures used by the learner
410  */
411  // ==========================================================================
412  Size learnerSize() { return learner__->size(); }
413 
414  // ==========================================================================
415  /**
416  * @brief modelSize
417  * @return the size of the learned FMDP
418  */
419  // ==========================================================================
420  Size modelSize() { return fmdp_->size(); }
421 
422  // ==========================================================================
423  /**
424  * @brief valueFunctionSize
425  * @return the size of the computed value function
426  */
427  // ==========================================================================
428  Size valueFunctionSize() { return planer__->vFunctionSize(); }
429 
430  // ==========================================================================
431  /**
432  * @brief optimalPolicySize
433  * @return the size of the computed optimal policy
434  */
435  // ==========================================================================
436  Size optimalPolicySize() { return planer__->optimalPolicySize(); }
437 
438  /// @}
439 
440 
441  protected:
442  /// The learnt Markovian Decision Process
443  FMDP< double >* fmdp_;
444 
445  /// The state in which the system is before we perform a new action
446  Instantiation lastState_;
447 
448  private:
449  /// The learner used to learn the FMDP
450  ILearningStrategy* learner__;
451 
452  /// The planer used to plan an optimal strategy
453  IPlanningStrategy< double >* planer__;
454 
455  /// The decider
456  IDecisionStrategy* decider__;
457 
458 
459  /// The number of observations we make before using the planer again
460  Idx observationPhaseLenght__;
461 
462  /// The total number of observations made so far
463  Idx nbObservation__;
464 
465  /// The number of value iteration steps we perform
466  Idx nbValueIterationStep__;
467 
468  /// The last performed action
469  Idx lastAction__;
470 
471  /// Since SDYNA made these observations, it has to delete them on quitting
472  Set< Observation* > bin__;
473 
474  bool actionReward__;
475 
476  bool verbose_;
477  };
478 
479 
480 } /* namespace gum */
481 
482 
483 #endif // GUM_SDYNA_H
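
A minimal usage sketch of the explore/learn/exploit loop exposed by this header. The MyEnvironment type, the action ids and the variable list are assumptions made for illustration only; the SDYNA calls mirror the API declared above.

#include <vector>
#include <agrum/FMDP/SDyna/sdyna.h>

// Hypothetical environment, assumed for illustration only.
struct MyEnvironment {
  gum::Instantiation current;   // current system state

  const gum::Instantiation& state() const { return current; }
  void   perform(gum::Idx /*actionId*/) { /* apply the action, update current */ }
  double reward() const { return 0.0; }   // reward of the last transition
};

void runSDyna(MyEnvironment&                                      env,
              const std::vector< const gum::DiscreteVariable* >& vars) {
  // Build an SDYNA instance with one of the factory methods declared above.
  gum::SDYNA* sdyna = gum::SDYNA::spimddiInstance();

  // Declare the problem: state variables and available actions.
  for (const auto* var: vars)
    sdyna->addVariable(var);
  sdyna->addAction(1, "moveLeft");
  sdyna->addAction(2, "moveRight");

  // Takes effect once the variables and actions have been declared.
  sdyna->initialize(env.state());

  // Explore, learn and exploit.
  for (gum::Idx step = 0; step < 1000; ++step) {
    gum::Idx action = sdyna->takeAction(env.state());
    env.perform(action);
    // Learn from the observed transition; the original state and performed
    // action are the ones cached by the previous calls.
    sdyna->feedback(env.state(), env.reward());
    // makePlanning(nbStep) may also be called explicitly if needed.
  }

  delete sdyna;
}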