/**
 * sdyna.cpp (aGrUM 0.16.0)
 *
 * Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6)
 * {prenom.nom}_at_lip6.fr
 */
// =========================================================================
#include <cstdlib>
#include <random>
// =========================================================================
#include <agrum/FMDP/SDyna/sdyna.h>
// =========================================================================

namespace gum {

  // ==========================================================================
  // Constructor & destructor.
  // ==========================================================================

  // ###################################################################
  /*
   * Constructor
   *
   * @param learner : the learning strategy used to learn the FMDP
   * @param planer : the planning strategy used to compute an optimal policy
   * @param decider : the decision strategy used to choose the actions
   * @param observationPhaseLenght : the number of observations made before a
   * new planning is launched. If it equals 0, planning is done after each
   * structural change.
   * @param nbValueIterationStep : the number of value iteration steps done
   * during one planning
   * @param actionReward : whether the reward depends on the state in which
   * the action was performed rather than on the reached state
   * @param verbose : whether planning progress is printed on std::cout
   * @return an instance of the SDyna architecture
   */
  // ###################################################################
56 
59  IDecisionStrategy* decider,
60  Idx observationPhaseLenght,
61  Idx nbValueIterationStep,
62  bool actionReward,
63  bool verbose) :
64  __learner(learner),
65  __planer(planer), __decider(decider),
66  __observationPhaseLenght(observationPhaseLenght),
67  __nbValueIterationStep(nbValueIterationStep), __actionReward(actionReward),
68  _verbose(verbose) {
69  GUM_CONSTRUCTOR(SDYNA);
70 
71  _fmdp = new FMDP< double >();
72 
73  __nbObservation = 1;
74  }
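
  // A minimal usage sketch of the explore-learn-exploit loop built from the
  // methods defined in this file. The spitiInstance() factory is declared in
  // sdyna.h; `env` stands for a hypothetical environment providing the
  // reached state and the obtained reward:
  //
  //   SDYNA* agent = SDYNA::spitiInstance();
  //   // ... declare the system's variables and actions on the agent ...
  //   agent->initialize(env.initialState());
  //   for (Idx step = 0; step < nbSteps; ++step) {
  //     Idx action = agent->takeAction();
  //     env.perform(action);
  //     agent->feedback(env.currentState(), env.reward());
  //   }
  //   delete agent;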

  // ###################################################################
  // Destructor
  // ###################################################################
  SDYNA::~SDYNA() {
    delete __decider;

    delete __learner;

    delete __planer;

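    // The observations stored in __bin were allocated by this instance (see
    // feedback() below), so they are deleted here as well.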
    for (auto obsIter = __bin.beginSafe(); obsIter != __bin.endSafe(); ++obsIter)
      delete *obsIter;

    delete _fmdp;

    GUM_DESTRUCTOR(SDYNA);
  }

  // ==========================================================================
  // Initialization
  // ==========================================================================

  void SDYNA::initialize() {
    __learner->initialize(_fmdp);
    __planer->initialize(_fmdp);
    __decider->initialize(_fmdp);
  }

  // ###################################################################
  /*
   * Initializes the SDyna instance.
   * @param initialState : the state of the studied system from which we
   * will begin the explore, learn and exploit process
   */
  // ###################################################################
  void SDYNA::initialize(const Instantiation& initialState) {
    initialize();
    setCurrentState(initialState);
  }

  // ==========================================================================
  // Incremental methods
  // ==========================================================================

  // ###################################################################
  /*
   * Performs a feedback on the last transition.
   * In other words, learns from the transition.
   * @param curState : the state reached after the transition
   * @param prevState : the state we were in before the transition
   * @param lastAction : the action we performed
   * @param reward : the reward we obtained
   */
  // ###################################################################
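  // This overload caches the starting state and the performed action, then
  // delegates to the one-argument feedback() below.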
  void SDYNA::feedback(const Instantiation& curState,
                       const Instantiation& prevState,
                       Idx                  lastAction,
                       double               reward) {
    __lastAction = lastAction;
    _lastState = prevState;
    feedback(curState, reward);
  }

  // ###################################################################
  /*
   * Performs a feedback on the last transition.
   * In other words, learns from the transition.
   * @param newState : the state reached after the transition
   * @param reward : the reward obtained during the transition
   * @warning Uses the _lastState and __lastAction stored in cache.
   * If you want to specify the original state and the performed action,
   * see the overload above.
   */
  // ###################################################################
  void SDYNA::feedback(const Instantiation& newState, double reward) {
    Observation* obs = new Observation();

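    // The observation records the starting state on the FMDP's main
    // variables and the reached state on their primed counterparts.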
    for (auto varIter = _lastState.variablesSequence().beginSafe();
         varIter != _lastState.variablesSequence().endSafe();
         ++varIter)
      obs->setModality(*varIter, _lastState.val(**varIter));

    for (auto varIter = newState.variablesSequence().beginSafe();
         varIter != newState.variablesSequence().endSafe();
         ++varIter) {
      obs->setModality(_fmdp->main2prime(*varIter), newState.val(**varIter));

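      // Depending on __actionReward, the reward modality is taken from the
      // state in which the action was performed or from the reached state.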
      if (this->__actionReward)
        obs->setRModality(*varIter, _lastState.val(**varIter));
      else
        obs->setRModality(*varIter, newState.val(**varIter));
    }

    obs->setReward(reward);

    __learner->addObservation(__lastAction, obs);
    __bin.insert(obs);

    setCurrentState(newState);
    __decider->checkState(newState, __lastAction);

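    // Once every __observationPhaseLenght observations, a new planning
    // phase is triggered.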
    if (__nbObservation % __observationPhaseLenght == 0)
      makePlanning(__nbValueIterationStep);

    __nbObservation++;
  }

  // ###################################################################
  /*
   * Starts a new planning
   * @param nbValueIterationStep : the maximal number of value iteration
   * steps performed during this planning
   */
  // ###################################################################
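  // A planning phase chains the three strategies: the learner folds the
  // pending observations into the FMDP, the planer runs value iteration on
  // it, and the resulting optimal policy is handed over to the decider.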
  void SDYNA::makePlanning(Idx nbValueIterationStep) {
    if (_verbose) std::cout << "Updating decision trees ..." << std::endl;
    __learner->updateFMDP();
    // std::cout << "Done" << std::endl;

    if (_verbose) std::cout << "Planning ..." << std::endl;
    __planer->makePlanning(nbValueIterationStep);
    // std::cout << "Done" << std::endl;

    __decider->setOptimalStrategy(__planer->optimalPolicy());
  }

  // ###################################################################
  /*
   * @return the id of the action the SDyna instance wishes to be performed
   * @param curState : the state in which we currently are
   */
  // ###################################################################
  Idx SDYNA::takeAction(const Instantiation& curState) {
    _lastState = curState;
    return takeAction();
  }

  // ###################################################################
  /*
   * @return the id of the action the SDyna instance wishes to be performed
   */
  // ###################################################################
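  // When several actions are optimal in the current state, one is drawn
  // uniformly at random; the ternary guard below handles the corner case
  // where std::rand() returns RAND_MAX exactly.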
  Idx SDYNA::takeAction() {
    ActionSet actionSet = __decider->stateOptimalPolicy(_lastState);
    if (actionSet.size() == 1) {
      __lastAction = actionSet[0];
    } else {
      Idx randy =
         (Idx)((double)std::rand() / (double)RAND_MAX * actionSet.size());
      __lastAction = actionSet[randy == actionSet.size() ? 0 : randy];
    }
    return __lastAction;
  }

  // ###################################################################
  // Returns a string describing the learnt FMDP and the current optimal
  // policy.
  // ###################################################################
  std::string SDYNA::toString() {
    std::stringstream description;

    description << _fmdp->toString() << std::endl;
    description << __planer->optimalPolicy2String() << std::endl;

    return description.str();
  }

}   // End of namespace gum