aGrUM  0.14.2
sdyna.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
28 // =========================================================================
29 #include <cstdlib>
30 #include <random>
31 // =========================================================================
32 #include <agrum/FMDP/SDyna/sdyna.h>
33 // =========================================================================
34 
35 namespace gum {
36 
37  // ==========================================================================
38  // Constructor & destructor.
39  // ==========================================================================
40 
41  // ###################################################################
42  /*
43  * Constructor
44  *
45  * @param observationPhaseLenght : the number of observations made before a
46  * replanning is launched. If it equals 0, a planning is done after each
47  * structural change.
48  * @param nbValueIterationStep : the number of value iteration steps performed
49  * during one planning
50  * @return an instance of SDyna architecture
51  */
52  // ###################################################################
53 
56  IDecisionStrategy* decider,
57  Idx observationPhaseLenght,
58  Idx nbValueIterationStep,
59  bool actionReward,
60  bool verbose) :
61  __learner(learner),
62  __planer(planer), __decider(decider),
63  __observationPhaseLenght(observationPhaseLenght),
64  __nbValueIterationStep(nbValueIterationStep), __actionReward(actionReward),
65  _verbose(verbose) {
66  GUM_CONSTRUCTOR(SDYNA);
67 
68  _fmdp = new FMDP< double >();
69 
70  __nbObservation = 1;
71  }
72 
73  // ###################################################################
74  // Destructor
75  // ###################################################################
77  delete __decider;
78 
79  delete __learner;
80 
81  delete __planer;
82 
83  for (auto obsIter = __bin.beginSafe(); obsIter != __bin.endSafe(); ++obsIter)
84  delete *obsIter;
85 
86  delete _fmdp;
87 
88  GUM_DESTRUCTOR(SDYNA);
89  }
90 
91  // ==========================================================================
92  // Initialization
93  // ==========================================================================
94 
99  }
100 
101  // ###################################################################
102  /*
103  * Initializes the Sdyna instance.
104  * @param initialState : the state of the studied system from which we will
105  * begin the explore, learn and exploit process
106  */
107  // ###################################################################
108  void SDYNA::initialize(const Instantiation& initialState) {
109  initialize();
110  setCurrentState(initialState);
111  }
112 
113  // ==========================================================================
115  // ==========================================================================
116 
117  // ###################################################################
118  /*
119  * Performs a feedback on the last transition.
120  * In extenso, learn from the transition.
121  * @param curState : the state we reached after the transition
122  * @param prevState : the state we were in before the transition
123  * @param lastAction : the action we performed
124  * @param reward : the reward we obtained
125  */
126  // ###################################################################
127  void SDYNA::feedback(const Instantiation& curState,
128  const Instantiation& prevState,
129  Idx lastAction,
130  double reward) {
131  __lastAction = lastAction;
132  _lastState = prevState;
133  feedback(curState, reward);
134  }
135 
136  // ###################################################################
137  /*
138  * Performs a feedback on the last transition.
139  * In extenso, learn from the transition.
140  * @param reachedState : the state reached after the transition
141  * @param obtainedReward : the reward obtained during the transition
142  * @warning Uses the original state (_lastState) and the performed action
143  * (__lastAction) stored in cache. If you want to specify them explicitly, see
144  * the four-argument overload above
145  */
146  // ###################################################################
147  void SDYNA::feedback(const Instantiation& newState, double reward) {
148  Observation* obs = new Observation();
149 
150  for (auto varIter = _lastState.variablesSequence().beginSafe();
151  varIter != _lastState.variablesSequence().endSafe();
152  ++varIter)
153  obs->setModality(*varIter, _lastState.val(**varIter));
154 
155  for (auto varIter = newState.variablesSequence().beginSafe();
156  varIter != newState.variablesSequence().endSafe();
157  ++varIter) {
158  obs->setModality(_fmdp->main2prime(*varIter), newState.val(**varIter));
159 
160  if (this->__actionReward)
161  obs->setRModality(*varIter, _lastState.val(**varIter));
162  else
163  obs->setRModality(*varIter, newState.val(**varIter));
164  }
165 
166  obs->setReward(reward);
167 
169  __bin.insert(obs);
170 
171  setCurrentState(newState);
173 
176 
177  __nbObservation++;
178  }
179 
180  // ###################################################################
181  /*
182  * Starts a new planning
183  * @param nbValueIterationStep : the maximal number of value iteration steps
184  * performed in this planning
185  */
186  // ###################################################################
187  void SDYNA::makePlanning(Idx nbValueIterationStep) {
188  if (_verbose) std::cout << "Updating decision trees ..." << std::endl;
190  // std::cout << << "Done" << std::endl;
191 
192  if (_verbose) std::cout << "Planning ..." << std::endl;
193  __planer->makePlanning(nbValueIterationStep);
194  // std::cout << << "Done" << std::endl;
195 
197  }
198 
199  // ##################################################################
200  /*
201  * @return the id of the action the SDyna instance wishes to perform
202  * @param curState : the state in which we currently are
203  */
204  // ###################################################################
206  _lastState = curState;
207  return takeAction();
208  }
209 
210  // ###################################################################
211  /*
212  * @return the id of the action the SDyna instance wishes to perform
213  */
214  // ###################################################################
217  if (actionSet.size() == 1) {
218  __lastAction = actionSet[0];
219  } else {
220  Idx randy = (Idx)((double)std::rand() / (double)RAND_MAX * actionSet.size());
221  __lastAction = actionSet[randy == actionSet.size() ? 0 : randy];
222  }
223  return __lastAction;
224  }
225 
226  // ###################################################################
227  //
228  // ###################################################################
229  std::string SDYNA::toString() {
230  std::stringstream description;
231 
232  description << _fmdp->toString() << std::endl;
233  description << __planer->optimalPolicy2String() << std::endl;
234 
235  return description.str();
236  }
237 
238 } // End of namespace gum
Headers of the SDyna abstract class.
~SDYNA()
Destructor.
Definition: sdyna.cpp:76
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)=0
Initializes the learner.
virtual std::string optimalPolicy2String()=0
Returns a string describing the optimal policy in a dot format.
Size size() const
Gives the size.
Definition: actionSet.h:208
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
std::string toString()
Returns a string describing the learnt FMDP followed by the optimal policy.
Definition: sdyna.cpp:229
virtual ActionSet stateOptimalPolicy(const Instantiation &curState)
Idx takeAction()
Definition: sdyna.cpp:215
A class to store the optimal actions.
Definition: actionSet.h:85
<agrum/FMDP/SDyna/IDecisionStrategy.h>
Instantiation _lastState
The state in which the system is before we perform a new action.
Definition: sdyna.h:410
void setCurrentState(const Instantiation &currentState)
Sets last state visited to the given state.
Definition: sdyna.h:289
Idx __lastAction
The last performed action.
Definition: sdyna.h:433
virtual void updateFMDP()=0
Starts an update of datastructure in the associated FMDP.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
Definition: sdyna.cpp:54
bool _verbose
Definition: sdyna.h:440
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
Definition: sdyna.cpp:127
virtual void checkState(const Instantiation &newState, Idx actionId)=0
IPlanningStrategy< double > * __planer
The planer used to plan an optimal strategy.
Definition: sdyna.h:417
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
const Sequence< const DiscreteVariable *> & variablesSequence() const final
Returns the sequence of DiscreteVariable of this instantiation.
virtual void initialize(FMDP< double > *fmdp)=0
Initializes the learner.
ILearningStrategy * __learner
The learner used to learn the FMDP.
Definition: sdyna.h:414
Idx val(Idx i) const
Returns the current value of the variable at position i.
The general SDyna architecture abstract class.
Definition: sdyna.h:63
void setReward(double reward)
Sets the reward of this observation.
Definition: observation.h:121
<agrum/FMDP/SDyna/ILearningStrategy.h>
virtual bool addObservation(Idx actionId, const Observation *obs)=0
Gives to the learner a new transition.
virtual void makePlanning(Idx nbIte)=0
Starts a new planning.
INLINE void setRModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation.
Definition: observation.h:109
Idx __observationPhaseLenght
The number of observation we make before using again the planer.
Definition: sdyna.h:424
Idx __nbObservation
The total number of observation made so far.
Definition: sdyna.h:427
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associate to the given main variable.
Definition: fmdp.h:106
virtual const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy()=0
Returns the optimal policy computed so far.
void makePlanning(Idx nbStep)
Starts a new planning.
Definition: sdyna.cpp:187
void initialize()
Initializes the Sdyna instance.
Definition: sdyna.cpp:95
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:80
FMDP< double > * _fmdp
The learnt Markovian Decision Process.
Definition: sdyna.h:407
Size Idx
Type for indexes.
Definition: types.h:50
IDecisionStrategy * __decider
The decider.
Definition: sdyna.h:420
bool __actionReward
Definition: sdyna.h:438
Idx __nbValueIterationStep
The number of Value Iteration step we perform.
Definition: sdyna.h:430
INLINE void setModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation.
Definition: observation.h:106
void setOptimalStrategy(const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > *optPol)
Set< Observation *> __bin
Since SDYNA made these observation, it has to delete them on quitting.
Definition: sdyna.h:436
std::string toString() const
Displays the FMDP in a Dot format.
Definition: fmdp_tpl.h:367