aGrUM  0.14.2
sdyna.h
/***************************************************************************
 *   Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN  *
 *   {prenom.nom}_at_lip6.fr                                               *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.              *
 ***************************************************************************/
// =========================================================================
#ifndef GUM_SDYNA_H
#define GUM_SDYNA_H
// =========================================================================
#include <agrum/agrum.h>
// =========================================================================
#include <agrum/FMDP/SDyna/ILearningStrategy.h>
#include <agrum/FMDP/SDyna/IPlanningStrategy.h>
#include <agrum/FMDP/SDyna/IDecisionStrategy.h>
// =========================================================================
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>
#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
#include <agrum/FMDP/decision/E_GreedyDecider.h>
#include <agrum/FMDP/decision/randomDecider.h>
// =========================================================================

namespace gum {

  /**
   * @class SDYNA
   * @brief The general SDyna architecture abstract class.
   */
  class SDYNA {
    // ###################################################################
    // ###################################################################
    public:
    // ==========================================================================
    // ==========================================================================
    static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                                double discountFactor = 0.9,
                                double epsilon = 1,
                                Idx observationPhaseLenght = 100,
                                Idx nbValueIterationStep = 10) {
      bool actionReward = false;
      ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
         attributeSelectionThreshold, actionReward);
      IPlanningStrategy< double >* ps =
         StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
      IDecisionStrategy* ds = new E_GreedyDecider();
      return new SDYNA(
         ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }

    // ==========================================================================
    // ==========================================================================
    static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
                                  double similarityThreshold = 0.3,
                                  double discountFactor = 0.9,
                                  double epsilon = 1,
                                  Idx observationPhaseLenght = 100,
                                  Idx nbValueIterationStep = 10) {
      bool actionReward = false;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
         attributeSelectionThreshold, actionReward, similarityThreshold);
      IPlanningStrategy< double >* ps = StructuredPlaner< double >::spumddInstance(
         discountFactor, epsilon, false);
      IDecisionStrategy* ds = new E_GreedyDecider();
      return new SDYNA(ls,
                       ps,
                       ds,
                       observationPhaseLenght,
                       nbValueIterationStep,
                       actionReward,
                       false);
    }

    // ==========================================================================
    // ==========================================================================
    static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
                                  double similarityThreshold = 0.3,
                                  double discountFactor = 0.9,
                                  double epsilon = 1,
                                  Idx observationPhaseLenght = 100,
                                  Idx nbValueIterationStep = 10) {
      bool actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
         attributeSelectionThreshold, actionReward, similarityThreshold);
      AdaptiveRMaxPlaner* rm = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(
         ls, discountFactor, epsilon);
      IPlanningStrategy< double >* ps = rm;
      IDecisionStrategy* ds = rm;
      return new SDYNA(
         ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }

    // ==========================================================================
    // ==========================================================================
    static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
                                   double discountFactor = 0.9,
                                   double epsilon = 1,
                                   Idx observationPhaseLenght = 100,
                                   Idx nbValueIterationStep = 10) {
      bool actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
         attributeSelectionThreshold, actionReward);
      AdaptiveRMaxPlaner* rm =
         AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
      IPlanningStrategy< double >* ps = rm;
      IDecisionStrategy* ds = rm;
      return new SDYNA(
         ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }

    // ==========================================================================
    // ==========================================================================
    static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
                                    double similarityThreshold = 0.3,
                                    double discountFactor = 0.9,
                                    double epsilon = 1,
                                    Idx observationPhaseLenght = 100,
                                    Idx nbValueIterationStep = 10) {
      bool actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
         attributeSelectionThreshold, actionReward, similarityThreshold);
      IPlanningStrategy< double >* ps =
         StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
      IDecisionStrategy* ds = new RandomDecider();
      return new SDYNA(
         ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }

    // ==========================================================================
    // ==========================================================================
    static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
                                     double discountFactor = 0.9,
                                     double epsilon = 1,
                                     Idx observationPhaseLenght = 100,
                                     Idx nbValueIterationStep = 10) {
      bool actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
         attributeSelectionThreshold, actionReward);
      IPlanningStrategy< double >* ps =
         StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
      IDecisionStrategy* ds = new RandomDecider();
      return new SDYNA(
         ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }

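    /*
     * Usage sketch for the factory methods above. Each factory wires together a
     * learning strategy, a planning strategy and a decision strategy, and returns
     * a heap-allocated agent that the caller owns and must eventually delete.
     * The call below is only an illustration of the default parameters:
     *
     *   gum::SDYNA* agent = gum::SDYNA::spimddiInstance(0.99,  // attribute selection threshold
     *                                                   0.3,   // similarity threshold
     *                                                   0.9,   // discount factor
     *                                                   1.0);  // epsilon
     *   // ... declare variables and actions, initialize, then interact ...
     *   delete agent;
     */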


    // ###################################################################
    // ###################################################################

    // ==========================================================================
    /// Constructor.
    // ==========================================================================
    private:
    SDYNA(ILearningStrategy* learner,
          IPlanningStrategy< double >* planer,
          IDecisionStrategy* decider,
          Idx observationPhaseLenght,
          Idx nbValueIterationStep,
          bool actionReward,
          bool verbose = true);

    // ==========================================================================
    /// Destructor.
    // ==========================================================================
    public:
    ~SDYNA();


    // ###################################################################
    // ###################################################################
    public:
    // ==========================================================================
    /// Inserts a new action in the SDyna instance.
    // ==========================================================================
    void addAction(const Idx actionId, const std::string& actionName) {
      _fmdp->addAction(actionId, actionName);
    }

    // ==========================================================================
    /// Inserts a new variable in the SDyna instance.
    // ==========================================================================
    void addVariable(const DiscreteVariable* var) { _fmdp->addVariable(var); }

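    /*
     * Illustration of the insertion methods above, assuming a small two-variable
     * problem (the variable and action names below are arbitrary):
     *
     *   gum::LabelizedVariable xPos("xPos", "horizontal position", 3);
     *   gum::LabelizedVariable yPos("yPos", "vertical position", 3);
     *   agent->addVariable(&xPos);
     *   agent->addVariable(&yPos);
     *   agent->addAction(1, "GoLeft");
     *   agent->addAction(2, "GoRight");
     */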


    // ###################################################################
    // ###################################################################
    public:
    // ==========================================================================
    /// Initializes the SDyna instance.
    // ==========================================================================
    void initialize();

    // ==========================================================================
    /// Initializes the SDyna instance at the given state.
    // ==========================================================================
    void initialize(const Instantiation& initialState);

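    /*
     * Illustration of the initialization step, assuming the variables added in
     * the sketch above and an arbitrary starting configuration:
     *
     *   gum::Instantiation initialState;
     *   initialState.add(xPos);
     *   initialState.add(yPos);
     *   initialState.chgVal(xPos, 0);
     *   initialState.chgVal(yPos, 0);
     *   agent->initialize(initialState);
     */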


    // ###################################################################
    // ###################################################################
    public:
    // ==========================================================================
    /// Sets the last visited state to the given state.
    // ==========================================================================
    void setCurrentState(const Instantiation& currentState) {
      _lastState = currentState;
    }

    // ==========================================================================
    // ==========================================================================
    Idx takeAction(const Instantiation& curState);

    // ==========================================================================
    // ==========================================================================
    Idx takeAction();

    // ==========================================================================
    /// Performs a feedback on the last transition.
    // ==========================================================================
    void feedback(const Instantiation& originalState,
                  const Instantiation& reachedState,
                  Idx performedAction,
                  double obtainedReward);

    // ==========================================================================
    /// Performs a feedback on the last transition.
    // ==========================================================================
    void feedback(const Instantiation& reachedState, double obtainedReward);

337  // ==========================================================================
343  // ==========================================================================
344  void makePlanning(Idx nbStep);
345 
347 
348 
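    /*
     * Typical interaction loop with the incremental methods above: observe the
     * current state, let the agent pick an action, apply it in the environment,
     * then feed the outcome back. The `env` object is a stand-in for whatever
     * simulator provides states and rewards:
     *
     *   for (gum::Idx step = 0; step < 1000; ++step) {
     *     gum::Instantiation state  = env.currentState();
     *     gum::Idx           action = agent->takeAction(state);
     *     double             reward = env.apply(action);
     *     agent->feedback(env.currentState(), reward);  // the agent learns from this observation
     *   }
     */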
    public:
    // ==========================================================================
    /// Returns a string representation of this SDyna instance.
    // ==========================================================================
    std::string toString();

    /// Returns a string describing the optimal policy in dot format.
    std::string optimalPolicy2String() { return __planer->optimalPolicy2String(); }

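    /*
     * Since the optimal policy description above is in dot format, it can be
     * dumped to a file and rendered with graphviz, for instance:
     *
     *   std::ofstream out("policy.dot");
     *   out << agent->optimalPolicy2String();
     */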
    // ###################################################################
    // ###################################################################
    public:
    // ==========================================================================
    /// Returns the size of the data structure used by the learner.
    // ==========================================================================
    Size learnerSize() { return __learner->size(); }

    // ==========================================================================
    /// Returns the size of the learnt model.
    // ==========================================================================
    Size modelSize() { return _fmdp->size(); }

    // ==========================================================================
    /// Returns the size of the value function computed so far.
    // ==========================================================================
    Size valueFunctionSize() { return __planer->vFunctionSize(); }

    // ==========================================================================
    /// Returns the size of the optimal policy computed so far.
    // ==========================================================================
    Size optimalPolicySize() { return __planer->optimalPolicySize(); }


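    /*
     * The accessors above expose the size of the internal data structures, which
     * is a convenient way to monitor how compact the learnt structures stay
     * during a run, for instance:
     *
     *   std::cout << "learner: "         << agent->learnerSize()
     *             << ", model: "          << agent->modelSize()
     *             << ", value function: " << agent->valueFunctionSize()
     *             << ", policy: "         << agent->optimalPolicySize() << std::endl;
     */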
    protected:
    /// The learnt Markovian Decision Process.
    FMDP< double >* _fmdp;

    /// The state in which the system is before performing a new action.
    Instantiation _lastState;

    private:
    /// The learner used to learn the FMDP.
    ILearningStrategy* __learner;

    /// The planer used to plan an optimal strategy.
    IPlanningStrategy< double >* __planer;

    /// The decider.
    IDecisionStrategy* __decider;

    /// The number of observations to make before the planer is used again.
    Idx __observationPhaseLenght;

    /// The total number of observations made so far.
    Idx __nbObservation;

    /// The number of value iteration steps to perform.
    Idx __nbValueIterationStep;

    /// The last performed action.
    Idx __lastAction;

    /// Observations made by SDYNA; it has to delete them on exit.
    Set< Observation* > __bin;

    bool __actionReward;
    bool _verbose;
  };


} /* namespace gum */


#endif   // GUM_SDYNA_H