aGrUM  0.14.2
adaptiveRMaxPlaner.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 // =========================================================================
28 #ifndef GUM_ADAPTIVE_RMAX_PLANER_H
29 #define GUM_ADAPTIVE_RMAX_PLANER_H
30 // =========================================================================
32 #include <agrum/FMDP/fmdp.h>
36 // =========================================================================
37 
38 namespace gum {
39 
51  : public StructuredPlaner< double >
52  , public IDecisionStrategy {
53  // ###################################################################
55  // ###################################################################
57  public:
58  // ==========================================================================
60  // ==========================================================================
61  static AdaptiveRMaxPlaner*
63  double discountFactor = 0.9,
64  double epsilon = 0.00001,
65  bool verbose = true) {
67  discountFactor,
68  epsilon,
69  learner,
70  verbose);
71  }
72 
73  // ==========================================================================
75  // ==========================================================================
77  double discountFactor = 0.9,
78  double epsilon = 0.00001,
79  bool verbose = true) {
81  discountFactor,
82  epsilon,
83  learner,
84  verbose);
85  }
86 
88 
89  // ###################################################################
91  // ###################################################################
93  private:
94  // ==========================================================================
96  // ==========================================================================
98  double discountFactor,
99  double epsilon,
100  const ILearningStrategy* learner,
101  bool verbose);
102 
103  // ==========================================================================
105  // ==========================================================================
106  public:
108 
110 
111 
112  // ###################################################################
114  // ###################################################################
116 
117  public:
118  // ==========================================================================
124  // ==========================================================================
/// Initializes the data structures needed for planning.
/// @param fmdp the factored Markov decision process handed to the planer
///        (presumably the one later exposed through fmdp() — confirm).
125  void initialize(const FMDP< double >* fmdp);
126 
127 
128  // ==========================================================================
137  // ==========================================================================
/// Performs a value iteration.
/// @param nbStep step budget, 1000000 by default; presumably the maximum
///        number of iterations — TODO confirm against the implementation.
138  void makePlanning(Idx nbStep = 1000000);
139 
141 
142 
143  // ###################################################################
145  // ###################################################################
147 
148  protected:
149  // ==========================================================================
151  // ==========================================================================
/// NOTE(review): the generated documentation describes this hook as
/// "Performs a single step of value iteration", the exact text that is also
/// attached to _valueIteration(). Judging by its name it presumably sets up
/// the initial value function instead — confirm against the implementation.
152  virtual void _initVFunction();
153 
154  // ==========================================================================
156  // ==========================================================================
158 
160 
161 
162  // ###################################################################
164  // ###################################################################
166 
167  protected:
168  // ==========================================================================
170  // ==========================================================================
/// Performs the tasks required to extract an optimal policy.
171  virtual void _evalPolicy();
172 
174 
175  private:
177 
178  std::pair< NodeId, NodeId > __visitLearner(const IVisitableGraphLearner*,
179  NodeId currentNodeId,
182  void __clearTables();
183 
184  private:
188 
189  double __rThreshold;
190  double __rmax;
191 
192 
193  // ###################################################################
195  // ###################################################################
197  public:
/// Feeds @p newState to the StatesCounter stored for action @p actionId.
///
/// The first call for a given action (its __initializedTable flag is still
/// false) calls reset(newState) on the counter and sets the flag to true;
/// every later call for that action calls incState(newState) instead.
///
/// @param newState the state handed to the counter.
/// @param actionId key used to index both __initializedTable and
///        __counterTable.
///
/// NOTE(review): both tables are indexed directly with operator[], so this
/// assumes they already hold an entry for actionId — verify where they are
/// populated. What reset()/incState() do to the counter is not visible here;
/// presumably they restart / increment a per-state visit count (confirm in
/// StatesCounter).
198  void checkState(const Instantiation& newState, Idx actionId) {
199  if (!__initializedTable[actionId]) {
     // First state seen for this action: (re)start its counter from newState.
200  __counterTable[actionId]->reset(newState);
201  __initializedTable[actionId] = true;
202  } else
     // Counter already started for this action: register one more state.
203  __counterTable[actionId]->incState(newState);
204  }
205 
206  private:
209 
212  };
213 
214 } /* namespace gum */
215 
216 #endif // GUM_ADAPTIVE_RMAX_PLANER_H
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
HashTable< Idx, StatesCounter *> __counterTable
HashTable< Idx, MultiDimFunctionGraph< double > *> __actionsBoolTable
<agrum/FMDP/planning/structuredPlaner.h>
~AdaptiveRMaxPlaner()
Default destructor.
virtual MultiDimFunctionGraph< double > * _valueIteration()
Performs a single step of value iteration.
Headers of the States Counter class.
<agrum/FMDP/SDyna/IDecisionStrategy.h>
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
std::pair< NodeId, NodeId > __visitLearner(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
const ILearningStrategy * __fmdpLearner
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
<agrum/FMDP/planning/mddOperatorStrategy.h>
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
The class for generic Hash Tables.
Definition: hashTable.h:676
HashTable< Idx, bool > __initializedTable
Headers of the StructuredPlaner planer class.
Headers of the Decision Strategy interface.
Class implementing a factored Markov decision process.
<agrum/FMDP/SDyna/ILearningStrategy.h>
<agrum/FMDP/planning/treeOperatorStrategy.h>
HashTable< Idx, MultiDimFunctionGraph< double > *> __actionsRMaxTable
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:80
<agrum/FMDP/planning/adaptiveRMaxPlaner.h>
void initialize(const FMDP< double > *fmdp)
Initializes the data structures needed for planning.
INLINE const FMDP< double > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
virtual void _initVFunction()
Performs a single step of value iteration.
Size Idx
Type for indexes.
Definition: types.h:50
void checkState(const Instantiation &newState, Idx actionId)
Headers of the FMDPLearner class.
Size NodeId
Type for node ids.
Definition: graphElements.h:97
virtual void _evalPolicy()
Perform the required tasks to extract an optimal policy.