aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of the RMax planer class.
25  *
26  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(@AMU)
28  */
29 
30 // =========================================================================
31 #ifndef GUM_ADAPTIVE_RMAX_PLANER_H
32 #define GUM_ADAPTIVE_RMAX_PLANER_H
33 // =========================================================================
34 #include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
35 #include <agrum/FMDP/fmdp.h>
36 #include <agrum/FMDP/learning/fmdpLearner.h>
37 #include <agrum/FMDP/planning/structuredPlaner.h>
38 #include <agrum/FMDP/simulation/statesCounter.h>
39 // =========================================================================
40 
41 namespace gum {
42 
43  /**
44  * @class AdaptiveRMaxPlaner adaptiveRMaxPlaner.h
45  * <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
46  * @brief A class to find optimal policy for a given FMDP.
47  * @ingroup fmdp_group
48  *
49  * Performs RMax planning on the factored Markov decision process given as
50  * parameter.
51  *
52  */
53  class AdaptiveRMaxPlaner: public StructuredPlaner< double >, public IDecisionStrategy {
54  // ###################################################################
55  /// @name Factory methods
56  // ###################################################################
57  /// @{
58  public:
59  // ==========================================================================
60  /// Creates a planer that uses a MDDOperatorStrategy< double > as its operator strategy.
61  // ==========================================================================
62  static AdaptiveRMaxPlaner* ReducedAndOrderedInstance(const ILearningStrategy* learner,
63  double discountFactor = 0.9,
64  double epsilon = 0.00001,
65  bool verbose = true) {
66  // The planer is built on a newly allocated MDD operator strategy; both objects come from new.
67  IOperatorStrategy< double >* mddOperators = new MDDOperatorStrategy< double >();
68  AdaptiveRMaxPlaner* planer
69  = new AdaptiveRMaxPlaner(mddOperators, discountFactor, epsilon, learner, verbose);
70  return planer;
71  }
72 
73  // ==========================================================================
74  /// Creates a planer that uses a TreeOperatorStrategy< double > as its operator strategy.
75  // ==========================================================================
76  static AdaptiveRMaxPlaner* TreeInstance(const ILearningStrategy* learner,
77  double discountFactor = 0.9,
78  double epsilon = 0.00001,
79  bool verbose = true) {
80  // The planer is built on a newly allocated tree operator strategy; both objects come from new.
81  IOperatorStrategy< double >* treeOperators = new TreeOperatorStrategy< double >();
82  AdaptiveRMaxPlaner* planer
83  = new AdaptiveRMaxPlaner(treeOperators, discountFactor, epsilon, learner, verbose);
84  return planer;
85  }
86 
87  /// @}
88 
89  // ###################################################################
90  /// @name Constructor & destructor.
91  // ###################################################################
92  /// @{
93  private:
94  // ==========================================================================
95  /// Constructor. Declared private; the static functions ReducedAndOrderedInstance() and TreeInstance() call it.
96  // ==========================================================================
97  AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,   // operator strategy; the static factories pass a newly allocated MDDOperatorStrategy or TreeOperatorStrategy
98  double discountFactor,   // the static factories default this to 0.9
99  double epsilon,   // the static factories default this to 0.00001
100  const ILearningStrategy* learner,   // learning strategy forwarded unchanged from the factory caller
101  bool verbose);   // the static factories default this to true
102 
103  // ==========================================================================
104  /// Default destructor
105  // ==========================================================================
106  public:
108 
109  /// @}
110 
111 
112  // ###################################################################
113  /// @name Planning Methods
114  // ###################################################################
115  /// @{
116 
117  public:
118  // ==========================================================================
119  /**
120  * Initializes the data structures needed for planning
121  * @warning Not calling this method before the first makePlanning
122  * will surely and definitely result in a crash
123  */
124  // ==========================================================================
125  void initialize(const FMDP< double >* fmdp);
126 
127 
128  // ==========================================================================
129  /**
130  * Performs a value iteration
131  *
132  * @param nbStep : enables you to specify how many value iterations you wish
133  * to do.
134  * makePlanning will then stop either when the optimal value function is reached
135  * or when nbStep iterations have been performed
136  */
137  // ==========================================================================
138  void makePlanning(Idx nbStep = 1000000);
139 
140  /// @}
141 
142 
143  // ###################################################################
144  /// @name Value Iteration Methods
145  // ###################################################################
146  /// @{
147 
148  protected:
149  // ==========================================================================
150  ///
151  // ==========================================================================
152  virtual void initVFunction_();
153 
154  // ==========================================================================
155  /// Performs a single step of value iteration
156  // ==========================================================================
157  virtual MultiDimFunctionGraph< double >* valueIteration_();
158 
159  /// @}
160 
161 
162  // ###################################################################
163  /// @name Optimal policy extraction methods
164  // ###################################################################
165  /// @{
166 
167  protected:
168  // ==========================================================================
169  /// Perform the required tasks to extract an optimal policy
170  // ==========================================================================
171  virtual void evalPolicy_();
172 
173  /// @}
174 
175  private:
177 
180  MultiDimFunctionGraph< double >*,
181  MultiDimFunctionGraph< double >*);
182  void _clearTables_();
183 
184  private:
188 
189  double _rThreshold_;
190  double _rmax_;
191 
192 
193  // ###################################################################
194  /// @name Incremental methods
195  // ###################################################################
196  /// @{
197  public:
198  void checkState(const Instantiation& newState, Idx actionId) {
199  // Flag already set for this action: increment its counter; otherwise reset it and set the flag.
200  const bool alreadySeen = _initializedTable_[actionId];
201  if (alreadySeen) { _counterTable_[actionId]->incState(newState); return; }
202  _counterTable_[actionId]->reset(newState);
203  _initializedTable_[actionId] = true;
204  }
205 
206  private:
209 
211  /// @}
212  };
213 
214 } /* namespace gum */
215 
216 #endif // GUM_ADAPTIVE_RMAX_PLANER_H
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
~AdaptiveRMaxPlaner()
Default destructor.
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
virtual void initVFunction_()
Performs a single step of value iteration.
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
HashTable< Idx, StatesCounter *> _counterTable_
const ILearningStrategy * _fmdpLearner_
std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
virtual MultiDimFunctionGraph< double > * valueIteration_()
Performs a single step of value iteration.
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
HashTable< Idx, bool > _initializedTable_
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
void checkState(const Instantiation &newState, Idx actionId)
HashTable< Idx, MultiDimFunctionGraph< double > *> _actionsBoolTable_