aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.h
/**
 *
 * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 * info_at_agrum_dot_org
 *
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */

/**
 * @file
 * @brief Headers of the RMax planner class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */

// =========================================================================
#ifndef GUM_ADAPTIVE_RMAX_PLANER_H
#define GUM_ADAPTIVE_RMAX_PLANER_H
// =========================================================================
#include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>
#include <agrum/FMDP/simulation/statesCounter.h>
// =========================================================================

namespace gum {

  /**
   * @class AdaptiveRMaxPlaner adaptiveRMaxPlaner.h
   * <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
   * @brief A class to find an optimal policy for a given FMDP.
   * @ingroup fmdp_group
   *
   * Performs RMax planning on the factored Markov decision process given
   * as parameter.
   *
   */
  class AdaptiveRMaxPlaner:
      public StructuredPlaner< double >,
      public IDecisionStrategy {
    // ###################################################################
    /// @name
    // ###################################################################
    /// @{
    public:
    // ==========================================================================
    /// Builds an instance that uses a MDDOperatorStrategy (reduced and
    /// ordered function graphs)
    // ==========================================================================
    static AdaptiveRMaxPlaner*
       ReducedAndOrderedInstance(const ILearningStrategy* learner,
                                 double                   discountFactor = 0.9,
                                 double                   epsilon = 0.00001,
                                 bool                     verbose = true) {
      return new AdaptiveRMaxPlaner(new MDDOperatorStrategy< double >(),
                                    discountFactor,
                                    epsilon,
                                    learner,
                                    verbose);
    }

    // ==========================================================================
    /// Builds an instance that uses a TreeOperatorStrategy (decision trees)
    // ==========================================================================
    static AdaptiveRMaxPlaner* TreeInstance(const ILearningStrategy* learner,
                                            double discountFactor = 0.9,
                                            double epsilon        = 0.00001,
                                            bool   verbose        = true) {
      return new AdaptiveRMaxPlaner(new TreeOperatorStrategy< double >(),
                                    discountFactor,
                                    epsilon,
                                    learner,
                                    verbose);
    }

    /// @}

    // ###################################################################
    /// @name Constructor & destructor.
    // ###################################################################
    /// @{
    private:
    // ==========================================================================
    /// Default constructor
    // ==========================================================================
    AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
                       double                       discountFactor,
                       double                       epsilon,
                       const ILearningStrategy*     learner,
                       bool                         verbose);

    // ==========================================================================
    /// Default destructor
    // ==========================================================================
    public:
    ~AdaptiveRMaxPlaner();

    /// @}


    // ###################################################################
    /// @name Planning Methods
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /**
     * Initializes the data structures needed for planning.
     * @warning Not calling this method before the first call to makePlanning()
     * will result in a crash.
     */
    // ==========================================================================
    void initialize(const FMDP< double >* fmdp);


    // ==========================================================================
    /**
     * Performs value iteration.
     *
     * @param nbStep : the maximum number of value-iteration steps to perform.
     * makePlanning stops either when the optimal value function has been
     * reached or when nbStep iterations have been performed.
     */
    // ==========================================================================
    void makePlanning(Idx nbStep = 1000000);

    /// @}

    // ###################################################################
    /// @name Value Iteration Methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Initializes the value function
    // ==========================================================================
    virtual void initVFunction_();

    // ==========================================================================
    /// Performs a single step of value iteration
    // ==========================================================================
    virtual MultiDimFunctionGraph< double >* valueIteration_();

    /// @}

    // ###################################################################
    /// @name Optimal policy extraction methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Perform the required tasks to extract an optimal policy
    // ==========================================================================
    virtual void evalPolicy_();

    /// @}

    private:
    std::pair< NodeId, NodeId > visitLearner__(const IVisitableGraphLearner*,
                                               NodeId currentNodeId,
                                               MultiDimFunctionGraph< double >*,
                                               MultiDimFunctionGraph< double >*);
    void clearTables__();

    private:
    const ILearningStrategy*                           fmdpLearner__;
    HashTable< Idx, MultiDimFunctionGraph< double >* > actionsBoolTable__;

    double rThreshold__;
    double rmax__;


    // ###################################################################
    /// @name Incremental methods
    // ###################################################################
    /// @{
    public:
    /// Tells the planner that newState has been reached after performing
    /// action actionId, and updates the per-action state counters accordingly.
    void checkState(const Instantiation& newState, Idx actionId) {
      if (!initializedTable__[actionId]) {
        counterTable__[actionId]->reset(newState);
        initializedTable__[actionId] = true;
      } else
        counterTable__[actionId]->incState(newState);
    }

    private:
    HashTable< Idx, StatesCounter* > counterTable__;
    HashTable< Idx, bool >           initializedTable__;

    /// @}
  };

}   /* namespace gum */

#endif   // GUM_ADAPTIVE_RMAX_PLANER_H
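
A minimal usage sketch, not taken from the library's documentation: it assumes a learning strategy and a learned FMDP are already available (for instance from the SDYNA machinery whose headers are included above); the helper name planWithRMax and the step count are illustrative only.

#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>

// Hypothetical helper: runs adaptive RMax planning on an already learned FMDP.
void planWithRMax(const gum::ILearningStrategy* learner,
                  const gum::FMDP< double >*    fmdp) {
  // Tree-based planner; 0.9 is the discount factor, 1e-5 the value-iteration
  // convergence threshold.
  gum::AdaptiveRMaxPlaner* planner
     = gum::AdaptiveRMaxPlaner::TreeInstance(learner, 0.9, 0.00001);

  planner->initialize(fmdp);   // must be called before makePlanning()
  planner->makePlanning(100);  // perform at most 100 value-iteration steps

  delete planner;   // the factory returns a heap-allocated instance
}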