31 #ifndef GUM_ADAPTIVE_RMAX_PLANER_H 32 #define GUM_ADAPTIVE_RMAX_PLANER_H 34 #include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h> 35 #include <agrum/FMDP/fmdp.h> 36 #include <agrum/FMDP/learning/fmdpLearner.h> 37 #include <agrum/FMDP/planning/structuredPlaner.h> 38 #include <agrum/FMDP/simulation/statesCounter.h> 63 double discountFactor = 0.9,
64 double epsilon = 0.00001,
65 bool verbose =
true) {
66 return new AdaptiveRMaxPlaner(
new MDDOperatorStrategy<
double >(),
77 double discountFactor = 0.9,
78 double epsilon = 0.00001,
79 bool verbose =
true) {
80 return new AdaptiveRMaxPlaner(
new TreeOperatorStrategy<
double >(),
98 double discountFactor,
100 const ILearningStrategy* learner,
198 void checkState(
const Instantiation& newState, Idx actionId) {
199 if (!_initializedTable_[actionId]) {
200 _counterTable_[actionId]->reset(newState);
201 _initializedTable_[actionId] =
true;
203 _counterTable_[actionId]->incState(newState);
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
~AdaptiveRMaxPlaner()
Default destructor.
INLINE void emplace(Args &&... args)
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
virtual void initVFunction_()
Initializes the value function used by value iteration.
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Constructor taking the operator strategy, discount factor, epsilon, learner and verbosity flag.
HashTable< Idx, StatesCounter *> _counterTable_
const ILearningStrategy * _fmdpLearner_
std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
virtual MultiDimFunctionGraph< double > * valueIteration_()
Performs a single step of value iteration.
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
HashTable< Idx, bool > _initializedTable_
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
void _makeRMaxFunctionGraphs_()
void checkState(const Instantiation &newState, Idx actionId)
HashTable< Idx, MultiDimFunctionGraph< double > *> _actionsBoolTable_