48 #define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x) 64 double discountFactor,
155 std::vector< MultiDimFunctionGraph< double >* > qActionsSet;
171 qActionsSet.push_back(qAction);
212 qAction = this->
_addReward(qAction, *actionIter);
219 argMaxQActionsSet.push_back(
_makeArgMax(qAction, *actionIter));
246 std::vector< MultiDimFunctionGraph< double >* > rmaxs;
247 std::vector< MultiDimFunctionGraph< double >* > boolQs;
262 std::pair< NodeId, NodeId > rooty =
271 rmaxs.push_back(varRMax);
272 boolQs.push_back(varBoolQ);
308 std::pair< NodeId, NodeId >
313 std::pair< NodeId, NodeId > rep;
330 visited, visited->
nodeSon(currentNodeId, moda), rmax, boolQ);
331 rmaxsons[moda] = sonp.first;
332 bqsons[moda] = sonp.second;
346 for (
auto actionIter = this->
fmdp()->beginActions();
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
HashTable< Idx, StatesCounter *> __counterTable
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
HashTable< Idx, MultiDimFunctionGraph< double > *> __actionsBoolTable
<agrum/FMDP/planning/structuredPlaner.h>
~AdaptiveRMaxPlaner()
Default destructor.
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
virtual MultiDimFunctionGraph< double > * _valueIteration()
Performs a single step of value iteration.
void clean()
Removes var without nodes in the diagram.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setRootNode(const NodeId &root)
Sets root node of decision diagram.
<agrum/FMDP/SDyna/IDecisionStrategy.h>
<agrum/FMDP/simulation/statesCounter.h>
SequenceIteratorSafe< Idx > beginActions() const
Returns an iterator reference to the beginning of the list of actions.
virtual MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * _argmaximiseQactions(std::vector< MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > *> &)
Performs argmax_a Q(s,a)
void copyAndReassign(const MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > &src, const Bijection< const DiscreteVariable *, const DiscreteVariable * > &reassign)
Copies the src diagram's structure into this diagram.
double _discountFactor
Discount Factor used for infinite horizon planning.
IOperatorStrategy< double > * _operator
std::pair< NodeId, NodeId > __visitLearner(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
virtual MultiDimFunctionGraph< GUM_SCALAR > * maximize(const MultiDimFunctionGraph< GUM_SCALAR > *f1, const MultiDimFunctionGraph< GUM_SCALAR > *f2, Idx del=3)=0
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
INLINE const Bijection< const DiscreteVariable *, const DiscreteVariable *> & mapMainPrime() const
Returns the map binding main variables and prime variables.
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
const ILearningStrategy * __fmdpLearner
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
NodeId addInternalNode(const DiscreteVariable *var)
Inserts a new non terminal node in graph.
void _extractOptimalPolicy(const MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extract pi*(s) This function mainly consists in extracti...
Safe Iterators for hashtables.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const FMDP< double > * _fmdp
The Factored Markov Decision Process describing our planning situation (NB : this one must have funct...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual NodeId root() const =0
virtual MultiDimFunctionGraph< GUM_SCALAR > * add(const MultiDimFunctionGraph< GUM_SCALAR > *f1, const MultiDimFunctionGraph< GUM_SCALAR > *f2, Idx del=1)=0
HashTable< Idx, bool > __initializedTable
virtual Size domainSize() const =0
virtual MultiDimFunctionGraph< GUM_SCALAR > * multiply(const MultiDimFunctionGraph< GUM_SCALAR > *f1, const MultiDimFunctionGraph< GUM_SCALAR > *f2, Idx del=3)=0
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual const DiscreteVariable * nodeVar(NodeId ni) const =0
<agrum/FMDP/SDyna/ILearningStrategy.h>
void __makeRMaxFunctionGraphs()
HashTable< Idx, MultiDimFunctionGraph< double > *> __actionsRMaxTable
virtual bool isTerminal(NodeId ni) const =0
virtual MultiDimFunctionGraph< double > * _evalQaction(const MultiDimFunctionGraph< double > *, Idx)
Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration.
virtual void insertSetOfVars(MultiDimFunctionGraph< double > *) const =0
const MultiDimImplementation< GUM_SCALAR > * reward(Idx actionId=0) const
Returns the reward table of mdp.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy > * _makeArgMax(const MultiDimFunctionGraph< double > *Qaction, Idx actionId)
Creates a copy of the given Qaction that can be exploited by an Argmax.
virtual double modaMax() const =0
learnerSize
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
virtual NodeId nodeSon(NodeId ni, Idx modality) const =0
<agrum/FMDP/planning/adaptiveRMaxPlaner.h>
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
virtual double rMax() const =0
learnerSize
INLINE const FMDP< double > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
virtual MultiDimFunctionGraph< double > * _addReward(MultiDimFunctionGraph< double > *function, Idx actionId=0)
Performs the R(s) + gamma · function part of the value iteration.
virtual void _initVFunction()
Performs a single step of value iteration.
SequenceIteratorSafe< Idx > endActions() const
Returns an iterator reference to the end of the list of actions.
NodeId addTerminalNode(const GUM_SCALAR &value)
Adds a value to the MultiDimFunctionGraph.
Size Idx
Type for indexes.
MultiDimFunctionGraphManager< GUM_SCALAR, TerminalNodePolicy > * manager()
Returns a const reference to the manager of this diagram.
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
#define RECASTED(x)
For shorter line and hence more comprehensive code purposes only.
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
virtual Idx nodeNbObservation(NodeId ni) const =0
virtual void reduce()=0
Ensures that every isomorphic subgraphs are merged together.
virtual MultiDimFunctionGraph< double > * _minimiseFunctions(std::vector< MultiDimFunctionGraph< double > *> &)
Performs min_i F_i.
Size NodeId
Type for node ids.
MultiDimFunctionGraph< double > * _vFunction
The Value Function computed iteratively.
virtual void _evalPolicy()
Perform the required tasks to extract an optimal policy.
virtual MultiDimFunctionGraph< double > * _maximiseQactions(std::vector< MultiDimFunctionGraph< double > *> &)
Performs max_a Q(s,a)
virtual MultiDimFunctionGraph< GUM_SCALAR, ExactTerminalNodePolicy > * getFunctionInstance()=0
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.