31 #ifndef GUM_STRUCTURED_PLANNING_H 32 #define GUM_STRUCTURED_PLANNING_H 69 template <
typename GUM_SCALAR >
81 GUM_SCALAR epsilon = 0.00001,
82 bool verbose =
true) {
95 GUM_SCALAR epsilon = 0.00001,
96 bool verbose =
true) {
115 GUM_SCALAR discountFactor,
388 #endif // GUM_STRUCTURED_PLANNING_H static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
<agrum/FMDP/planning/structuredPlaner.h>
virtual ~StructuredPlaner()
Default destructor.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
A class to store the optimal actions.
virtual Size realSize() const
Returns the real number of parameters used for this table.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * _argmaximiseQactions(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
NodeId __recurExtractOptPol(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
GUM_SCALAR _discountFactor
Discount Factor used for infinite horizon planning.
virtual void _evalPolicy()
Perform the required tasks to extract an optimal policy.
IOperatorStrategy< GUM_SCALAR > * _operator
<agrum/FMDP/SDyna/IOperatorStrategy.h>
bool _verbose
Boolean used to indicate whether or not iteration information should be displayed on terminal...
virtual MultiDimFunctionGraph< GUM_SCALAR > * _valueIteration()
Performs a single step of value iteration.
void _extractOptimalPolicy(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extracts pi*(s). This function mainly consists in extracti...
This class is used to implement factored decision process.
<agrum/FMDP/planning/mddOperatorStrategy.h>
void __transferActionIds(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const FMDP< GUM_SCALAR > * _fmdp
The Factored Markov Decision Process describing our planning situation (NB : this one must have funct...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Class to handle efficiently argMaxSet.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Representation of a set. A Set is a structure that contains arbitrary elements.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
<agrum/FMDP/planning/treeOperatorStrategy.h>
virtual void _initVFunction()
Performs a single step of value iteration.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _evalQaction(const MultiDimFunctionGraph< GUM_SCALAR > *, Idx)
Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration.
GUM_SCALAR __threshold
The threshold value Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*...
Set< const DiscreteVariable *> _elVarSeq
A Set to eliminate primed variables.
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * _optimalPolicy
The associated optimal policy.
virtual Size vFunctionSize()
Returns the current size of the vFunction computed so far.
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * _makeArgMax(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of the given Qaction that can be exploited by an Argmax.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
Implementation of a Terminal Node Policy that maps a node id to a set of values.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual Size optimalPolicySize()
Returns the current size of the optimalPolicy computed so far.
NodeId __recurArgMaxCopy(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _addReward(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Size Idx
Type for indexes.
<agrum/FMDP/SDyna/IPlanningStrategy.h>
std::size_t Size
In aGrUM, hashed values are unsigned long int.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _minimiseFunctions(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
Size NodeId
Type for node ids.
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
MultiDimFunctionGraph< GUM_SCALAR > * _vFunction
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _maximiseQactions(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a)
INLINE const MultiDimFunctionGraph< GUM_SCALAR > * vFunction()
Returns a const ptr on the value function computed so far.
INLINE const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy()
Returns the best policy obtained so far.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.