28 #ifndef GUM_STRUCTURED_PLANNING_H 29 #define GUM_STRUCTURED_PLANNING_H 66 template <
typename GUM_SCALAR >
78 GUM_SCALAR epsilon = 0.00001,
79 bool verbose =
true) {
92 GUM_SCALAR epsilon = 0.00001,
93 bool verbose =
true) {
112 GUM_SCALAR discountFactor,
385 #endif // GUM_STRUCTURED_PLANNING_H static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
Headers of the ITerminalNodePolicy.
<agrum/FMDP/planning/structuredPlaner.h>
virtual ~StructuredPlaner()
Default destructor.
Headers of gum::SmallObjectAllocator.
A class to store the optimal actions.
virtual Size realSize() const
Returns the real number of parameters used for this table.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * _argmaximiseQactions(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a)
Headers of the Operator Strategy interface.
aGrUM's inline/outline selection
NodeId __recurExtractOptPol(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
GUM_SCALAR _discountFactor
Discount Factor used for infinite horizon planning.
virtual void _evalPolicy()
Perform the required tasks to extract an optimal policy.
IOperatorStrategy< GUM_SCALAR > * _operator
<agrum/FMDP/SDyna/IOperatorStrategy.h>
bool _verbose
Boolean used to indicate whether or not iteration information should be displayed on terminal...
virtual MultiDimFunctionGraph< GUM_SCALAR > * _valueIteration()
Performs a single step of value iteration.
void _extractOptimalPolicy(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extracts pi*(s). This function mainly consists in extracti...
This class is used to implement factored decision process.
<agrum/FMDP/planning/mddOperatorStrategy.h>
void __transferActionIds(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
gum is the global namespace for all aGrUM entities
const FMDP< GUM_SCALAR > * _fmdp
The Factored Markov Decision Process describing our planning situation (NB : this one must have funct...
This file contains several function objects that are not (yet) defined in the STL.
Class to handle efficiently argMaxSet.
Headers of the MDDOperatorStrategy planer class.
Representation of a set. A Set is a structure that contains arbitrary elements.
Class for implementation of factored Markov decision process.
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
<agrum/FMDP/planning/treeOperatorStrategy.h>
virtual void _initVFunction()
Performs a single step of value iteration.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _evalQaction(const MultiDimFunctionGraph< GUM_SCALAR > *, Idx)
Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration.
GUM_SCALAR __threshold
The threshold value. Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*...
Set< const DiscreteVariable *> _elVarSeq
A Set to eliminate primed variables.
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * _optimalPolicy
The associated optimal policy.
virtual Size vFunctionSize()
Returns the current size of the vFunction computed so far.
std::string optimalPolicy2String()
Provide a better toDot for the optimal policy where the leaves have the action name instead of its id...
MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * _makeArgMax(const MultiDimFunctionGraph< GUM_SCALAR > *Qaction, Idx actionId)
Creates a copy of the given Qaction that can be exploited by an Argmax.
Headers of MultiDimFunctionGraph.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
Implementation of a Terminal Node Policy that maps nodeid to a set of value.
Headers of the Planning Strategy interface.
virtual Size optimalPolicySize()
Returns the current size of the optimalPolicy computed so far.
NodeId __recurArgMaxCopy(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
Template implementation of FMDP/planning/StructuredPlaner.h classes.
INLINE const FMDP< GUM_SCALAR > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _addReward(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
Headers of the TreeOperatorStrategy planer class.
Headers of the MDDOperatorStrategy planer class.
This file contains several function objects that are not (yet) defined in the STL.
Size Idx
Type for indexes.
<agrum/FMDP/SDyna/IPlanningStrategy.h>
std::size_t Size
In aGrUM, hashed values are unsigned long int.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _minimiseFunctions(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
Size NodeId
Type for node ids.
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
MultiDimFunctionGraph< GUM_SCALAR > * _vFunction
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< GUM_SCALAR > * _maximiseQactions(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a)
INLINE const MultiDimFunctionGraph< GUM_SCALAR > * vFunction()
Returns a const ptr on the value function computed so far.
INLINE const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy()
Returns the best policy obtained so far.
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.