31 #ifndef GUM_STRUCTURED_PLANNING_H 32 #define GUM_STRUCTURED_PLANNING_H 36 #include <agrum/tools/core/argMaxSet.h> 37 #include <agrum/tools/core/functors.h> 38 #include <agrum/tools/core/inline.h> 39 #include <agrum/tools/core/smallobjectallocator/smallObjectAllocator.h> 41 #include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h> 42 #include <agrum/tools/multidim/utils/FunctionGraphUtilities/terminalNodePolicies/SetTerminalNodePolicy.h> 44 #include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h> 45 #include <agrum/FMDP/fmdp.h> 46 #include <agrum/FMDP/planning/IOperatorStrategy.h> 47 #include <agrum/FMDP/planning/actionSet.h> 48 #include <agrum/FMDP/planning/mddOperatorStrategy.h> 49 #include <agrum/FMDP/planning/treeOperatorStrategy.h> 69 template <
typename GUM_SCALAR >
80 GUM_SCALAR epsilon = 0.00001,
81 bool verbose =
true) {
82 return new StructuredPlaner< GUM_SCALAR >(
new MDDOperatorStrategy< GUM_SCALAR >(),
92 GUM_SCALAR epsilon = 0.00001,
93 bool verbose =
true) {
94 return new StructuredPlaner< GUM_SCALAR >(
new TreeOperatorStrategy< GUM_SCALAR >(),
111 GUM_SCALAR discountFactor,
182 virtual void initialize(
const FMDP< GUM_SCALAR >* fmdp);
373 #include <agrum/FMDP/planning/structuredPlaner_tpl.h> static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
virtual ~StructuredPlaner()
Default destructor.
bool verbose_
Boolean used to indicate whether or not iteration information should be displayed on terminal...
NodeId _recurExtractOptPol_(NodeId, const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
INLINE void emplace(Args &&... args)
NodeId _recurArgMaxCopy_(NodeId, Idx, const MultiDimFunctionGraph< GUM_SCALAR > *, MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *, HashTable< NodeId, NodeId > &)
Recursion part for the createArgMaxCopy.
const FMDP< GUM_SCALAR > * fmdp_
The Factored Markov Decision Process describing our planning situation (NB : this one must have funct...
virtual MultiDimFunctionGraph< GUM_SCALAR > * evalQaction_(const MultiDimFunctionGraph< GUM_SCALAR > *, Idx)
Performs the P(s'|s,a).V^{t-1}(s') part of the value iteration.
GUM_SCALAR _threshold_
The threshold value. Whenever | V^{n} - V^{n+1} | < threshold, we consider that V ~ V*...
StructuredPlaner(IOperatorStrategy< GUM_SCALAR > *opi, GUM_SCALAR discountFactor, GUM_SCALAR epsilon, bool verbose)
Default constructor.
void _transferActionIds_(const ArgMaxSet< GUM_SCALAR, Idx > &, ActionSet &)
Extract from an ArgMaxSet the associated ActionSet.
virtual MultiDimFunctionGraph< GUM_SCALAR > * addReward_(MultiDimFunctionGraph< GUM_SCALAR > *function, Idx actionId=0)
Perform the R(s) + gamma . function.
virtual MultiDimFunctionGraph< GUM_SCALAR > * minimiseFunctions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs min_i F_i.
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)
Initializes data structure needed for making the planning.
virtual MultiDimFunctionGraph< GUM_SCALAR > * valueIteration_()
Performs a single step of value iteration.
void extractOptimalPolicy_(const MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > *optimalValueFunction)
From V(s)* = argmax_a Q*(s,a), this function extracts pi*(s). This function mainly consists in extracti...
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
virtual MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * argmaximiseQactions_(std::vector< MultiDimFunctionGraph< ArgMaxSet< GUM_SCALAR, Idx >, SetTerminalNodePolicy > * > &)
Performs argmax_a Q(s,a)
MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy_
The associated optimal policy.
GUM_SCALAR discountFactor_
Discount Factor used for infinite horizon planning.
Set< const DiscreteVariable *> elVarSeq_
A Set to eliminate primed variables.
virtual void initVFunction_()
Performs a single step of value iteration.
MultiDimFunctionGraph< GUM_SCALAR > * vFunction_
The Value Function computed iteratively.
virtual MultiDimFunctionGraph< GUM_SCALAR > * maximiseQactions_(std::vector< MultiDimFunctionGraph< GUM_SCALAR > * > &)
Performs max_a Q(s,a)
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
IOperatorStrategy< GUM_SCALAR > * operator_
virtual void makePlanning(Idx nbStep=1000000)
Performs a value iteration.