[Fragmentary source listing of SDyna.h. The surviving lines show the bodies of the six static factory methods (spitiInstance, spimddiInstance, RMaxMDDInstance, RMaxTreeInstance, RandomMDDInstance and RandomTreeInstance, whose full signatures appear in the member documentation below), the declarations of the SDYNA constructor and of feedback(), and the closing #endif // GUM_SDYNA_H. Each factory body sets a bool actionReward flag (false in the first two factories, true in the other four), builds a learning strategy ls from attributeSelectionThreshold (plus similarityThreshold in the MDD variants), a planning strategy ps from discountFactor and epsilon, and a decision strategy ds, and then constructs the agent with (ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward).]
static StructuredPlaner< GUM_SCALAR > * sviInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
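Read together with the factory signatures documented below, the fragments suggest that every factory follows the same pattern: build the three strategies, then hand them to the SDYNA constructor. The following is a minimal sketch of what spitiInstance plausibly looks like, not the verbatim aGrUM source; the concrete learner class is not visible in this excerpt, so makeSpitiLearner is a hypothetical stand-in, while StructuredPlaner::sviInstance, E_GreedyDecider and the SDYNA constructor are documented on this page.

// Hedged sketch, not the verbatim aGrUM source.
static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                            double discountFactor = 0.9,
                            double epsilon = 1,
                            Idx    observationPhaseLenght = 100,
                            Idx    nbValueIterationStep = 10) {
  bool actionReward = false;   // as in the surviving fragment

  // Hypothetical helper standing in for the concrete decision-tree learner.
  ILearningStrategy* ls = makeSpitiLearner(attributeSelectionThreshold, actionReward);

  // Structured Value Iteration planner (factory documented on this page).
  IPlanningStrategy< double >* ps =
      StructuredPlaner< double >::sviInstance(discountFactor, epsilon);

  // Epsilon-greedy decision strategy (header included by SDyna.h).
  IDecisionStrategy* ds = new E_GreedyDecider();

  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}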
virtual std::string optimalPolicy2String()=0
Returns a string describing the optimal policy in a dot format.
std::string optimalPolicy2String()
std::string toString()
Returns a string representation of the SDyna instance.
<agrum/FMDP/SDyna/IDecisionStrategy.h>
Instantiation _lastState
The state in which the system is before we perform a new action.
void setCurrentState(const Instantiation &currentState)
Sets the last visited state to the given state.
Idx __lastAction
The last performed action.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
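The constructor makes the architecture explicit: an SDYNA agent is the combination of a learning strategy, a planning strategy and a decision strategy. Below is a hedged sketch of calling it directly, using only factories documented on this page; the concrete learner is passed in, since no concrete ILearningStrategy implementation appears in this excerpt, and the parameter values are arbitrary.

// Hedged sketch: assembling an SDYNA agent by hand.
SDYNA* makeCustomAgent(ILearningStrategy* learner) {
  IPlanningStrategy< double >* planner =
      StructuredPlaner< double >::spumddInstance(0.9, 0.00001);   // SPUMDD planner (documented below)
  IDecisionStrategy* decider = new E_GreedyDecider();             // epsilon-greedy decisions
  return new SDYNA(learner, planner, decider,
                   /* observationPhaseLenght */ 100,
                   /* nbValueIterationStep   */ 10,
                   /* actionReward           */ false);
}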
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
static SDYNA * RMaxTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Returns a new SDYNA instance combining tree-based learning with RMax-style planning.
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
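feedback() is where the environment's response enters the architecture: the observed transition and reward are handed to the learner, and replanning is triggered periodically (see __observationPhaseLenght below). A hedged sketch of one interaction step follows; env and chooseAction are hypothetical stand-ins (the action-selection member of SDYNA is not shown in this excerpt), while setCurrentState and feedback are documented on this page, and agent is a set-up SDYNA pointer as in the setup sketch further down.

// One interaction step (sketch).
Instantiation before = env.currentState();         // hypothetical environment call
agent->setCurrentState(before);

Idx    action = chooseAction(agent, before);       // hypothetical action selection
double reward = env.apply(action);                 // hypothetical: act and observe the reward
Instantiation after = env.currentState();

agent->feedback(before, after, action, reward);    // learn from the observed transition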
virtual Size optimalPolicySize()=0
Returns the current size of the optimal policy computed so far.
Base class for discrete random variables.
IPlanningStrategy< double > * __planer
The planner used to compute an optimal strategy.
void addVariable(const DiscreteVariable *var)
Inserts a new variable in the SDyna instance.
static SDYNA * RandomTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Returns a new SDYNA instance combining tree-based learning with a random decision strategy.
ILearningStrategy * __learner
The learner used to learn the FMDP.
The general SDyna architecture abstract class.
Representation of a set. A Set is a structure that contains arbitrary elements.
void addVariable(const DiscreteVariable *var)
Adds a variable to the FMDP description.
Size learnerSize()
Returns the size of the learner.
<agrum/FMDP/SDyna/ILearningStrategy.h>
<agrum/FMDP/decision/E_GreedyDecider.h>
Class to make decisions randomly. Does nothing more than what the DecisionStrategy interface already does...
Size valueFunctionSize()
Returns the size of the value function.
Size size() const
Returns the size of the FMDP.
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Idx __observationPhaseLenght
The number of observations made before the planner is called again.
void addAction(const Idx actionId, const std::string &actionName)
Inserts a new action in the SDyna instance.
Idx __nbObservation
The total number of observations made so far.
static SDYNA * RMaxMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Returns a new SDYNA instance combining MDD-based learning with RMax-style planning.
static SDYNA * spitiInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Returns a new SDYNA instance set up for the SPITI algorithm (tree-based learning).
void makePlanning(Idx nbStep)
Starts a new planning phase.
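makePlanning can also be called explicitly, outside the periodic replanning driven by the observation count. A one-line sketch, with agent as in the setup sketch below and an arbitrary step budget:

agent->makePlanning(10);   // run a planning phase of 10 value iteration steps now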
void initialize()
Initializes the SDyna instance.
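addVariable, addAction and initialize form the setup phase: the problem's state variables and actions are declared before the first observation. A minimal sketch follows, assuming the SDyna and LabelizedVariable headers are included; the variable names, action ids and action names are made up for illustration.

// Hypothetical two-variable, two-action problem (sketch).
gum::LabelizedVariable light("light", "is the light on?", 2);
gum::LabelizedVariable door ("door",  "is the door open?", 2);

gum::SDYNA* agent = gum::SDYNA::spitiInstance();   // factory documented above
agent->addVariable(&light);
agent->addVariable(&door);
agent->addAction(1, "toggleLight");
agent->addAction(2, "openDoor");
agent->initialize();                               // ready to receive observations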
Class for assigning/browsing values to tuples of discrete variables.
FMDP< double > * _fmdp
The learned factored Markov Decision Process.
<agrum/FMDP/planning/adaptiveRMaxPlaner.h>
static SDYNA * spimddiInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Returns a new SDYNA instance set up for the SPIMDDI algorithm (MDD-based learning, controlled by the similarityThreshold parameter).
virtual Size size()=0
Returns the size of the learner.
Size optimalPolicySize()
Returns the size of the optimal policy computed so far.
Size Idx
Type for indexes.
static SDYNA * RandomMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Returns a new SDYNA instance combining MDD-based learning with a random decision strategy.
virtual Size vFunctionSize()=0
Returns the current size of the value function computed so far.
IDecisionStrategy * __decider
The decision strategy used to choose which action to perform.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size modelSize()
Returns the size of the learned model.
Idx __nbValueIterationStep
The number of value iteration steps performed.
Set< Observation *> __bin
Since SDYNA created these observations, it has to delete them on destruction.
static StructuredPlaner< GUM_SCALAR > * spumddInstance(GUM_SCALAR discountFactor=0.9, GUM_SCALAR epsilon=0.00001, bool verbose=true)
void addAction(Idx actionId, const std::string &action)
Adds an action to the FMDP description.