    static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                                double discountFactor = 0.9, double epsilon = 1,
                                Idx observationPhaseLenght = 100,
                                Idx nbValueIterationStep = 10) {
      // Tree (ITI) learner + structured value iteration + epsilon-greedy decider.
      bool               actionReward = false;
      ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
          attributeSelectionThreshold, actionReward);
      IPlanningStrategy< double >* ps
          = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
      IDecisionStrategy* ds = new E_GreedyDecider();
      return new SDYNA(
          ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }
    static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
                                  double similarityThreshold = 0.3,
                                  double discountFactor = 0.9, double epsilon = 1,
                                  Idx observationPhaseLenght = 100,
                                  Idx nbValueIterationStep = 10) {
      // MDDI learner + MDD-based planner (non-verbose) + epsilon-greedy decider.
      bool               actionReward = false;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
          attributeSelectionThreshold, actionReward, similarityThreshold);
      IPlanningStrategy< double >* ps = StructuredPlaner< double >::spumddInstance(
          discountFactor, epsilon,
          false);
      IDecisionStrategy* ds = new E_GreedyDecider();
      return new SDYNA(ls,
                       ps,
                       ds,
                       observationPhaseLenght,
                       nbValueIterationStep,
                       actionReward,
                       false);
    }
    static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
                                  double similarityThreshold = 0.3,
                                  double discountFactor = 0.9, double epsilon = 1,
                                  Idx observationPhaseLenght = 100,
                                  Idx nbValueIterationStep = 10) {
      // MDDI learner + adaptive R-max planner, which also acts as the decider.
      bool               actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
          attributeSelectionThreshold, actionReward, similarityThreshold);
      AdaptiveRMaxPlaner* rm = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(
          ls, discountFactor, epsilon);
      IPlanningStrategy< double >* ps = rm;
      IDecisionStrategy*           ds = rm;
      return new SDYNA(
          ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }
    static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
                                   double discountFactor = 0.9, double epsilon = 1,
                                   Idx observationPhaseLenght = 100,
                                   Idx nbValueIterationStep = 10) {
      bool               actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, ITILEARNER >(
          attributeSelectionThreshold, actionReward);
      AdaptiveRMaxPlaner* rm
          = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
      IPlanningStrategy< double >* ps = rm;
      IDecisionStrategy*           ds = rm;
      return new SDYNA(
          ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }
    static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
                                    double similarityThreshold = 0.3,
                                    double discountFactor = 0.9, double epsilon = 1,
                                    Idx observationPhaseLenght = 100,
                                    Idx nbValueIterationStep = 10) {
      // MDDI learner + MDD-based planner + purely random decider.
      bool               actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
          attributeSelectionThreshold, actionReward, similarityThreshold);
      IPlanningStrategy< double >* ps
          = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
      IDecisionStrategy* ds = new RandomDecider();
      return new SDYNA(
          ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }
    static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
                                     double discountFactor = 0.9, double epsilon = 1,
                                     Idx observationPhaseLenght = 100,
                                     Idx nbValueIterationStep = 10) {
      bool               actionReward = true;
      ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
          attributeSelectionThreshold, actionReward);
      IPlanningStrategy< double >* ps
          = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
      IDecisionStrategy* ds = new RandomDecider();
      return new SDYNA(
          ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
    }
    // Constructor; the factory methods above are the intended entry points.
    SDYNA(ILearningStrategy* learner, IPlanningStrategy< double >* planer,
          IDecisionStrategy* decider, Idx observationPhaseLenght,
          Idx nbValueIterationStep, bool actionReward, bool verbose = true);

    // ... (other members elided) ...

    // Performs a feedback on the last transition.
    void feedback(const Instantiation& originalState, const Instantiation& reachedState,
                  Idx performedAction, double obtainedReward);
#endif   // GUM_SDYNA_H
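Each factory above fixes one combination of learner, planner and decider (and whether the reward is learnt on states or on actions), so switching the exploration and representation trade-off amounts to calling a different factory. A minimal sketch of that choice, assuming only the signatures shown above; the helper makeAgent is hypothetical and not part of aGrUM:

#include <agrum/FMDP/SDyna/sdyna.h>

// Hypothetical helper: build an agent from one of the SDYNA factories.
gum::SDYNA* makeAgent(bool rmaxExploration) {
  if (rmaxExploration)
    // R-max exploration with MDD representations; reward learnt on actions.
    return gum::SDYNA::RMaxMDDInstance(0.99,   // attributeSelectionThreshold
                                       0.3,    // similarityThreshold
                                       0.9,    // discountFactor
                                       1,      // epsilon
                                       100,    // observationPhaseLenght
                                       10);    // nbValueIterationStep
  // Default SPITI flavour: epsilon-greedy exploration, tree representations.
  return gum::SDYNA::spitiInstance();
}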
Cross-references collected from the generated documentation:

Class gum::SDYNA - The general SDyna architecture abstract class.
  SDYNA(ILearningStrategy* learner, IPlanningStrategy< double >* planer, IDecisionStrategy* decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose = true) - Constructor.
  static SDYNA* spitiInstance(...), spimddiInstance(...), RMaxMDDInstance(...), RMaxTreeInstance(...), RandomMDDInstance(...), RandomTreeInstance(...) - Named constructors; full signatures appear in the listing above.
  void initialize() - Initializes the SDyna instance.
  void addVariable(const DiscreteVariable* var) - Inserts a new variable in the SDyna instance.
  void addAction(const Idx actionId, const std::string& actionName) - Inserts a new action in the SDyna instance.
  void setCurrentState(const Instantiation& currentState) - Sets the last visited state to the given state.
  void feedback(const Instantiation& originalState, const Instantiation& reachedState, Idx performedAction, double obtainedReward) - Performs a feedback on the last transition.
  void makePlanning(Idx nbStep) - Starts a new planning.
  std::string toString()
  std::string optimalPolicy2String() - Returns a string describing the optimal policy in a dot format.
  Size learnerSize(), modelSize(), valueFunctionSize(), optimalPolicySize() - Return the current size of the learnt model, of the value function and of the optimal policy.
  FMDP< double >* _fmdp - The learnt Markovian Decision Process.
  Instantiation _lastState - The state in which the system is before we perform a new action.
  Idx __lastAction - The last performed action.
  ILearningStrategy* __learner - The learner used to learn the FMDP.
  IPlanningStrategy< double >* __planer - The planer used to plan an optimal strategy.
  IDecisionStrategy* __decider - The decider.
  Idx __observationPhaseLenght - The number of observations made before using the planer again.
  Idx __nbValueIterationStep - The number of value iteration steps performed at each planning.
  Idx __nbObservation - The total number of observations made so far.
  Set< Observation* > __bin - Since SDYNA made these observations, it has to delete them on quitting.

Strategy interfaces:
  ILearningStrategy (<agrum/FMDP/SDyna/ILearningStrategy.h>) - The Learning Strategy interface; virtual Size size() = 0 returns the learner's current size.
  IPlanningStrategy< double > - The Planning Strategy interface; virtual std::string optimalPolicy2String() = 0 returns the optimal policy in dot format, virtual Size optimalPolicySize() = 0 and virtual Size vFunctionSize() = 0 return the current sizes of the optimal policy and of the value function computed so far.
  IDecisionStrategy (<agrum/FMDP/SDyna/IDecisionStrategy.h>) - The Decision Strategy interface.

Planners and deciders:
  StructuredPlaner< GUM_SCALAR >::sviInstance(GUM_SCALAR discountFactor = 0.9, GUM_SCALAR epsilon = 0.00001, bool verbose = true) and spumddInstance(GUM_SCALAR discountFactor = 0.9, GUM_SCALAR epsilon = 0.00001, bool verbose = true) - Factories of the StructuredPlaner planer class.
  AdaptiveRMaxPlaner::ReducedAndOrderedInstance(const ILearningStrategy* learner, double discountFactor = 0.9, double epsilon = 0.00001, bool verbose = true) and TreeInstance(same arguments) (<agrum/FMDP/planning/adaptiveRMaxPlaner.h>) - Factories of the RMax planer class.
  E_GreedyDecider (<agrum/FMDP/decision/E_GreedyDecider.h>) - The epsilon-greedy decision maker class.
  Random decision maker class - Makes decisions randomly; does nothing more than the IDecisionStrategy interface requires.
  Lazy and statistical lazy decision maker classes.

Related classes and typedefs:
  FMDP< double > - Class for implementation of factored Markov decision processes; addVariable(const DiscreteVariable* var) adds a variable and addAction(Idx actionId, const std::string& action) adds an action to the FMDP description, which binds each main variable to a prime (next-step) variable.
  FMDPLearner - The FMDPLearner class.
  MDDOperatorStrategy - The MDDOperatorStrategy planer class.
  Observation - The Observation class.
  Instantiation - Class for assigning/browsing values to tuples of discrete variables.
  DiscreteVariable - Base class for discrete random variables.
  Set - Representation of a set, a structure that contains arbitrary elements.
  Idx (Size) - Type for indexes.
  Size (std::size_t) - In aGrUM, hashed values are unsigned long int.
  gum - The global namespace for all aGrUM entities.
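The members above cover the whole interaction cycle: declare the problem (addVariable, addAction), initialize, then feed observed transitions back with feedback() while SDYNA replans every __observationPhaseLenght observations; makePlanning() can also be invoked directly. The sketch below strings these calls together. It is a minimal illustration, not code from the aGrUM sources: header locations differ between aGrUM versions, the toy dynamics and reward are made up, and the action choice is a placeholder for whatever exploration scheme the application uses.

// Minimal SDyna loop: one binary variable "light" and two actions.
// Assumptions: include paths follow one aGrUM layout (they moved between
// versions); the transition and reward below are a toy model of the task.
#include <iostream>
#include <agrum/FMDP/SDyna/sdyna.h>
#include <agrum/variables/labelizedVariable.h>

int main() {
  // SPITI flavour: decision-tree learner + epsilon-greedy decisions.
  gum::SDYNA* agent = gum::SDYNA::spitiInstance();

  // Describe the problem to the agent.
  gum::LabelizedVariable light("light", "is the light on?", 2);
  agent->addVariable(&light);
  agent->addAction(1, "switch");
  agent->addAction(2, "wait");
  agent->initialize();

  // Starting state.
  gum::Instantiation state;
  state.add(light);
  state.chgVal(light, 0);
  agent->setCurrentState(state);

  for (gum::Idx step = 0; step < 500; ++step) {
    gum::Idx action = 1 + (step % 2);   // placeholder exploration policy

    // Toy environment: "switch" toggles the light, "wait" leaves it alone.
    gum::Instantiation next(state);
    if (action == 1) next.chgVal(light, 1 - state.val(light));
    double reward = (next.val(light) == 1) ? 1.0 : 0.0;

    // Let SDYNA observe the transition; it replans on its own schedule.
    agent->feedback(state, next, action, reward);
    state = next;
  }

  agent->makePlanning(10);   // one more explicit planning pass
  std::cout << agent->optimalPolicy2String() << std::endl;

  delete agent;
  return 0;
}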