SDYNA::SDYNA(ILearningStrategy*           learner,
             IPlanningStrategy< double >* planer,
             IDecisionStrategy*           decider,
             Idx                          observationPhaseLenght,
             Idx                          nbValueIterationStep,
             bool                         actionReward,
             bool                         verbose) :
    __learner(learner), __planer(planer), __decider(decider),
    __observationPhaseLenght(observationPhaseLenght),
    __nbValueIterationStep(nbValueIterationStep), __actionReward(actionReward),
    _verbose(verbose) {
  GUM_CONSTRUCTOR(SDYNA);
  // ...
}
SDYNA::~SDYNA() {
  // ...
  // SDYNA owns the observations it collected and must delete them
  for (auto obsIter = __bin.beginSafe(); obsIter != __bin.endSafe(); ++obsIter)
    delete *obsIter;
  // ...
  GUM_DESTRUCTOR(SDYNA);
}
if (_verbose) std::cout << "Updating decision trees ..." << std::endl;
// ...
if (_verbose) std::cout << "Planning ..." << std::endl;
if (actionSet.size() == 1) {
  // a single optimal action: take it
  // ...
}
// otherwise, draw uniformly among the equally optimal actions
Idx randy = (Idx)((double)std::rand() / (double)RAND_MAX * actionSet.size());
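Note that the cast above can, in principle, yield an index equal to actionSet.size(), since std::rand() may return RAND_MAX exactly. A standalone illustration of the draw (not aGrUM code; the guard for that edge case is ours):

#include <cstdlib>
#include <ctime>
#include <iostream>

int main() {
  std::srand((unsigned)std::time(nullptr));
  std::size_t setSize = 3;  // e.g. three equally optimal actions
  // scale a uniform draw from [0, 1] to an index in [0, setSize]
  std::size_t randy =
      (std::size_t)((double)std::rand() / (double)RAND_MAX * setSize);
  if (randy == setSize) randy = 0;  // rand() == RAND_MAX maps out of range
  std::cout << "picked action index " << randy << std::endl;
  return 0;
}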
std::string SDYNA::toString() {
  std::stringstream description;
  // ...
  return description.str();
}
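Read together, these fragments suggest the intended control loop: observe transitions, feed them back, and periodically replan. The following is a minimal usage sketch, not taken from the aGrUM sources; MySimulator and its accessors are hypothetical stand-ins for an environment, and only SDYNA members documented in the entries below are called.

#include <iostream>
#include <agrum/FMDP/SDyna/sdyna.h>

struct MySimulator {                  // hypothetical environment interface
  gum::Instantiation state() const;   // current state
  gum::Idx pickAction() const;        // action chosen by the agent
  double perform(gum::Idx action);    // apply the action, return the reward
};

void runEpisode(gum::SDYNA& sdyna, MySimulator& sim, gum::Idx nbSteps) {
  sdyna.initialize();                  // set up learner, planer and decider
  sdyna.setCurrentState(sim.state());  // remember the starting state
  for (gum::Idx i = 0; i < nbSteps; ++i) {
    gum::Instantiation before = sim.state();
    gum::Idx action = sim.pickAction();
    double reward = sim.perform(action);
    // one observation: the learner updates the FMDP and, periodically,
    // SDYNA replans (see __observationPhaseLenght below)
    sdyna.feedback(before, sim.state(), action, reward);
  }
  std::cout << sdyna.toString() << std::endl;  // learned FMDP and policy
}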
virtual void initialize(const FMDP< GUM_SCALAR > *fmdp)=0
Initializes the planner.
virtual std::string optimalPolicy2String()=0
Returns a string describing the optimal policy in a dot format.
Size size() const
Gives the size.
virtual void initialize(const FMDP< double > *fmdp)
Initializes the decider.
std::string toString()
Returns a string describing the learned FMDP and the associated optimal policy.
virtual ActionSet stateOptimalPolicy(const Instantiation &curState)
Returns the set of optimal actions to perform in the given state.
ActionSet
A class to store the optimal actions.
<agrum/FMDP/SDyna/IDecisionStrategy.h>
Instantiation _lastState
The state in which the system is before we perform a new action.
void setCurrentState(const Instantiation &currentState)
Sets the last visited state to the given state.
Idx __lastAction
The last performed action.
virtual void updateFMDP()=0
Starts an update of the data structures in the associated FMDP.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
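A construction sketch for orientation; MyLearner, MyPlaner and MyDecider are hypothetical placeholders for classes implementing the three strategy interfaces (aGrUM provides concrete ones):

auto* learner = new MyLearner();  // implements ILearningStrategy
auto* planer  = new MyPlaner();   // implements IPlanningStrategy< double >
auto* decider = new MyDecider();  // implements IDecisionStrategy
gum::SDYNA sdyna(learner, planer, decider,
                 100,     // observationPhaseLenght: observations between replans
                 10,      // nbValueIterationStep: VI steps per planning phase
                 false);  // actionReward; verbose defaults to true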
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Provides feedback on the last transition.
virtual void checkState(const Instantiation &newState, Idx actionId)=0
IPlanningStrategy< double > * __planer
The planner used to plan an optimal strategy.
const Sequence< const DiscreteVariable *> & variablesSequence() const final
Returns the sequence of DiscreteVariable of this instantiation.
virtual void initialize(FMDP< double > *fmdp)=0
Initializes the learner.
ILearningStrategy * __learner
The learner used to learn the FMDP.
Idx val(Idx i) const
Returns the current value of the variable at position i.
The general SDyna architecture abstract class.
void setReward(double reward)
Sets the reward obtained in this observation.
<agrum/FMDP/SDyna/ILearningStrategy.h>
virtual bool addObservation(Idx actionId, const Observation *obs)=0
Gives the learner a new transition.
virtual void makePlanning(Idx nbIte)=0
Starts a new planning phase.
INLINE void setRModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation (used for reward learning).
Idx __observationPhaseLenght
The number of observations to make before the planner is used again.
Idx __nbObservation
The total number of observations made so far.
const DiscreteVariable * main2prime(const DiscreteVariable *mainVar) const
Returns the primed variable associated with the given main variable.
virtual const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > * optimalPolicy()=0
Returns the optimal policy computed so far.
void makePlanning(Idx nbStep)
Starts a new planning phase.
void initialize()
Initializes the SDYNA instance.
Class for assigning/browsing values to tuples of discrete variables.
FMDP< double > * _fmdp
The learned Markov Decision Process.
Size Idx
Type for indexes.
IDecisionStrategy * __decider
The decider.
Idx __nbValueIterationStep
The number of value iteration steps we perform.
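Together with __observationPhaseLenght and __nbObservation above, this counter suggests SDYNA's observe-then-plan cadence. A standalone sketch of that schedule, inferred from the member briefs rather than copied from the sources:

void onObservation(gum::SDYNA& sdyna, gum::Idx& nbObservation,
                   gum::Idx observationPhaseLenght,
                   gum::Idx nbValueIterationStep) {
  ++nbObservation;
  // presumed: replan every observationPhaseLenght observations
  if (nbObservation % observationPhaseLenght == 0)
    sdyna.makePlanning(nbValueIterationStep);
}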
INLINE void setModality(const DiscreteVariable *var, Idx modality)
Sets the modality assumed by the given variable in this observation.
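setModality, setRModality and setReward populate an Observation before it reaches the learner via addObservation. A hedged sketch, assuming gum::Observation's default constructor and the include path shown; recordTransition is a hypothetical helper:

#include <agrum/FMDP/learning/observation.h>

void recordTransition(gum::ILearningStrategy* learner, gum::Idx actionId,
                      const gum::DiscreteVariable* var, gum::Idx observedVal,
                      double reward) {
  auto* obs = new gum::Observation();
  obs->setModality(var, observedVal);  // value assumed by var in this observation
  obs->setReward(reward);              // reward obtained on the transition
  // in SDYNA's own loop, observations are also stored in __bin and
  // deleted in the destructor
  learner->addObservation(actionId, obs);
}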
void setOptimalStrategy(const MultiDimFunctionGraph< ActionSet, SetTerminalNodePolicy > *optPol)
Sets the optimal policy the decision strategy relies on.
Set< Observation *> __bin
Since SDYNA made these observations, it has to delete them on destruction.
std::string toString() const
Displays the FMDP in a Dot format.
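Because toString() emits Dot, its output can be rendered with Graphviz. A minimal sketch (the file name is illustrative):

#include <fstream>
#include <agrum/FMDP/fmdp.h>

void dumpFMDP(const gum::FMDP< double >& fmdp) {
  std::ofstream out("fmdp.dot");  // render with: dot -Tpng fmdp.dot -o fmdp.png
  out << fmdp.toString();         // Dot-format description of the FMDP
}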