#include <agrum/agrum.h>

#include <agrum/tools/multidim/instantiation.h>
#include <agrum/tools/variables/discreteVariable.h>

#include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
#include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
#include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
#include <agrum/FMDP/decision/E_GreedyDecider.h>
#include <agrum/FMDP/decision/lazyDecider.h>
#include <agrum/FMDP/decision/randomDecider.h>
#include <agrum/FMDP/decision/statisticalLazyDecider.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>
#include <agrum/FMDP/learning/observation.h>
#include <agrum/FMDP/planning/actionSet.h>
#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>

// Static instantiation methods of the SDYNA class:

static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                            double discountFactor              = 0.9,
                            double epsilon                     = 1,
                            Idx    observationPhaseLenght      = 100,
                            Idx    nbValueIterationStep        = 10) {
  bool               actionReward = false;
  ILearningStrategy* ls
     = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new E_GreedyDecider();
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
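// A minimal usage sketch (not part of the header), assuming SDYNA lives in the
// usual gum namespace: every parameter of spitiInstance() has a default, and
// the returned object is heap-allocated, so the caller is responsible for
// deleting it.
//
//   gum::SDYNA* agent = gum::SDYNA::spitiInstance();
//   // ... interact with the environment through the agent ...
//   delete agent;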
static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold         = 0.3,
                              double discountFactor              = 0.9,
                              double epsilon                     = 1,
                              Idx    observationPhaseLenght      = 100,
                              Idx    nbValueIterationStep        = 10) {
  bool               actionReward = false;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
                                                     actionReward,
                                                     similarityThreshold);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon, false);
  IDecisionStrategy* ds = new E_GreedyDecider();
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward,
                   false);
}
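// Usage sketch: spimddiInstance() exposes the similarity threshold of the
// MDD-based learner as its second parameter, so a call overriding only that
// value while keeping the default attribute-selection threshold looks like:
//
//   gum::SDYNA* agent = gum::SDYNA::spimddiInstance(0.99, 0.2);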
static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold         = 0.3,
                              double discountFactor              = 0.9,
                              double epsilon                     = 1,
                              Idx    observationPhaseLenght      = 100,
                              Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
                                                     actionReward,
                                                     similarityThreshold);
  AdaptiveRMaxPlaner* rm
     = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(ls, discountFactor, epsilon);
  IPlanningStrategy< double >* ps = rm;
  IDecisionStrategy*           ds = rm;
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
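// Usage sketch: in the RMax variants the AdaptiveRMaxPlaner instance is used
// both as the planning strategy and as the decision strategy, so no separate
// decider is created:
//
//   gum::SDYNA* agent = gum::SDYNA::RMaxMDDInstance();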
static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
                               double discountFactor              = 0.9,
                               double epsilon                     = 1,
                               Idx    observationPhaseLenght      = 100,
                               Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
  AdaptiveRMaxPlaner*          rm = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
  IPlanningStrategy< double >* ps = rm;
  IDecisionStrategy*           ds = rm;
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
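// Usage sketch: the same RMax scheme with tree-based representations; the
// second and third parameters are the discount factor and epsilon:
//
//   gum::SDYNA* agent = gum::SDYNA::RMaxTreeInstance(0.99, 0.8, 0.5);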
static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
                                double similarityThreshold         = 0.3,
                                double discountFactor              = 0.9,
                                double epsilon                     = 1,
                                Idx    observationPhaseLenght      = 100,
                                Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
                                                     actionReward,
                                                     similarityThreshold);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new RandomDecider();
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
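// Usage sketch: RandomMDDInstance() keeps the MDD-based learner and SPUMDD
// planner but picks actions through a RandomDecider instead of an
// epsilon-greedy one:
//
//   gum::SDYNA* agent = gum::SDYNA::RandomMDDInstance();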
static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
                                 double discountFactor              = 0.9,
                                 double epsilon                     = 1,
                                 Idx    observationPhaseLenght      = 100,
                                 Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold,
                                                         actionReward);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new RandomDecider();
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
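// Usage sketch: the tree-based counterpart of RandomMDDInstance(). The last two
// parameters control how often planning is triggered and how many value
// iteration steps each planning phase runs:
//
//   gum::SDYNA* agent = gum::SDYNA::RandomTreeInstance(0.99, 0.9, 1, 50, 20);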
SDYNA(ILearningStrategy*           learner,
      IPlanningStrategy< double >* planer,
      IDecisionStrategy*           decider,
      Idx                          observationPhaseLenght,
      Idx                          nbValueIterationStep,
      bool                         actionReward,
      bool                         verbose = true);
void addAction(const Idx actionId, const std::string& actionName) {
  fmdp_->addAction(actionId, actionName);
}

void addVariable(const DiscreteVariable* var) { fmdp_->addVariable(var); }

void initialize(const Instantiation& initialState);

void setCurrentState(const Instantiation& currentState) { lastState_ = currentState; }

void feedback(const Instantiation& originalState,
              const Instantiation& reachedState,
              Idx                  performedAction,
              double               obtainedReward);

void feedback(const Instantiation& reachedState, double obtainedReward);
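// A model setup sketch using the members above (hypothetical variable and
// action names, and assuming agrum/tools/variables/labelizedVariable.h is
// also included):
//
//   gum::SDYNA* agent = gum::SDYNA::spitiInstance();
//   auto* light = new gum::LabelizedVariable("light", "is the light on", 2);
//   agent->addVariable(light);
//   agent->addAction(1, "toggle");
//
//   gum::Instantiation start;
//   start.add(*light);
//   start.chgVal(*light, 0);
//   agent->initialize(start);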
std::string optimalPolicy2String()
Returns a string representation of the learnt optimal policy.
std::string toString()
Returns a string representation of the learnt FMDP.
void setCurrentState(const Instantiation &currentState)
Sets the last visited state to the given state.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
static SDYNA * RMaxTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining a tree-based FMDPLearner (G-test) with an adaptive RMax planner, which also serves as the decision strategy.
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Gives feedback on the last transition.
Idx _observationPhaseLenght_
The number of observations made before the planner is called again.
void addVariable(const DiscreteVariable *var)
Inserts a new variable in the SDyna instance.
static SDYNA * RandomTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining a tree-based FMDPLearner (chi2 test) with structured value iteration and a random decision strategy.
Instantiation lastState_
The state in which the system is before we perform a new action.
FMDP< double > * fmdp_
The learnt Markov Decision Process.
SDYNA
The general SDyna architecture abstract class.
IDecisionStrategy * _decider_
The decision strategy used to select actions.
Idx _nbObservation_
The total number of observations made so far.
Size learnerSize()
Returns the size of the learner's internal data structures.
Set< Observation* > _bin_
Observations made by SDYNA; it has to delete them on exit.
Size valueFunctionSize()
Returns the size of the learnt value function.
void feedback(const Instantiation &reachedState, double obtainedReward)
Gives feedback on the last transition, using the last visited state and the last performed action.
void addAction(const Idx actionId, const std::string &actionName)
Inserts a new action in the SDyna instance.
IPlanningStrategy< double > * _planer_
The planner used to compute an optimal strategy.
static SDYNA * RMaxMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining an MDD-based FMDPLearner (G-test) with an adaptive RMax planner, which also serves as the decision strategy.
Idx _nbValueIterationStep_
The number of value iteration steps performed at each planning phase.
static SDYNA * spitiInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance using the SPITI instantiation: tree-based FMDPLearner (chi2 test), structured value iteration, and an epsilon-greedy decision strategy.
void makePlanning(Idx nbStep)
Starts a new planning phase.
Idx takeAction(const Instantiation &curState)
Returns the id of the action SDYNA chooses to perform in the given state.
ILearningStrategy * _learner_
The learner used to learn the FMDP.
void initialize()
Initializes the SDyna instance.
static SDYNA * spimddiInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance using the SPIMDDI instantiation: MDD-based FMDPLearner (G-test), SPUMDD structured planning, and an epsilon-greedy decision strategy.
void initialize(const Instantiation &initialState)
Initializes the SDyna instance at the given state.
Size optimalPolicySize()
Returns the size of the learnt optimal policy.
static SDYNA * RandomMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining an MDD-based FMDPLearner (G-test) with SPUMDD structured planning and a random decision strategy.
Idx _lastAction_
The last performed action.
Size modelSize()
Returns the size of the learnt model (FMDP).
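Continuing the setup sketch above, a minimal observe-act loop could look as
follows; nextStateOf() and rewardOf() are hypothetical placeholders for the
application's environment, not aGrUM functions, and planning is triggered
internally every observationPhaseLenght observations:

  gum::Instantiation current = start;
  for (gum::Idx step = 0; step < 1000; ++step) {
    gum::Idx action = agent->takeAction(current);
    gum::Instantiation reached = nextStateOf(current, action);   // environment transition
    double reward = rewardOf(reached);                           // environment reward
    agent->feedback(current, reached, action, reward);
    current = reached;
  }
  // Diagnostics on what has been learnt so far (needs <iostream>):
  std::cout << agent->modelSize() << " " << agent->optimalPolicySize() << std::endl;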