#include <agrum/agrum.h>

#include <agrum/tools/multidim/instantiation.h>
#include <agrum/tools/variables/discreteVariable.h>

#include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
#include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
#include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
#include <agrum/FMDP/decision/E_GreedyDecider.h>
#include <agrum/FMDP/decision/lazyDecider.h>
#include <agrum/FMDP/decision/randomDecider.h>
#include <agrum/FMDP/decision/statisticalLazyDecider.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>
#include <agrum/FMDP/learning/observation.h>
#include <agrum/FMDP/planning/actionSet.h>
#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>

/// SPITI instance: ITI learner with a chi2 test, structured value iteration
/// planning and an epsilon-greedy decision strategy.
static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                            double discountFactor = 0.9,
                            double epsilon = 1,
                            Idx observationPhaseLenght = 100,
                            Idx nbValueIterationStep = 10) {
  bool actionReward = false;
  ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
     attributeSelectionThreshold, actionReward);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new E_GreedyDecider();
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward);
}

/// SPIMDDI instance: IMDDI learner with a G test, SPUMDD planning and an
/// epsilon-greedy decision strategy.
static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold = 0.3,
                              double discountFactor = 0.9,
                              double epsilon = 1,
                              Idx observationPhaseLenght = 100,
                              Idx nbValueIterationStep = 10) {
  bool actionReward = false;
  ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
     attributeSelectionThreshold, actionReward, similarityThreshold);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new E_GreedyDecider();
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward);
}

/// RMax MDD instance: IMDDI learner with a G test and the reduced and ordered
/// adaptive RMax planer, which also serves as the decision strategy.
static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold = 0.3,
                              double discountFactor = 0.9,
                              double epsilon = 1,
                              Idx observationPhaseLenght = 100,
                              Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
     attributeSelectionThreshold, actionReward, similarityThreshold);
  AdaptiveRMaxPlaner* rm
     = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(ls, discountFactor, epsilon);
  IPlanningStrategy< double >* ps = rm;
  IDecisionStrategy* ds = rm;
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward);
}

/// RMax Tree instance: ITI learner with a G test and the tree-based adaptive
/// RMax planer, which also serves as the decision strategy.
static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
                               double discountFactor = 0.9,
                               double epsilon = 1,
                               Idx observationPhaseLenght = 100,
                               Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
  AdaptiveRMaxPlaner* rm
     = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
  IPlanningStrategy< double >* ps = rm;
  IDecisionStrategy* ds = rm;
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward);
}

/// Random MDD instance: IMDDI learner with a G test, SPUMDD planning and a
/// random decision strategy.
static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
                                double similarityThreshold = 0.3,
                                double discountFactor = 0.9,
                                double epsilon = 1,
                                Idx observationPhaseLenght = 100,
                                Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  ILearningStrategy* ls = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(
     attributeSelectionThreshold, actionReward, similarityThreshold);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new RandomDecider();
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward);
}

/// Random Tree instance: ITI learner with a chi2 test, structured value
/// iteration planning and a random decision strategy.
static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
                                 double discountFactor = 0.9,
                                 double epsilon = 1,
                                 Idx observationPhaseLenght = 100,
                                 Idx nbValueIterationStep = 10) {
  bool actionReward = true;
  ILearningStrategy* ls = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(
     attributeSelectionThreshold, actionReward);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new RandomDecider();
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward);
}
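As a usage sketch, each of the factories above returns a ready-to-use agent, so choosing a variant only amounts to picking its hyperparameters. The values below are illustrative, not recommendations, and a using namespace gum directive is assumed:

using namespace gum;

// Illustrative hyperparameters; every parameter has the default shown above.
SDYNA* agent = SDYNA::spimddiInstance(0.95,   // attributeSelectionThreshold
                                      0.3,    // similarityThreshold
                                      0.9,    // discountFactor
                                      1.0,    // epsilon
                                      50,     // observationPhaseLenght
                                      10);    // nbValueIterationStep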
/// Constructor: assembles an SDyna instance from the given learning,
/// planning and decision strategies.
SDYNA(ILearningStrategy* learner,
      IPlanningStrategy< double >* planer,
      IDecisionStrategy* decider,
      Idx observationPhaseLenght,
      Idx nbValueIterationStep,
      bool actionReward,
      bool verbose = true);
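For a combination of strategies that none of the factories provides, the three strategy objects can also be assembled by hand and passed to this constructor. A minimal sketch, mirroring the pattern of the factories above but swapping in a random decider; the parameter values are illustrative and the same ownership conventions as in the factories are assumed:

using namespace gum;

bool actionReward = false;

// Same learner and planer as spitiInstance(), but with a random decider
// instead of an epsilon-greedy one.
ILearningStrategy* learner
   = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(0.99, actionReward);
IPlanningStrategy< double >* planer
   = StructuredPlaner< double >::sviInstance(0.9, 1.0);
IDecisionStrategy* decider = new RandomDecider();

SDYNA* agent = new SDYNA(learner,
                         planer,
                         decider,
                         100,   // observationPhaseLenght
                         10,    // nbValueIterationStep
                         actionReward);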
/// Inserts a new action in the SDyna instance.
void addAction(const Idx actionId, const std::string& actionName) {
  fmdp_->addAction(actionId, actionName);
}
/// Inserts a new variable in the SDyna instance.
void addVariable(const DiscreteVariable* var) { fmdp_->addVariable(var); }
/// Initializes the SDyna instance at the given state.
void initialize(const Instantiation& initialState);
/// Sets the last visited state to the given state.
void setCurrentState(const Instantiation& currentState) { lastState_ = currentState; }
/// Performs a feedback on the last transition.
void feedback(const Instantiation& originalState,
              const Instantiation& reachedState,
              Idx performedAction,
              double obtainedReward);
/// Performs a feedback on the last transition.
void feedback(const Instantiation& reachedState, double obtainedReward);
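Taken together, initialize, takeAction and feedback form the observe/act loop that an environment driver runs. A minimal sketch, assuming a hypothetical Simulator class on the environment side; its variables(), actions(), currentState(), perform() and reward() members are illustrative and not part of aGrUM:

using namespace gum;

Simulator env;                               // hypothetical environment
SDYNA*    agent = SDYNA::spitiInstance();    // any factory would do

// Declare the problem's variables and actions to the agent.
for (const DiscreteVariable* var: env.variables())
  agent->addVariable(var);
for (const auto& act: env.actions())         // assumed pairs of (id, name)
  agent->addAction(act.first, act.second);

agent->initialize(env.currentState());

for (Idx step = 0; step < 1000; ++step) {
  // The decision strategy picks the next action for the current state.
  Idx actionId = agent->takeAction(env.currentState());
  env.perform(actionId);

  // Report the reached state and reward: the learner updates the FMDP and,
  // every observationPhaseLenght observations, the planer replans.
  agent->feedback(env.currentState(), env.reward());
}

The four-argument feedback overload can be used instead when the driver itself keeps track of the original state and of the performed action.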
Idx nbObservation__
The total number of observations made so far.
std::string optimalPolicy2String()
Returns a string representation of the optimal policy currently computed by the planer.
std::string toString()
Returns a string representation of the learnt FMDP and of the optimal policy.
void setCurrentState(const Instantiation &currentState)
Sets last state visited to the given state.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
static SDYNA * RMaxTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Constructs an SDyna instance that uses an ITI learner with a G test and the tree-based adaptive RMax planer, which also serves as the decision strategy.
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Performs a feedback on the last transition.
IPlanningStrategy< double > * planer__
The planer used to plan an optimal strategy.
void addVariable(const DiscreteVariable *var)
Inserts a new variable in the SDyna instance.
static SDYNA * RandomTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Constructs an SDyna instance that uses an ITI learner with a chi2 test, structured value iteration planning and a random decision strategy.
Instantiation lastState_
The state in which the system is before we perform a new action.
FMDP< double > * fmdp_
The learnt Markovian Decision Process.
The general SDyna architecture abstract class.
Size learnerSize()
Returns the size of the data structure used by the learner.
IDecisionStrategy * decider__
The decider.
Size valueFunctionSize()
Returns the size of the value function computed by the planer.
Idx lastAction__
The last performed action.
Set< Observation *> bin__
Since SDYNA created these observations, it has to delete them when it is destroyed.
Idx observationPhaseLenght__
The number of observations to make before calling the planer again.
void feedback(const Instantiation &reachedState, double obtainedReward)
Performs a feedback on the last transition.
void addAction(const Idx actionId, const std::string &actionName)
Inserts a new action in the SDyna instance.
static SDYNA * RMaxMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Constructs an SDyna instance that uses an IMDDI learner with a G test and the reduced and ordered adaptive RMax planer, which also serves as the decision strategy.
static SDYNA * spitiInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Constructs an SDyna instance that uses an ITI learner with a chi2 test, structured value iteration planning and an epsilon-greedy decision strategy (the SPITI setup).
void makePlanning(Idx nbStep)
Starts a new planning phase.
Idx takeAction(const Instantiation &curState)
void initialize()
Initializes the Sdyna instance.
Idx nbValueIterationStep__
The number of value iteration steps we perform.
static SDYNA * spimddiInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Constructs an SDyna instance that uses an IMDDI learner with a G test, SPUMDD planning and an epsilon-greedy decision strategy (the SPIMDDI setup).
void initialize(const Instantiation &initialState)
Initializes the SDyna instance at the given state.
Size optimalPolicySize()
Returns the size of the optimal policy computed by the planer.
static SDYNA * RandomMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Constructs an SDyna instance that uses an IMDDI learner with a G test, SPUMDD planning and a random decision strategy.
Size modelSize()
Returns the size of the learnt FMDP.
ILearningStrategy * learner__
The learner used to learn the FMDP.