// aGrUM 0.20.2 — a C++ library for (probabilistic) graphical models
// sdyna.cpp (doxygen-generated source listing; "Go to the documentation of
// this file" is page chrome from the generated HTML, not part of the source)
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of the ModelLearner class.
25  *
26  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(@AMU)
28  */
29 
30 
31 // =========================================================================
32 #include <cstdlib>
33 #include <random>
34 // =========================================================================
35 #include <agrum/FMDP/SDyna/sdyna.h>
36 // =========================================================================
37 
38 namespace gum {
39 
40  // ==========================================================================
41  // Constructor & destructor.
42  // ==========================================================================
43 
44  // ###################################################################
45  /*
46  * Constructor
47  *
48  * @param observationPhaseLenght : the number of observation done before a
49  * replanning is launch. If equals 0, a planning is done after each structural
50  * change.
51  * @param nbValueIterationStep : the number of value iteration done during
52  * one planning
53  * @return an instance of SDyna architecture
54  */
55  // ###################################################################
56 
// NOTE(review): doxygen-extraction artifact — source lines 57, 59-61 and
// 64-67 (the constructor's name, its remaining parameters and most of its
// member-initializer list) are missing from this listing. Recover the full
// constructor from the original sdyna.cpp before treating this fragment as
// authoritative.
 58  IPlanningStrategy< double >* planer,
 62  bool actionReward,
 63  bool verbose) :
 68  verbose_(verbose) {
 70 
 // Allocate the factored MDP model owned by this SDYNA instance
 // (released in ~SDYNA()).
 71  fmdp_ = new FMDP< double >();
 72 
 // Observation counter starts at 1 — presumably so the very first
 // feedback() already counts toward the replanning threshold; confirm
 // against the full source.
 73  nbObservation__ = 1;
 74  }
75 
76  // ###################################################################
77  // Destructor
78  // ###################################################################
 // Destructor: releases every component owned by the SDYNA architecture —
 // the decision, learning and planning strategies, the Observation objects
 // still pending in bin__ (allocated in feedback()), and the learned FMDP.
 79  SDYNA::~SDYNA() {
 80  delete decider__;
 81 
 82  delete learner__;
 83 
 84  delete planer__;
 85 
 // Safe iterators allow erasure-tolerant traversal while each stored
 // Observation* is deleted.
 86  for (auto obsIter = bin__.beginSafe(); obsIter != bin__.endSafe(); ++obsIter)
 87  delete *obsIter;
 88 
 89  delete fmdp_;
 90 
 // NOTE(review): source line 91 is missing from this doxygen dump — the
 // destructor may release one further member here; verify against the
 // original sdyna.cpp.
 92  }
93 
94  // ==========================================================================
95  // Initialization
96  // ==========================================================================
97 
 // Initializes the SDyna instance (no initial state supplied).
 // NOTE(review): the body — source lines 99-101 — is missing from this
 // listing; presumably it initializes the learner, planer and decider
 // strategies against fmdp_. Confirm against the original sdyna.cpp.
 98  void SDYNA::initialize() {
 102  }
 103 
 104  // ###################################################################
 105  /*
 106  * Initializes the Sdyna instance.
 107  * @param initialState : the state of the studied system from which we will
 108  * begin the explore, learn and exploit process
 109  */
 110  // ###################################################################
 // NOTE(review): the signature of this overload (source line 111,
 // presumably `void SDYNA::initialize(const Instantiation& initialState)`)
 // and source line 113 (presumably storing initialState as the current
 // state) are missing from this listing — recover them before editing.
 112  initialize();
 114  }
115 
116  // ==========================================================================
117  /// Incremental methods
118  // ==========================================================================
119 
120  // ###################################################################
121  /*
122  * Performs a feedback on the last transition.
123  * In extenso, learn from the transition.
124  * @param originalState : the state we were in before the transition
125  * @param reachedState : the state we reached after
126  * @param performedAction : the action we performed
127  * @param obtainedReward : the reward we obtained
128  */
129  // ###################################################################
 // Four-argument feedback overload: learns from an explicitly supplied
 // transition (previous state, action, reached state, reward).
 // NOTE(review): source line 130 (the signature start, presumably
 // `void SDYNA::feedback(const Instantiation& newState,`) and source lines
 // 134-136 (the whole body, presumably caching the state/action and
 // delegating to the two-argument feedback()) are missing from this
 // listing — recover them before editing.
 131  const Instantiation& prevState,
 132  Idx lastAction,
 133  double reward) {
 137  }
138 
139  // ###################################################################
140  /*
141  * Performs a feedback on the last transition.
142  * In extenso, learn from the transition.
143  * @param reachedState : the state reached after the transition
144  * @param obtainedReward : the reward obtained during the transition
145  * @warning Uses the originalState__ and performedAction__ stored in cache
146  * If you want to specify the original state and the performed action, see
147  * below
148  */
149  // ###################################################################
 // Learns from the last transition using the cached original state and
 // performed action: builds an Observation describing the transition and
 // hands it to the learning machinery.
 // NOTE(review): source lines 153-154, 156, 158-159, 161, 164, 166, 171
 // and 174-178 are missing from this doxygen dump — the two loops below
 // (presumably iterating the FMDP's state variables to fill the
 // observation's modalities) and the learner/decider notifications are
 // incomplete. Recover the full body from the original sdyna.cpp before
 // editing.
 150  void SDYNA::feedback(const Instantiation& newState, double reward) {
 // Owned by bin__ until the destructor frees it.
 151  Observation* obs = new Observation();
 152 
 155  ++varIter)
 157 
 160  ++varIter) {
 162 
 // When actionReward__ is set the reward is attached per-action,
 // otherwise per-state (presumably — both branch bodies are missing).
 163  if (this->actionReward__)
 165  else
 167  }
 168 
 169  obs->setReward(reward);
 170 
 // Keep the observation so it can be released in ~SDYNA().
 172  bin__.insert(obs);
 173 
 176 
 179 
 // One more observation counted toward the replanning threshold.
 180  nbObservation__++;
 181  }
182 
183  // ###################################################################
184  /*
185  * Starts a new planning
186  * @param Idx : the maximal number of value iteration performed in this
187  * planning
188  */
189  // ###################################################################
 // Starts a new planning phase: first lets the learner update the FMDP's
 // decision trees, then runs value iteration through the planer.
 // NOTE(review): source line 190 (the signature, presumably
 // `void SDYNA::makePlanning(Idx nbStep)`), line 192 (presumably the
 // learner update call), line 196 (presumably the planer's makePlanning
 // call) and line 199 are missing from this listing — recover them before
 // editing.
 191  if (verbose_) std::cout << "Updating decision trees ..." << std::endl;
 193  // std::cout << << "Done" << std::endl;
 194 
 195  if (verbose_) std::cout << "Planning ..." << std::endl;
 197  // std::cout << << "Done" << std::endl;
 198 
 200  }
201 
202  // ##################################################################
203  /*
204  * @return the id of the action the SDyna instance wish to be performed
205  * @param the state in which we currently are
206  */
207  // ###################################################################
 // Returns the id of the action SDyna wishes to perform from the given
 // state, by caching the state then delegating to the no-argument
 // takeAction().
 // NOTE(review): source lines 208-209 (the signature, presumably
 // `Idx SDYNA::takeAction(const Instantiation& curState)`, and the
 // state-caching statement) are missing from this listing — recover them
 // before editing.
 210  return takeAction();
 211  }
212 
213  // ###################################################################
214  /*
215  * @return the id of the action the SDyna instance wish to be performed
216  */
217  // ###################################################################
 // Picks the action to perform: the single optimal action when the policy
 // is unambiguous, otherwise a uniformly random one among the optimal set.
 // NOTE(review): source lines 218-219 (the signature `Idx SDYNA::takeAction()`
 // and the retrieval of actionSet, presumably from decider__) and line 224
 // (presumably `lastAction__ = actionSet[randy];`) are missing from this
 // listing — recover them before editing.
 220  if (actionSet.size() == 1) {
 221  lastAction__ = actionSet[0];
 222  } else {
 // NOTE(review): latent off-by-one — when std::rand() == RAND_MAX this
 // yields randy == actionSet.size(), one past the end. Prefer
 // std::uniform_int_distribution over the rand()/RAND_MAX idiom when the
 // full body is recovered.
 223  Idx randy = (Idx)((double)std::rand() / (double)RAND_MAX * actionSet.size());
 225  }
 226  return lastAction__;
 227  }
228 
229  // ###################################################################
230  //
231  // ###################################################################
 // Builds a textual description of the SDyna instance (the learned FMDP
 // and, presumably, the planer's value function / policy).
 // NOTE(review): source lines 232-233 (the signature, presumably
 // `std::string SDYNA::toString()`, and the declaration of `description`,
 // presumably a std::stringstream) and line 236 (presumably appending the
 // planer's description) are missing from this listing — recover them
 // before editing.
 234 
 235  description << fmdp_->toString() << std::endl;
 237 
 238  return description.str();
 239  }
240 
241 } // End of namespace gum
// (doxygen cross-reference footer, an artifact of the generated listing:
//  INLINE void emplace(Args &&... args) — Definition: set_tpl.h:669)