aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
sdyna.cpp
/**
 *
 * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 * info_at_agrum_dot_org
 *
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */


/**
 * @file
 * @brief Implementation of the SDYNA class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */


// =========================================================================
#include <cstdlib>
#include <iostream>
#include <random>
#include <sstream>
// =========================================================================
#include <agrum/FMDP/SDyna/sdyna.h>
// =========================================================================

namespace gum {

  // ==========================================================================
  // Constructor & destructor.
  // ==========================================================================

  // ###################################################################
  /*
   * Constructor
   *
   * @param observationPhaseLenght : the number of observations made before a
   * replanning is launched. If equal to 0, a planning is done after each
   * structural change.
   * @param nbValueIterationStep : the number of value iteration steps done
   * during one planning
   * @return an instance of the SDyna architecture
   */
  // ###################################################################

  SDYNA::SDYNA(ILearningStrategy*           learner,
               IPlanningStrategy< double >* planer,
               IDecisionStrategy*           decider,
               Idx                          observationPhaseLenght,
               Idx                          nbValueIterationStep,
               bool                         actionReward,
               bool                         verbose) :
      _learner_(learner), _planer_(planer), _decider_(decider),
      _observationPhaseLenght_(observationPhaseLenght),
      _nbValueIterationStep_(nbValueIterationStep), _actionReward_(actionReward),
      verbose_(verbose) {
    GUM_CONSTRUCTOR(SDYNA);

    fmdp_ = new FMDP< double >();

    _nbObservation_ = 1;
  }
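
  // Note: SDYNA takes ownership of the learner, planer and decider strategies
  // given to the constructor; they are deleted in the destructor below.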

  // ###################################################################
  // Destructor
  // ###################################################################
  SDYNA::~SDYNA() {
    delete _decider_;

    delete _learner_;

    delete _planer_;

    for (auto obsIter = _bin_.beginSafe(); obsIter != _bin_.endSafe(); ++obsIter)
      delete *obsIter;

    delete fmdp_;

    GUM_DESTRUCTOR(SDYNA);
  }

  // ==========================================================================
  // Initialization
  // ==========================================================================

  void SDYNA::initialize() {
    _learner_->initialize(fmdp_);
    _planer_->initialize(fmdp_);
    _decider_->initialize(fmdp_);
  }

  // ###################################################################
  /*
   * Initializes the Sdyna instance at a given state.
   * @param initialState : the state of the studied system from which we will
   * begin the explore, learn and exploit process
   */
  // ###################################################################
  void SDYNA::initialize(const Instantiation& initialState) {
    initialize();
    setCurrentState(initialState);
  }

  // ==========================================================================
  /// Incremental methods
  // ==========================================================================

  // ###################################################################
  /*
   * Performs a feedback on the last transition.
   * In other words, learn from the transition.
   * @param originalState : the state we were in before the transition
   * @param reachedState : the state we reached afterwards
   * @param performedAction : the action we performed
   * @param obtainedReward : the reward we obtained
   */
  // ###################################################################
  void SDYNA::feedback(const Instantiation& newState,
                       const Instantiation& prevState,
                       Idx                  lastAction,
                       double               reward) {
    _lastAction_ = lastAction;
    lastState_   = prevState;
    feedback(newState, reward);
  }

  // ###################################################################
  /*
   * Performs a feedback on the last transition.
   * In other words, learn from the transition.
   * @param reachedState : the state reached after the transition
   * @param obtainedReward : the reward obtained during the transition
   * @warning Uses the original state and the performed action stored in cache.
   * If you want to specify the original state and the performed action, use
   * the other feedback overload.
   */
  // ###################################################################
  void SDYNA::feedback(const Instantiation& newState, double reward) {
    Observation* obs = new Observation();

    // record the modalities of the state the transition started from
    for (auto varIter = lastState_.variablesSequence().beginSafe();
         varIter != lastState_.variablesSequence().endSafe();
         ++varIter)
      obs->setModality(*varIter, lastState_.val(**varIter));

    // record the modalities of the reached state (primed variables) and the
    // modalities used to learn the reward function
    for (auto varIter = newState.variablesSequence().beginSafe();
         varIter != newState.variablesSequence().endSafe();
         ++varIter) {
      obs->setModality(fmdp_->main2prime(*varIter), newState.val(**varIter));

      if (this->_actionReward_)
        obs->setRModality(*varIter, lastState_.val(**varIter));
      else
        obs->setRModality(*varIter, newState.val(**varIter));
    }

    obs->setReward(reward);

    // hand the observation over to the learner and keep it for later deletion
    _learner_->addObservation(_lastAction_, obs);
    _bin_.insert(obs);

    setCurrentState(newState);
    _decider_->checkState(newState, _lastAction_);

    // replanning every observationPhaseLenght observations
    if (_nbObservation_ % _observationPhaseLenght_ == 0)
      makePlanning(_nbValueIterationStep_);

    _nbObservation_++;
  }

  // ###################################################################
  /*
   * Starts a new planning
   * @param nbStep : the maximal number of value iteration steps performed
   * during this planning
   */
  // ###################################################################
  void SDYNA::makePlanning(Idx nbStep) {
    if (verbose_) std::cout << "Updating decision trees ..." << std::endl;
    _learner_->updateFMDP();
    // std::cout << "Done" << std::endl;

    if (verbose_) std::cout << "Planning ..." << std::endl;
    _planer_->makePlanning(nbStep);
    // std::cout << "Done" << std::endl;

    _decider_->setOptimalStrategy(_planer_->optimalPolicy());
  }

  // ###################################################################
  /*
   * @return the id of the action the SDyna instance wishes to perform
   * @param curState : the state we are currently in
   */
  // ###################################################################
  Idx SDYNA::takeAction(const Instantiation& curState) {
    setCurrentState(curState);
    return takeAction();
  }

  // ###################################################################
  /*
   * @return the id of the action the SDyna instance wishes to perform
   */
  // ###################################################################
  Idx SDYNA::takeAction() {
    ActionSet actionSet = _decider_->stateOptimalPolicy(lastState_);
    if (actionSet.size() == 1) {
      _lastAction_ = actionSet[0];
    } else {
      // ties are broken uniformly at random among the optimal actions
      Idx randy    = (Idx)((double)std::rand() / (double)RAND_MAX * actionSet.size());
      _lastAction_ = actionSet[randy == actionSet.size() ? 0 : randy];
    }
    return _lastAction_;
  }

  // ###################################################################
  // Returns a string describing the learned FMDP and the current optimal policy.
  // ###################################################################
  std::string SDYNA::toString() {
    std::stringstream description;

    description << fmdp_->toString() << std::endl;
    description << _planer_->optimalPolicy2String() << std::endl;

    return description.str();
  }

}   // End of namespace gum
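
A minimal sketch of how the methods above are typically driven, assuming an already constructed SDYNA instance. The Environment type and the runEpisode function below are hypothetical stand-ins for the controlled system and are not part of aGrUM; only the SDYNA calls (initialize, takeAction, feedback, toString) come from this file.

#include <iostream>
#include <agrum/FMDP/SDyna/sdyna.h>

// Hypothetical interface standing in for the controlled system (not aGrUM API).
struct Environment {
  virtual ~Environment() = default;
  virtual const gum::Instantiation& currentState() const = 0;
  // applies the action to the system and returns the obtained reward
  virtual double perform(gum::Idx actionId) = 0;
};

// Runs one explore-learn-exploit episode of nbSteps transitions.
void runEpisode(gum::SDYNA& sdyna, Environment& env, gum::Idx nbSteps) {
  sdyna.initialize(env.currentState());

  for (gum::Idx t = 0; t < nbSteps; ++t) {
    // let the decision strategy pick an action for the current state
    gum::Idx action = sdyna.takeAction(env.currentState());
    double   reward = env.perform(action);

    // learn from the observed transition; per the constructor documentation,
    // a replanning is launched every observationPhaseLenght observations
    sdyna.feedback(env.currentState(), reward);
  }

  std::cout << sdyna.toString() << std::endl;   // learned FMDP and policy
}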