aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
fmdpLearner.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of the FMDPLearner class.
25  *
26  * @author Jean-Christophe MAGNAN
27  */
28 
29 // =========================================================================
30 #ifndef GUM_FMDP_LEARNER_H
31 #define GUM_FMDP_LEARNER_H
32 // =========================================================================
33 #include <agrum/tools/core/hashTable.h>
34 // =========================================================================
35 #include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
36 #include <agrum/FMDP/fmdp.h>
37 #include <agrum/FMDP/learning/datastructure/imddi.h>
38 #include <agrum/FMDP/learning/datastructure/iti.h>
39 #include <agrum/FMDP/learning/observation.h>
40 // =========================================================================
41 #include <agrum/tools/variables/discreteVariable.h>
42 // =========================================================================
43 
44 namespace gum {
45 
46  /**
47  * @class FMDPLearner
48  * @headerfile fmdpLearner.h <agrum/FMDP/learning/fmdpLearner.h>
49  * @brief Learns the model of an FMDP incrementally from observed transitions
50  * @ingroup fmdp_group
51  *
52  *
53  *
54  */
55 
56  template < TESTNAME VariableAttributeSelection,
57  TESTNAME RewardAttributeSelection,
58  LEARNERNAME LearnerSelection >
60  typedef
61  typename LearnerSelect< LearnerSelection,
65 
66  typedef typename LearnerSelect< LearnerSelection,
70 
73 
74  public:
75  // ==========================================================================
76  /// @name Constructor & destructor.
77  // ==========================================================================
78  /// @{
79 
80  // ###################################################################
81  /// Constructor (learningThreshold and actionReward are mandatory)
82  // ###################################################################
83  FMDPLearner(double learningThreshold,
84  bool actionReward,
85  double similarityThreshold = 0.05);
86 
87  // ###################################################################
88  /// Default destructor
89  // ###################################################################
90  ~FMDPLearner();
91 
92  /// @}
93 
94  // ###################################################################
95  /// @name Initialization
96  // ###################################################################
97  /// @{
98  public:
99  // ==========================================================================
100  /// Initializes the learner
101  // ==========================================================================
102  void initialize(FMDP< double >* fmdp);
103 
104  // ==========================================================================
105  /// Instantiates the function graph matching the LearnerSelection template parameter
106  // ==========================================================================
108  return instantiateFunctionGraph__(Int2Type< LearnerSelection >());
109  }
110 
111  MultiDimFunctionGraph< double >*
113  return MultiDimFunctionGraph< double, ExactTerminalNodePolicy >::
114  getReducedAndOrderedInstance();
115  }
116 
117  MultiDimFunctionGraph< double >*
119  return MultiDimFunctionGraph< double,
120  ExactTerminalNodePolicy >::getTreeInstance();
121  }
122 
123 
124  // ==========================================================================
125  /// Instantiates the learner of a variable, dispatching on LearnerSelection
126  // ==========================================================================
130  const DiscreteVariable* learnedVar) {
131  return instantiateVarLearner__(target,
132  mainVariables,
133  learnedVar,
134  Int2Type< LearnerSelection >());
135  }
136 
141  Int2Type< IMDDILEARNER >) {
142  return new VariableLearnerType(target,
143  learningThreshold__,
144  similarityThreshold__,
145  mainVariables,
146  learnedVar);
147  }
148 
153  Int2Type< ITILEARNER >) {
154  return new VariableLearnerType(target,
155  learningThreshold__,
156  mainVariables,
157  learnedVar);
158  }
159 
160 
161  // ==========================================================================
162  /// Instantiates the reward learner, dispatching on LearnerSelection
163  // ==========================================================================
166  Set< const DiscreteVariable* >& mainVariables) {
167  return instantiateRewardLearner__(target,
168  mainVariables,
169  Int2Type< LearnerSelection >());
170  }
171 
175  Int2Type< IMDDILEARNER >) {
176  return new RewardLearnerType(target,
177  learningThreshold__,
178  similarityThreshold__,
179  mainVariables);
180  }
181 
185  Int2Type< ITILEARNER >) {
186  return new RewardLearnerType(target, learningThreshold__, mainVariables);
187  }
188 
189  /// @}
190 
191 
192  // ###################################################################
193  /// @name Incremental methods
194  // ###################################################################
195  /// @{
196  public:
197  // ==========================================================================
198  /**
199  * Gives the learner a new transition
200  * @param actionId : the action on which the transition was made
201  * @param obs : the observed transition
202  * @return true if learning this transition implies structural changes
203  * (can trigger a new planning)
204  */
205  // ==========================================================================
206  bool addObservation(Idx actionId, const Observation* obs);
207 
208 
209  // ==========================================================================
210  /**
211  * Starts an update of the data structures in the associated FMDP
212  */
213  // ==========================================================================
214  void updateFMDP();
215 
216  /// @}
217 
218 
219  // ###################################################################
220  /// @name Miscellaneous methods
221  // ###################################################################
222  /// @{
223  public:
224  // ==========================================================================
225  /**
226  * @brief Size of the learner
227  * @return the size of the learner, as a Size
228  */
229  // ==========================================================================
230  Size size();
231 
232  // ==========================================================================
233  /// \brief Returns the learner of variable var for action actionId (nullptr is passed as the default)
234  // ==========================================================================
236  const DiscreteVariable* var) const {
237  return actionLearners__[actionId]->getWithDefault(var, nullptr);
238  }
239 
240  virtual double rMax() const { return rmax__; }  ///< Returns the value currently stored in rmax__ (presumably the maximal reward seen so far; confirm in fmdpLearner_tpl.h).
241 
242  private:
243  double rmax__;
244 
245  public:
246  virtual double modaMax() const { return modaMax__; }  ///< Returns the value currently stored in modaMax__ (presumably the largest variable domain size; confirm in fmdpLearner_tpl.h).
247 
248  private:
249  double modaMax__;
250 
251  /// @}
252 
253 
254  private:
255  /// The FMDP to store the learned model
256  FMDP< double >* fmdp__;
257 
259 
263 
264  const double learningThreshold__;
265  const double similarityThreshold__;
266  };
267 
268 
269 } /* namespace gum */
270 
271 #include <agrum/FMDP/learning/fmdpLearner_tpl.h>
272 
273 #endif // GUM_FMDP_LEARNER_H
RewardLearnerType * rewardLearner__
Definition: fmdpLearner.h:262
void initialize(FMDP< double > *fmdp)
Initializes the learner.
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
MultiDimFunctionGraph< double > * instantiateFunctionGraph__(Int2Type< ITILEARNER >)
Initializes the learner.
Definition: fmdpLearner.h:118
const IVisitableGraphLearner * varLearner(Idx actionId, const DiscreteVariable *var) const
extractCount
Definition: fmdpLearner.h:235
VariableLearnerType * instantiateVarLearner__(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:128
HashTable< Idx, VarLearnerTable *> actionLearners__
Definition: fmdpLearner.h:258
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
double modaMax__
learnerSize
Definition: fmdpLearner.h:249
~FMDPLearner()
Default destructor.
VariableLearnerType * instantiateVarLearner__(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< ITILEARNER >)
Initializes the learner.
Definition: fmdpLearner.h:150
bool addObservation(Idx actionId, const Observation *obs)
Gives to the learner a new transition.
virtual double modaMax() const
learnerSize
Definition: fmdpLearner.h:246
RewardLearnerType * instantiateRewardLearner__(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< ITILEARNER >)
Initializes the learner.
Definition: fmdpLearner.h:183
Size size()
learnerSize
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:72
MultiDimFunctionGraph< double > * instantiateFunctionGraph__()
Initializes the learner.
Definition: fmdpLearner.h:107
RewardLearnerType * instantiateRewardLearner__(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:165
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:69
double rmax__
learnerSize
Definition: fmdpLearner.h:243
void updateFMDP()
Starts an update of datastructure in the associated FMDP.
const double similarityThreshold__
Definition: fmdpLearner.h:265
virtual double rMax() const
learnerSize
Definition: fmdpLearner.h:240
FMDP< double > * fmdp__
The FMDP to store the learned model.
Definition: fmdpLearner.h:256
const double learningThreshold__
Definition: fmdpLearner.h:264
HashTable< Idx, RewardLearnerType *> actionRewardLearners__
Definition: fmdpLearner.h:261