aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
fmdpLearner.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Headers of the FMDPLearner class.
25  *
26  * @author Jean-Christophe MAGNAN
27  */
28 
29 // =========================================================================
30 #ifndef GUM_FMDP_LEARNER_H
31 #define GUM_FMDP_LEARNER_H
32 // =========================================================================
33 #include <agrum/tools/core/hashTable.h>
34 // =========================================================================
35 #include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
36 #include <agrum/FMDP/fmdp.h>
37 #include <agrum/FMDP/learning/datastructure/imddi.h>
38 #include <agrum/FMDP/learning/datastructure/iti.h>
39 #include <agrum/FMDP/learning/observation.h>
40 // =========================================================================
41 #include <agrum/tools/variables/discreteVariable.h>
42 // =========================================================================
43 
44 namespace gum {
45 
46  /**
47  * @class FMDPLearner
48  * @headerfile fmdpLearner.h <agrum/FMDP/learning/fmdpLearner.h>
49  * @brief Learner that incrementally builds the model of an FMDP from observed transitions
50  * @ingroup fmdp_group
51  *
52  *
53  *
54  */
55 
56  template < TESTNAME VariableAttributeSelection,
57  TESTNAME RewardAttributeSelection,
58  LEARNERNAME LearnerSelection >
60  typedef
61  typename LearnerSelect< LearnerSelection,
64 
65  typedef typename LearnerSelect< LearnerSelection,
68 
70 
71  public:
72  // ==========================================================================
73  /// @name Constructor & destructor.
74  // ==========================================================================
75  /// @{
76 
77  // ###################################################################
78  /// Constructor (not a default one): learningThreshold and actionReward are required, similarityThreshold defaults to 0.05
79  // ###################################################################
80  FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold = 0.05);
81 
82  // ###################################################################
83  /// Destructor (only declared here; presumably defined in fmdpLearner_tpl.h, which this header includes at its end)
84  // ###################################################################
85  ~FMDPLearner();
86 
87  /// @}
88 
89  // ###################################################################
90  /// @name Initialization
91  // ###################################################################
92  /// @{
93  public:
94  // ==========================================================================
95  /// Initializes the learner from the given FMDP (presumably the one kept in _fmdp_ to store the learned model -- confirm in fmdpLearner_tpl.h)
96  // ==========================================================================
97  void initialize(FMDP< double >* fmdp);
98 
99  // ==========================================================================
100  /// Function-graph factory helpers: the untagged form forwards to the overload selected by Int2Type< LearnerSelection >; one overload returns getReducedAndOrderedInstance(), the other getTreeInstance() (the signature lines are missing from this listing)
101  // ==========================================================================
103  return _instantiateFunctionGraph_(Int2Type< LearnerSelection >());
104  }
105 
107  return MultiDimFunctionGraph< double,
108  ExactTerminalNodePolicy >::getReducedAndOrderedInstance();
109  }
110 
112  return MultiDimFunctionGraph< double, ExactTerminalNodePolicy >::getTreeInstance();
113  }
114 
115 
116  // ==========================================================================
117  /// Variable-learner factory helpers: the untagged form dispatches on Int2Type< LearnerSelection >; both tagged overloads return a VariableLearnerType allocated with new, and only the IMDDILEARNER one passes _similarityThreshold_
118  // ==========================================================================
121  const DiscreteVariable* learnedVar) {
122  return _instantiateVarLearner_(target,
123  mainVariables,
124  learnedVar,
125  Int2Type< LearnerSelection >());
126  }
127 
131  Int2Type< IMDDILEARNER >) {
132  return new VariableLearnerType(target,
133  _learningThreshold_,
134  _similarityThreshold_,
135  mainVariables,
136  learnedVar);
137  }
138 
142  Int2Type< ITILEARNER >) {
143  return new VariableLearnerType(target, _learningThreshold_, mainVariables, learnedVar);
144  }
145 
146 
147  // ==========================================================================
148  /// Reward-learner factory helpers: the untagged form dispatches on Int2Type< LearnerSelection >; both tagged overloads return a RewardLearnerType allocated with new, and only the IMDDILEARNER one passes _similarityThreshold_
149  // ==========================================================================
151  Set< const DiscreteVariable* >& mainVariables) {
152  return _instantiateRewardLearner_(target, mainVariables, Int2Type< LearnerSelection >());
153  }
154 
157  Int2Type< IMDDILEARNER >) {
158  return new RewardLearnerType(target,
159  _learningThreshold_,
160  _similarityThreshold_,
161  mainVariables);
162  }
163 
166  Int2Type< ITILEARNER >) {
167  return new RewardLearnerType(target, _learningThreshold_, mainVariables);
168  }
169 
170  /// @}
171 
172 
173  // ###################################################################
174  /// @name Incremental methods
175  // ###################################################################
176  /// @{
177  public:
178  // ==========================================================================
179  /**
180  * Gives the learner a new observed transition
181  * @param actionId : id of the action for which the transition was observed
182  * @param obs : the observed transition (taken as a pointer to const)
183  * @return true if learning this transition implies structural changes
184  * (which can trigger a new planning)
185  */
186  // ==========================================================================
187  bool addObservation(Idx actionId, const Observation* obs);
188 
189 
190  // ==========================================================================
191  /**
192  * Starts an update of the data structures in the associated FMDP
193  */
194  // ==========================================================================
195  void updateFMDP();
196 
197  /// @}
198 
199 
200  // ###################################################################
201  /// @name Miscellaneous methods
202  // ###################################################################
203  /// @{
204  public:
205  // ==========================================================================
206  /**
207  * @brief Size of the learner
208  * @return the size as a Size value (what exactly is counted is not visible in this header -- see fmdpLearner_tpl.h)
209  */
210  // ==========================================================================
211  Size size();
212 
213  // ==========================================================================
214  /// \brief Looks up var in the learner table of action actionId, using getWithDefault with nullptr as the default value
215  // ==========================================================================
217  return _actionLearners_[actionId]->getWithDefault(var, nullptr);
218  }
219 
220  virtual double rMax() const { return _rmax_; }  ///< Accessor: returns the stored _rmax_
221 
222  private:
223  double _rmax_;  ///< Value returned by rMax(); it is not assigned in the visible part of this header
224 
225  public:
226  virtual double modaMax() const { return _modaMax_; }  ///< Accessor: returns the stored _modaMax_
227 
228  private:
229  double _modaMax_;  ///< Value returned by modaMax(); it is not assigned in the visible part of this header
230 
231  /// @}
232 
233 
234  private:
235  /// The FMDP to store the learned model
236  FMDP< double >* _fmdp_;
237 
239 
243 
244  const double _learningThreshold_;  ///< Passed to every VariableLearnerType / RewardLearnerType constructed in this header
245  const double _similarityThreshold_;  ///< Passed only by the IMDDILEARNER overloads when constructing learners
246  };
247 
248 
249 } /* namespace gum */
250 
251 #include <agrum/FMDP/learning/fmdpLearner_tpl.h>
252 
253 #endif // GUM_FMDP_LEARNER_H
MultiDimFunctionGraph< double > * _instantiateFunctionGraph_(Int2Type< ITILEARNER >)
Initializes the learner.
Definition: fmdpLearner.h:111
HashTable< Idx, VarLearnerTable *> _actionLearners_
Definition: fmdpLearner.h:238
void initialize(FMDP< double > *fmdp)
Initializes the learner.
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
const IVisitableGraphLearner * varLearner(Idx actionId, const DiscreteVariable *var) const
extractCount
Definition: fmdpLearner.h:216
double _rmax_
learnerSize
Definition: fmdpLearner.h:223
FMDP< double > * _fmdp_
The FMDP to store the learned model.
Definition: fmdpLearner.h:236
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
Default constructor.
VariableLearnerType * _instantiateVarLearner_(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< ITILEARNER >)
Initializes the learner.
Definition: fmdpLearner.h:139
RewardLearnerType * _instantiateRewardLearner_(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< ITILEARNER >)
Initializes the learner.
Definition: fmdpLearner.h:164
~FMDPLearner()
Default destructor.
VariableLearnerType * _instantiateVarLearner_(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Initializes the learner.
Definition: fmdpLearner.h:119
double _modaMax_
learnerSize
Definition: fmdpLearner.h:229
bool addObservation(Idx actionId, const Observation *obs)
Gives to the learner a new transition.
virtual double modaMax() const
learnerSize
Definition: fmdpLearner.h:226
RewardLearnerType * _instantiateRewardLearner_(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Initializes the learner.
Definition: fmdpLearner.h:150
const double _similarityThreshold_
Definition: fmdpLearner.h:245
Size size()
learnerSize
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:69
HashTable< Idx, RewardLearnerType *> _actionRewardLearners_
Definition: fmdpLearner.h:241
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:67
void updateFMDP()
Starts an update of datastructure in the associated FMDP.
virtual double rMax() const
learnerSize
Definition: fmdpLearner.h:220
MultiDimFunctionGraph< double > * _instantiateFunctionGraph_()
Initializes the learner.
Definition: fmdpLearner.h:102
const double _learningThreshold_
Definition: fmdpLearner.h:244
RewardLearnerType * _rewardLearner_
Definition: fmdpLearner.h:242