aGrUM  0.14.2
fmdpLearner.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 // =========================================================================
28 #ifndef GUM_FMDP_LEARNER_H
29 #define GUM_FMDP_LEARNER_H
30 // =========================================================================
31 #include <agrum/core/hashTable.h>
32 // =========================================================================
34 #include <agrum/FMDP/fmdp.h>
38 // =========================================================================
40 // =========================================================================
41 
42 namespace gum {
43 
54  template < TESTNAME VariableAttributeSelection,
55  TESTNAME RewardAttributeSelection,
56  LEARNERNAME LearnerSelection >
57  class FMDPLearner : public ILearningStrategy {
58  typedef
59  typename LearnerSelect< LearnerSelection,
63 
64  typedef typename LearnerSelect< LearnerSelection,
68 
71 
72  public:
73  // ==========================================================================
75  // ==========================================================================
77 
78  // ###################################################################
80  // ###################################################################
81  FMDPLearner(double learningThreshold,
82  bool actionReward,
83  double similarityThreshold = 0.05);
84 
85  // ###################################################################
87  // ###################################################################
88  ~FMDPLearner();
89 
91 
92  // ###################################################################
94  // ###################################################################
96  public:
97  // ==========================================================================
99  // ==========================================================================
100  void initialize(FMDP< double >* fmdp);
101 
102  // ==========================================================================
104  // ==========================================================================
107  }
108 
113  }
114 
118  ExactTerminalNodePolicy >::getTreeInstance();
119  }
120 
121 
122  // ==========================================================================
124  // ==========================================================================
127  Set< const DiscreteVariable* >& mainVariables,
128  const DiscreteVariable* learnedVar) {
130  target, mainVariables, learnedVar, Int2Type< LearnerSelection >());
131  }
132 
135  Set< const DiscreteVariable* >& mainVariables,
136  const DiscreteVariable* learnedVar,
138  return new VariableLearnerType(target,
141  mainVariables,
142  learnedVar);
143  }
144 
147  Set< const DiscreteVariable* >& mainVariables,
148  const DiscreteVariable* learnedVar,
150  return new VariableLearnerType(
151  target, __learningThreshold, mainVariables, learnedVar);
152  }
153 
154 
155  // ==========================================================================
157  // ==========================================================================
160  Set< const DiscreteVariable* >& mainVariables) {
162  target, mainVariables, Int2Type< LearnerSelection >());
163  }
164 
167  Set< const DiscreteVariable* >& mainVariables,
169  return new RewardLearnerType(
170  target, __learningThreshold, __similarityThreshold, mainVariables);
171  }
172 
175  Set< const DiscreteVariable* >& mainVariables,
177  return new RewardLearnerType(target, __learningThreshold, mainVariables);
178  }
179 
181 
182 
183  // ###################################################################
185  // ###################################################################
187  public:
188  // ==========================================================================
196  // ==========================================================================
197  bool addObservation(Idx actionId, const Observation* obs);
198 
199 
200  // ==========================================================================
204  // ==========================================================================
205  void updateFMDP();
206 
208 
209 
210  // ###################################################################
212  // ###################################################################
214  public:
215  // ==========================================================================
220  // ==========================================================================
221  Size size();
222 
223  // ==========================================================================
225  // ==========================================================================
227  const DiscreteVariable* var) const {
228  return __actionLearners[actionId]->getWithDefault(var, nullptr);
229  }
230 
231  virtual double rMax() const { return __rmax; }
232 
233  private:
234  double __rmax;
235 
236  public:
237  virtual double modaMax() const { return __modaMax; }
238 
239  private:
240  double __modaMax;
241 
243 
244 
245  private:
248 
250 
254 
255  const double __learningThreshold;
256  const double __similarityThreshold;
257  };
258 
259 
260 } /* namespace gum */
261 
263 
264 #endif // GUM_FMDP_LEARNER_H
MultiDimFunctionGraph< double > * __instantiateFunctionGraph(Int2Type< IMDDILEARNER >)
Instantiates the function graph (a MultiDimFunctionGraph< double >) used when the selected learner is IMDDILEARNER.
Definition: fmdpLearner.h:110
Learn a graphical representation of a function as a decision tree.
Definition: iti.h:59
Base class for discrete random variable.
MultiDimFunctionGraph< double > * __instantiateFunctionGraph()
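Instantiates a function graph (a MultiDimFunctionGraph< double >) for the learner; tag-specific overloads exist for IMDDILEARNER and ITILEARNER.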
Definition: fmdpLearner.h:105
void initialize(FMDP< double > *fmdp)
Initializes the learner.
Headers of the Learning Strategy interface.
Template Implementations of the FMDPLearner class.
HashTable< Idx, VarLearnerTable *> __actionLearners
Definition: fmdpLearner.h:249
Headers of the ITI class.
const IVisitableGraphLearner * varLearner(Idx actionId, const DiscreteVariable *var) const
Returns the learner stored for variable var under action actionId, or nullptr if no learner is stored for var.
Definition: fmdpLearner.h:226
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
LearnerSelect< LearnerSelection, IMDDI< VariableAttributeSelection, false >, ITI< VariableAttributeSelection, false > >::type VariableLearnerType
Definition: fmdpLearner.h:62
FMDPLearner(double learningThreshold, bool actionReward, double similarityThreshold=0.05)
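Constructor taking the learning threshold, the action-reward flag and an optional similarity threshold (0.05 by default).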
Base class for discrete random variable.
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
~FMDPLearner()
Default destructor.
bool addObservation(Idx actionId, const Observation *obs)
Gives the learner a new transition.
The class for generic Hash Tables.
Definition: hashTable.h:676
const double __learningThreshold
Definition: fmdpLearner.h:255
virtual double modaMax() const
Returns the value of __modaMax.
Definition: fmdpLearner.h:237
double __rmax
Value returned by rMax().
Definition: fmdpLearner.h:234
FMDP< double > * __fmdp
The FMDP to store the learned model.
Definition: fmdpLearner.h:247
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables)
Instantiates a reward learner by dispatching on the LearnerSelection template parameter to the matching tagged overload.
Definition: fmdpLearner.h:159
Representation of a set. A Set is a structure that contains arbitrary elements.
Definition: set.h:162
Class implementing a factored Markov decision process.
Size size()
Returns the size of the learner.
<agrum/FMDP/SDyna/ILearningStrategy.h>
HashTable< const DiscreteVariable *, VariableLearnerType *> VarLearnerTable
Definition: fmdpLearner.h:70
HashTable< Idx, RewardLearnerType *> __actionRewardLearners
Definition: fmdpLearner.h:252
Headers of the Observation class.
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< ITILEARNER >)
Instantiates a reward learner for ITILEARNER from target, __learningThreshold and mainVariables.
Definition: fmdpLearner.h:174
LearnerSelect< LearnerSelection, IMDDI< RewardAttributeSelection, true >, ITI< RewardAttributeSelection, true > >::type RewardLearnerType
Definition: fmdpLearner.h:67
void updateFMDP()
Starts an update of the data structures in the associated FMDP.
virtual double rMax() const
Returns the value of __rmax.
Definition: fmdpLearner.h:231
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< IMDDILEARNER >)
Instantiates a variable learner for IMDDILEARNER from target, mainVariables and learnedVar, among other arguments.
Definition: fmdpLearner.h:134
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar)
Instantiates a variable learner by dispatching on the LearnerSelection template parameter to the matching tagged overload.
Definition: fmdpLearner.h:126
Size Idx
Type for indexes.
Definition: types.h:50
double __modaMax
Value returned by modaMax().
Definition: fmdpLearner.h:240
RewardLearnerType * __instantiateRewardLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, Int2Type< IMDDILEARNER >)
Instantiates a reward learner for IMDDILEARNER from target, __learningThreshold, __similarityThreshold and mainVariables.
Definition: fmdpLearner.h:166
Implementation of a Terminal Node Policy that maps nodeid directly to value.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
Headers of the IMDDI class.
MultiDimFunctionGraph< double > * __instantiateFunctionGraph(Int2Type< ITILEARNER >)
Instantiates the function graph used when the selected learner is ITILEARNER, obtained through getTreeInstance().
Definition: fmdpLearner.h:116
RewardLearnerType * __rewardLearner
Definition: fmdpLearner.h:253
const double __similarityThreshold
Definition: fmdpLearner.h:256
VariableLearnerType * __instantiateVarLearner(MultiDimFunctionGraph< double > *target, Set< const DiscreteVariable * > &mainVariables, const DiscreteVariable *learnedVar, Int2Type< ITILEARNER >)
Instantiates a variable learner for ITILEARNER from target, __learningThreshold, mainVariables and learnedVar.
Definition: fmdpLearner.h:146
Class hash tables iterators.
static MultiDimFunctionGraph< GUM_SCALAR, TerminalNodePolicy > * getReducedAndOrderedInstance()
Returns a reduced and ordered instance.