aGrUM 0.20.2, a C++ library for (probabilistic) graphical models

fmdpLearner_tpl.h
/**
 *
 *  Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 *  info_at_agrum_dot_org
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */


/**
 * @file
 * @brief Template implementations of the FMDPLearner class.
 *
 * @author Jean-Christophe MAGNAN
 */

// =========================================================================
#include <agrum/FMDP/learning/fmdpLearner.h>
// =========================================================================

namespace gum {

  // ==========================================================================
  // Constructor & destructor.
  // ==========================================================================

  // ###################################################################
  // Default constructor
  // ###################################################################
  template < TESTNAME    VariableAttributeSelection,
             TESTNAME    RewardAttributeSelection,
             LEARNERNAME LearnerSelection >
  FMDPLearner< VariableAttributeSelection,
               RewardAttributeSelection,
               LearnerSelection >::FMDPLearner(double lT,
                                               bool   actionReward,
                                               double sT) :
      actionReward__(actionReward),
      learningThreshold__(lT), similarityThreshold__(sT) {
    GUM_CONSTRUCTOR(FMDPLearner);
    rewardLearner__ = nullptr;
  }

  // ###################################################################
  // Default destructor
  // ###################################################################
  template < TESTNAME    VariableAttributeSelection,
             TESTNAME    RewardAttributeSelection,
             LEARNERNAME LearnerSelection >
  FMDPLearner< VariableAttributeSelection,
               RewardAttributeSelection,
               LearnerSelection >::~FMDPLearner() {
    for (auto actionIter = actionLearners__.beginSafe();
         actionIter != actionLearners__.endSafe();
         ++actionIter) {
      for (auto learnerIter = actionIter.val()->beginSafe();
           learnerIter != actionIter.val()->endSafe();
           ++learnerIter)
        delete learnerIter.val();
      delete actionIter.val();
      if (actionRewardLearners__.exists(actionIter.key()))
        delete actionRewardLearners__[actionIter.key()];
    }

    if (rewardLearner__) delete rewardLearner__;

    GUM_DESTRUCTOR(FMDPLearner);
  }


  // ==========================================================================
  //
  // ==========================================================================

  // ###################################################################
  // Binds the learner to the FMDP to fill: allocates one transition
  // learner per (action, variable) pair and the reward learner(s).
  // ###################################################################
  template < TESTNAME    VariableAttributeSelection,
             TESTNAME    RewardAttributeSelection,
             LEARNERNAME LearnerSelection >
  void FMDPLearner< VariableAttributeSelection,
                    RewardAttributeSelection,
                    LearnerSelection >::initialize(FMDP< double >* fmdp) {
    fmdp__ = fmdp;

    modaMax__ = 0;
    rmax__    = 0.0;

    Set< const DiscreteVariable* > mainVariables;
    for (auto varIter = fmdp__->beginVariables();
         varIter != fmdp__->endVariables();
         ++varIter) {
      mainVariables.insert(*varIter);
      // Keep track of the largest domain size among the main variables
      modaMax__ = modaMax__ < (*varIter)->domainSize() ? (*varIter)->domainSize()
                                                       : modaMax__;
    }

    for (auto actionIter = fmdp__->beginActions();
         actionIter != fmdp__->endActions();
         ++actionIter) {
      // Adding a Hashtable for the action
      actionLearners__.insert(*actionIter, new VarLearnerTable());

      // Adding a learner for each variable
      for (auto varIter = fmdp__->beginVariables();
           varIter != fmdp__->endVariables();
           ++varIter) {
        MultiDimFunctionGraph< double >* varTrans = instantiateFunctionGraph__();
        varTrans->setTableName("ACTION : " + fmdp__->actionName(*actionIter)
                               + " - VARIABLE : " + (*varIter)->name());
        fmdp__->addTransitionForAction(*actionIter, *varIter, varTrans);
        actionLearners__[*actionIter]->insert(
           (*varIter),
           instantiateVarLearner__(varTrans,
                                   mainVariables,
                                   fmdp__->main2prime(*varIter)));
      }

      if (actionReward__) {
        MultiDimFunctionGraph< double >* reward = instantiateFunctionGraph__();
        reward->setTableName("REWARD - ACTION : "
                             + fmdp__->actionName(*actionIter));
        fmdp__->addRewardForAction(*actionIter, reward);
        actionRewardLearners__.insert(
           *actionIter,
           instantiateRewardLearner__(reward, mainVariables));
      }
    }

    if (!actionReward__) {
      MultiDimFunctionGraph< double >* reward = instantiateFunctionGraph__();
      reward->setTableName("REWARD");
      fmdp__->addReward(reward);
      rewardLearner__ = instantiateRewardLearner__(reward, mainVariables);
    }
  }

  // ###################################################################
  // Incorporates a new observation for the given action: every variable
  // learner and the relevant reward learner are updated.
  // ###################################################################
  template < TESTNAME    VariableAttributeSelection,
             TESTNAME    RewardAttributeSelection,
             LEARNERNAME LearnerSelection >
  bool FMDPLearner< VariableAttributeSelection,
                    RewardAttributeSelection,
                    LearnerSelection >::addObservation(Idx                actionId,
                                                       const Observation* newObs) {
    for (SequenceIteratorSafe< const DiscreteVariable* > varIter
         = fmdp__->beginVariables();
         varIter != fmdp__->endVariables();
         ++varIter) {
      actionLearners__[actionId]
         ->getWithDefault(*varIter, nullptr)
         ->addObservation(newObs);
      actionLearners__[actionId]->getWithDefault(*varIter, nullptr)->updateGraph();
    }

    if (actionReward__) {
      actionRewardLearners__[actionId]->addObservation(newObs);
      actionRewardLearners__[actionId]->updateGraph();
    } else {
      rewardLearner__->addObservation(newObs);
      rewardLearner__->updateGraph();
    }

    // rmax__ is the highest absolute reward observed so far
    rmax__
       = rmax__ < std::abs(newObs->reward()) ? std::abs(newObs->reward()) : rmax__;

    return false;
  }

  // ###################################################################
  // Returns the cumulated size of all the underlying learners.
  // ###################################################################
  template < TESTNAME    VariableAttributeSelection,
             TESTNAME    RewardAttributeSelection,
             LEARNERNAME LearnerSelection >
  Size FMDPLearner< VariableAttributeSelection,
                    RewardAttributeSelection,
                    LearnerSelection >::size() {
    Size s = 0;
    for (SequenceIteratorSafe< Idx > actionIter = fmdp__->beginActions();
         actionIter != fmdp__->endActions();
         ++actionIter) {
      for (SequenceIteratorSafe< const DiscreteVariable* > varIter
           = fmdp__->beginVariables();
           varIter != fmdp__->endVariables();
           ++varIter)
        s += actionLearners__[*actionIter]
                ->getWithDefault(*varIter, nullptr)
                ->size();
      if (actionReward__) s += actionRewardLearners__[*actionIter]->size();
    }

    if (!actionReward__) s += rewardLearner__->size();

    return s;
  }

  // ###################################################################
  // Rebuilds the transition and reward function graphs of the FMDP
  // from the current state of the learners.
  // ###################################################################
  template < TESTNAME    VariableAttributeSelection,
             TESTNAME    RewardAttributeSelection,
             LEARNERNAME LearnerSelection >
  void FMDPLearner< VariableAttributeSelection,
                    RewardAttributeSelection,
                    LearnerSelection >::updateFMDP() {
    for (SequenceIteratorSafe< Idx > actionIter = fmdp__->beginActions();
         actionIter != fmdp__->endActions();
         ++actionIter) {
      for (SequenceIteratorSafe< const DiscreteVariable* > varIter
           = fmdp__->beginVariables();
           varIter != fmdp__->endVariables();
           ++varIter)
        actionLearners__[*actionIter]
           ->getWithDefault(*varIter, nullptr)
           ->updateFunctionGraph();
      if (actionReward__)
        actionRewardLearners__[*actionIter]->updateFunctionGraph();
    }

    if (!actionReward__) rewardLearner__->updateFunctionGraph();
  }
}   // End of namespace gum
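
Below is a brief usage sketch (not part of the original header) of how a client might typically drive an FMDPLearner: bind it to an FMDP, feed observations, then rebuild the function graphs. The template arguments CHI2TEST and ITILEARNER, the extra include paths and the way the trajectory is obtained are assumptions made for illustration only; consult the aGrUM documentation for the exact types to use.

#include <utility>
#include <vector>

#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>

// Hypothetical helper: learns transition and reward function graphs from a
// recorded trajectory of (actionId, observation) pairs.
void learnFromTrajectory(
   gum::FMDP< double >&                                                  fmdp,
   const std::vector< std::pair< gum::Idx, const gum::Observation* > >& trajectory) {
  // Learning threshold, single global reward (actionReward = false) and
  // similarity threshold: example values, not recommended defaults.
  gum::FMDPLearner< gum::CHI2TEST, gum::CHI2TEST, gum::ITILEARNER > learner(
     0.05, false, 0.3);

  // Allocates one transition learner per (action, variable) pair plus the
  // reward learner, and registers the corresponding tables in the FMDP.
  learner.initialize(&fmdp);

  // Each observation updates the incremental decision-graph learners.
  for (const auto& step: trajectory)
    learner.addObservation(step.first, step.second);

  // Pushes the learnt transition and reward function graphs into the FMDP.
  learner.updateFMDP();
}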