aGrUM
0.20.2
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.cpp
Go to the documentation of this file.
1
/**
2
*
3
* Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Template implementation of AdaptiveRMaxPlaner classes.
25
*
26
* @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27
* GONZALES(@AMU)
28
*/
29
30
// =========================================================================
31
#
include
<
queue
>
32
#
include
<
vector
>
33
//#include <algorithm>
34
//#include <utility>
35
// =========================================================================
36
#
include
<
agrum
/
tools
/
core
/
math
/
math_utils
.
h
>
37
#
include
<
agrum
/
tools
/
core
/
functors
.
h
>
38
#
include
<
agrum
/
tools
/
core
/
smallobjectallocator
/
smallObjectAllocator
.
h
>
39
// =========================================================================
40
#
include
<
agrum
/
tools
/
multidim
/
implementations
/
multiDimFunctionGraph
.
h
>
41
#
include
<
agrum
/
tools
/
multidim
/
instantiation
.
h
>
42
#
include
<
agrum
/
tools
/
multidim
/
potential
.
h
>
43
// =========================================================================
44
#
include
<
agrum
/
FMDP
/
planning
/
adaptiveRMaxPlaner
.
h
>
45
// =========================================================================
46
47
/// For shorter line and hence more comprehensive code purposes only
48
#
define
RECASTED
(
x
)
reinterpret_cast
<
const
MultiDimFunctionGraph
<
double
>
*
>
(
x
)
49
50
namespace
gum
{
51
52
/* **************************************************************************************************
53
* **/
54
/* ** **/
55
/* ** Constructors / Destructors **/
56
/* ** **/
57
/* **************************************************************************************************
58
* **/
59
60
// ===========================================================================
61
// Default constructor
62
// ===========================================================================
63
AdaptiveRMaxPlaner
::
AdaptiveRMaxPlaner
(
IOperatorStrategy
<
double
>*
opi
,
64
double
discountFactor
,
65
double
epsilon
,
66
const
ILearningStrategy
*
learner
,
67
bool
verbose
) :
68
StructuredPlaner
(
opi
,
discountFactor
,
epsilon
,
verbose
),
69
IDecisionStrategy
(),
fmdpLearner__
(
learner
),
initialized__
(
false
) {
70
GUM_CONSTRUCTOR
(
AdaptiveRMaxPlaner
);
71
}
72
73
// ===========================================================================
74
// Default destructor
75
// ===========================================================================
76
AdaptiveRMaxPlaner
::~
AdaptiveRMaxPlaner
() {
77
GUM_DESTRUCTOR
(
AdaptiveRMaxPlaner
);
78
79
for
(
HashTableIteratorSafe
<
Idx
,
StatesCounter
* >
scIter
80
=
counterTable__
.
beginSafe
();
81
scIter
!=
counterTable__
.
endSafe
();
82
++
scIter
)
83
delete
scIter
.
val
();
84
}
85
86
/* **************************************************************************************************
87
* **/
88
/* ** **/
89
/* ** Planning Methods **/
90
/* ** **/
91
/* **************************************************************************************************
92
* **/
93
94
// ==========================================================================
95
// Initializes data structure needed for making the planning
96
// ==========================================================================
97
void
AdaptiveRMaxPlaner
::
initialize
(
const
FMDP
<
double
>*
fmdp
) {
98
if
(!
initialized__
) {
99
StructuredPlaner
::
initialize
(
fmdp
);
100
IDecisionStrategy
::
initialize
(
fmdp
);
101
for
(
auto
actionIter
=
fmdp
->
beginActions
();
102
actionIter
!=
fmdp
->
endActions
();
103
++
actionIter
) {
104
counterTable__
.
insert
(*
actionIter
,
new
StatesCounter
());
105
initializedTable__
.
insert
(*
actionIter
,
false
);
106
}
107
initialized__
=
true
;
108
}
109
}
110
111
// ===========================================================================
112
// Performs a value iteration
113
// ===========================================================================
114
void
AdaptiveRMaxPlaner
::
makePlanning
(
Idx
nbStep
) {
115
makeRMaxFunctionGraphs__
();
116
117
StructuredPlaner
::
makePlanning
(
nbStep
);
118
119
clearTables__
();
120
}
121
122
/* **************************************************************************************************
123
* **/
124
/* ** **/
125
/* ** Value Iteration Methods **/
126
/* ** **/
127
/* **************************************************************************************************
128
* **/
129
130
// ===========================================================================
131
// Performs a single step of value iteration
132
// ===========================================================================
133
void
AdaptiveRMaxPlaner
::
initVFunction_
() {
134
vFunction_
->
manager
()->
setRootNode
(
135
vFunction_
->
manager
()->
addTerminalNode
(0.0));
136
for
(
auto
actionIter
=
fmdp_
->
beginActions
();
137
actionIter
!=
fmdp_
->
endActions
();
138
++
actionIter
)
139
vFunction_
=
this
->
operator_
->
add
(
vFunction_
,
140
RECASTED
(
this
->
fmdp_
->
reward
(*
actionIter
)),
141
1);
142
}
143
144
// ===========================================================================
145
// Performs a single step of value iteration
146
// ===========================================================================
147
MultiDimFunctionGraph
<
double
>*
AdaptiveRMaxPlaner
::
valueIteration_
() {
148
// *****************************************************************************************
149
// Loop reset
150
MultiDimFunctionGraph
<
double
>*
newVFunction
151
=
operator_
->
getFunctionInstance
();
152
newVFunction
->
copyAndReassign
(*
vFunction_
,
fmdp_
->
mapMainPrime
());
153
154
// *****************************************************************************************
155
// For each action
156
std
::
vector
<
MultiDimFunctionGraph
<
double
>* >
qActionsSet
;
157
for
(
auto
actionIter
=
fmdp_
->
beginActions
();
158
actionIter
!=
fmdp_
->
endActions
();
159
++
actionIter
) {
160
MultiDimFunctionGraph
<
double
>*
qAction
161
=
evalQaction_
(
newVFunction
, *
actionIter
);
162
163
// *******************************************************************************************
164
// Next, we add the reward
165
qAction
=
addReward_
(
qAction
, *
actionIter
);
166
167
qAction
=
this
->
operator_
->
maximize
(
168
actionsRMaxTable__
[*
actionIter
],
169
this
->
operator_
->
multiply
(
qAction
,
actionsBoolTable__
[*
actionIter
], 1),
170
2);
171
172
qActionsSet
.
push_back
(
qAction
);
173
}
174
delete
newVFunction
;
175
176
// *****************************************************************************************
177
// Next to evaluate main value function, we take maximise over all action
178
// value, ...
179
newVFunction
=
maximiseQactions_
(
qActionsSet
);
180
181
return
newVFunction
;
182
}
183
184
/* **************************************************************************************************
185
* **/
186
/* ** **/
187
/* ** Optimal Policy Evaluation Methods **/
188
/* ** **/
189
/* **************************************************************************************************
190
* **/
191
192
// ===========================================================================
193
// Evals the policy corresponding to the given value function
194
// ===========================================================================
195
void
AdaptiveRMaxPlaner
::
evalPolicy_
() {
196
// *****************************************************************************************
197
// Loop reset
198
MultiDimFunctionGraph
<
double
>*
newVFunction
199
=
operator_
->
getFunctionInstance
();
200
newVFunction
->
copyAndReassign
(*
vFunction_
,
fmdp_
->
mapMainPrime
());
201
202
std
::
vector
<
203
MultiDimFunctionGraph
<
ArgMaxSet
<
double
,
Idx
>,
SetTerminalNodePolicy
>* >
204
argMaxQActionsSet
;
205
// *****************************************************************************************
206
// For each action
207
for
(
auto
actionIter
=
fmdp_
->
beginActions
();
208
actionIter
!=
fmdp_
->
endActions
();
209
++
actionIter
) {
210
MultiDimFunctionGraph
<
double
>*
qAction
211
=
this
->
evalQaction_
(
newVFunction
, *
actionIter
);
212
213
qAction
=
this
->
addReward_
(
qAction
, *
actionIter
);
214
215
qAction
=
this
->
operator_
->
maximize
(
216
actionsRMaxTable__
[*
actionIter
],
217
this
->
operator_
->
multiply
(
qAction
,
actionsBoolTable__
[*
actionIter
], 1),
218
2);
219
220
argMaxQActionsSet
.
push_back
(
makeArgMax_
(
qAction
, *
actionIter
));
221
}
222
delete
newVFunction
;
223
224
// *****************************************************************************************
225
// Next to evaluate main value function, we take maximise over all action
226
// value, ...
227
MultiDimFunctionGraph
<
ArgMaxSet
<
double
,
Idx
>,
SetTerminalNodePolicy
>*
228
argMaxVFunction
229
=
argmaximiseQactions_
(
argMaxQActionsSet
);
230
231
// *****************************************************************************************
232
// Next to evaluate main value function, we take maximise over all action
233
// value, ...
234
extractOptimalPolicy_
(
argMaxVFunction
);
235
}
236
237
// ===========================================================================
238
//
239
// ===========================================================================
240
void
AdaptiveRMaxPlaner
::
makeRMaxFunctionGraphs__
() {
241
rThreshold__
242
=
fmdpLearner__
->
modaMax
() * 5 > 30 ?
fmdpLearner__
->
modaMax
() * 5 : 30;
243
rmax__
=
fmdpLearner__
->
rMax
() / (1.0 -
this
->
discountFactor_
);
244
245
for
(
auto
actionIter
=
this
->
fmdp
()->
beginActions
();
246
actionIter
!=
this
->
fmdp
()->
endActions
();
247
++
actionIter
) {
248
std
::
vector
<
MultiDimFunctionGraph
<
double
>* >
rmaxs
;
249
std
::
vector
<
MultiDimFunctionGraph
<
double
>* >
boolQs
;
250
251
for
(
auto
varIter
=
this
->
fmdp
()->
beginVariables
();
252
varIter
!=
this
->
fmdp
()->
endVariables
();
253
++
varIter
) {
254
const
IVisitableGraphLearner
*
visited
=
counterTable__
[*
actionIter
];
255
256
MultiDimFunctionGraph
<
double
>*
varRMax
257
=
this
->
operator_
->
getFunctionInstance
();
258
MultiDimFunctionGraph
<
double
>*
varBoolQ
259
=
this
->
operator_
->
getFunctionInstance
();
260
261
visited
->
insertSetOfVars
(
varRMax
);
262
visited
->
insertSetOfVars
(
varBoolQ
);
263
264
std
::
pair
<
NodeId
,
NodeId
>
rooty
265
=
visitLearner__
(
visited
,
visited
->
root
(),
varRMax
,
varBoolQ
);
266
varRMax
->
manager
()->
setRootNode
(
rooty
.
first
);
267
varRMax
->
manager
()->
reduce
();
268
varRMax
->
manager
()->
clean
();
269
varBoolQ
->
manager
()->
setRootNode
(
rooty
.
second
);
270
varBoolQ
->
manager
()->
reduce
();
271
varBoolQ
->
manager
()->
clean
();
272
273
rmaxs
.
push_back
(
varRMax
);
274
boolQs
.
push_back
(
varBoolQ
);
275
276
// std::cout << RECASTED(this->fmdp_->transition(*actionIter,
277
// *varIter))->toDot() << std::endl;
278
// for( auto varIter2 =
279
// RECASTED(this->fmdp_->transition(*actionIter,
280
// *varIter))->variablesSequence().beginSafe(); varIter2 !=
281
// RECASTED(this->fmdp_->transition(*actionIter,
282
// *varIter))->variablesSequence().endSafe(); ++varIter2 )
283
// std::cout << (*varIter2)->name() << " | ";
284
// std::cout << std::endl;
285
286
// std::cout << varRMax->toDot() << std::endl;
287
// for( auto varIter =
288
// varRMax->variablesSequence().beginSafe(); varIter !=
289
// varRMax->variablesSequence().endSafe(); ++varIter )
290
// std::cout << (*varIter)->name() << " | ";
291
// std::cout << std::endl;
292
293
// std::cout << varBoolQ->toDot() << std::endl;
294
// for( auto varIter =
295
// varBoolQ->variablesSequence().beginSafe(); varIter !=
296
// varBoolQ->variablesSequence().endSafe(); ++varIter )
297
// std::cout << (*varIter)->name() << " | ";
298
// std::cout << std::endl;
299
}
300
301
// std::cout << "Maximising" << std::endl;
302
actionsRMaxTable__
.
insert
(*
actionIter
,
this
->
maximiseQactions_
(
rmaxs
));
303
actionsBoolTable__
.
insert
(*
actionIter
,
this
->
minimiseFunctions_
(
boolQs
));
304
}
305
}
306
307
// ===========================================================================
308
//
309
// ===========================================================================
310
std
::
pair
<
NodeId
,
NodeId
>
311
AdaptiveRMaxPlaner
::
visitLearner__
(
const
IVisitableGraphLearner
*
visited
,
312
NodeId
currentNodeId
,
313
MultiDimFunctionGraph
<
double
>*
rmax
,
314
MultiDimFunctionGraph
<
double
>*
boolQ
) {
315
std
::
pair
<
NodeId
,
NodeId
>
rep
;
316
if
(
visited
->
isTerminal
(
currentNodeId
)) {
317
rep
.
first
=
rmax
->
manager
()->
addTerminalNode
(
318
visited
->
nodeNbObservation
(
currentNodeId
) <
rThreshold__
?
rmax__
: 0.0);
319
rep
.
second
=
boolQ
->
manager
()->
addTerminalNode
(
320
visited
->
nodeNbObservation
(
currentNodeId
) <
rThreshold__
? 0.0 : 1.0);
321
return
rep
;
322
}
323
324
NodeId
*
rmaxsons
=
static_cast
<
NodeId
* >(
SOA_ALLOCATE
(
325
sizeof
(
NodeId
) *
visited
->
nodeVar
(
currentNodeId
)->
domainSize
()));
326
NodeId
*
bqsons
=
static_cast
<
NodeId
* >(
SOA_ALLOCATE
(
327
sizeof
(
NodeId
) *
visited
->
nodeVar
(
currentNodeId
)->
domainSize
()));
328
329
for
(
Idx
moda
= 0;
moda
<
visited
->
nodeVar
(
currentNodeId
)->
domainSize
();
330
++
moda
) {
331
std
::
pair
<
NodeId
,
NodeId
>
sonp
332
=
visitLearner__
(
visited
,
333
visited
->
nodeSon
(
currentNodeId
,
moda
),
334
rmax
,
335
boolQ
);
336
rmaxsons
[
moda
] =
sonp
.
first
;
337
bqsons
[
moda
] =
sonp
.
second
;
338
}
339
340
rep
.
first
=
rmax
->
manager
()->
addInternalNode
(
visited
->
nodeVar
(
currentNodeId
),
341
rmaxsons
);
342
rep
.
second
=
boolQ
->
manager
()->
addInternalNode
(
visited
->
nodeVar
(
currentNodeId
),
343
bqsons
);
344
return
rep
;
345
}
346
347
// ===========================================================================
348
//
349
// ===========================================================================
350
void
AdaptiveRMaxPlaner
::
clearTables__
() {
351
for
(
auto
actionIter
=
this
->
fmdp
()->
beginActions
();
352
actionIter
!=
this
->
fmdp
()->
endActions
();
353
++
actionIter
) {
354
delete
actionsBoolTable__
[*
actionIter
];
355
delete
actionsRMaxTable__
[*
actionIter
];
356
}
357
actionsRMaxTable__
.
clear
();
358
actionsBoolTable__
.
clear
();
359
}
360
361
}
// end of namespace gum
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:669
RECASTED
#define RECASTED(x)
For shorter line and hence more comprehensive code purposes only.
Definition:
adaptiveRMaxPlaner.cpp:48