aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.cpp
Go to the documentation of this file.
1
/**
2
*
3
* Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Template implementation of AdaptiveRMaxPlaner classes.
25
*
26
* @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27
* GONZALES(@AMU)
28
*/
29
30
// =========================================================================
31
#
include
<
queue
>
32
#
include
<
vector
>
33
//#include <algorithm>
34
//#include <utility>
35
// =========================================================================
36
#
include
<
agrum
/
tools
/
core
/
math
/
math_utils
.
h
>
37
#
include
<
agrum
/
tools
/
core
/
functors
.
h
>
38
#
include
<
agrum
/
tools
/
core
/
smallobjectallocator
/
smallObjectAllocator
.
h
>
39
// =========================================================================
40
#
include
<
agrum
/
tools
/
multidim
/
implementations
/
multiDimFunctionGraph
.
h
>
41
#
include
<
agrum
/
tools
/
multidim
/
instantiation
.
h
>
42
#
include
<
agrum
/
tools
/
multidim
/
potential
.
h
>
43
// =========================================================================
44
#
include
<
agrum
/
FMDP
/
planning
/
adaptiveRMaxPlaner
.
h
>
45
// =========================================================================
46
47
/// For shorter line and hence more comprehensive code purposes only
48
#
define
RECASTED
(
x
)
reinterpret_cast
<
const
MultiDimFunctionGraph
<
double
>
*
>
(
x
)
49
50
namespace
gum
{
51
52
/* **************************************************************************************************
53
* **/
54
/* ** **/
55
/* ** Constructors / Destructors **/
56
/* ** **/
57
/* **************************************************************************************************
58
* **/
59
60
// ===========================================================================
61
// Default constructor
62
// ===========================================================================
63
AdaptiveRMaxPlaner
::
AdaptiveRMaxPlaner
(
IOperatorStrategy
<
double
>*
opi
,
64
double
discountFactor
,
65
double
epsilon
,
66
const
ILearningStrategy
*
learner
,
67
bool
verbose
) :
68
StructuredPlaner
(
opi
,
discountFactor
,
epsilon
,
verbose
),
69
IDecisionStrategy
(),
_fmdpLearner_
(
learner
),
_initialized_
(
false
) {
70
GUM_CONSTRUCTOR
(
AdaptiveRMaxPlaner
);
71
}
72
73
// ===========================================================================
74
// Default destructor
75
// ===========================================================================
76
AdaptiveRMaxPlaner
::~
AdaptiveRMaxPlaner
() {
77
GUM_DESTRUCTOR
(
AdaptiveRMaxPlaner
);
78
79
for
(
HashTableIteratorSafe
<
Idx
,
StatesCounter
* >
scIter
=
_counterTable_
.
beginSafe
();
80
scIter
!=
_counterTable_
.
endSafe
();
81
++
scIter
)
82
delete
scIter
.
val
();
83
}
84
85
/* **************************************************************************************************
86
* **/
87
/* ** **/
88
/* ** Planning Methods **/
89
/* ** **/
90
/* **************************************************************************************************
91
* **/
92
93
// ==========================================================================
94
// Initializes data structure needed for making the planning
95
// ==========================================================================
96
void
AdaptiveRMaxPlaner
::
initialize
(
const
FMDP
<
double
>*
fmdp
) {
97
if
(!
_initialized_
) {
98
StructuredPlaner
::
initialize
(
fmdp
);
99
IDecisionStrategy
::
initialize
(
fmdp
);
100
for
(
auto
actionIter
=
fmdp
->
beginActions
();
actionIter
!=
fmdp
->
endActions
(); ++
actionIter
) {
101
_counterTable_
.
insert
(*
actionIter
,
new
StatesCounter
());
102
_initializedTable_
.
insert
(*
actionIter
,
false
);
103
}
104
_initialized_
=
true
;
105
}
106
}
107
108
// ===========================================================================
109
// Performs a value iteration
110
// ===========================================================================
111
void
AdaptiveRMaxPlaner
::
makePlanning
(
Idx
nbStep
) {
112
_makeRMaxFunctionGraphs_
();
113
114
StructuredPlaner
::
makePlanning
(
nbStep
);
115
116
_clearTables_
();
117
}
118
119
/* **************************************************************************************************
120
* **/
121
/* ** **/
122
/* ** Value Iteration Methods **/
123
/* ** **/
124
/* **************************************************************************************************
125
* **/
126
127
// ===========================================================================
128
// Performs a single step of value iteration
129
// ===========================================================================
130
void
AdaptiveRMaxPlaner
::
initVFunction_
() {
131
vFunction_
->
manager
()->
setRootNode
(
vFunction_
->
manager
()->
addTerminalNode
(0.0));
132
for
(
auto
actionIter
=
fmdp_
->
beginActions
();
actionIter
!=
fmdp_
->
endActions
(); ++
actionIter
)
133
vFunction_
=
this
->
operator_
->
add
(
vFunction_
,
RECASTED
(
this
->
fmdp_
->
reward
(*
actionIter
)), 1);
134
}
135
136
// ===========================================================================
137
// Performs a single step of value iteration
138
// ===========================================================================
139
MultiDimFunctionGraph
<
double
>*
AdaptiveRMaxPlaner
::
valueIteration_
() {
140
// *****************************************************************************************
141
// Loop reset
142
MultiDimFunctionGraph
<
double
>*
newVFunction
=
operator_
->
getFunctionInstance
();
143
newVFunction
->
copyAndReassign
(*
vFunction_
,
fmdp_
->
mapMainPrime
());
144
145
// *****************************************************************************************
146
// For each action
147
std
::
vector
<
MultiDimFunctionGraph
<
double
>* >
qActionsSet
;
148
for
(
auto
actionIter
=
fmdp_
->
beginActions
();
actionIter
!=
fmdp_
->
endActions
(); ++
actionIter
) {
149
MultiDimFunctionGraph
<
double
>*
qAction
=
evalQaction_
(
newVFunction
, *
actionIter
);
150
151
// *******************************************************************************************
152
// Next, we add the reward
153
qAction
=
addReward_
(
qAction
, *
actionIter
);
154
155
qAction
=
this
->
operator_
->
maximize
(
156
_actionsRMaxTable_
[*
actionIter
],
157
this
->
operator_
->
multiply
(
qAction
,
_actionsBoolTable_
[*
actionIter
], 1),
158
2);
159
160
qActionsSet
.
push_back
(
qAction
);
161
}
162
delete
newVFunction
;
163
164
// *****************************************************************************************
165
// Next to evaluate main value function, we take maximise over all action
166
// value, ...
167
newVFunction
=
maximiseQactions_
(
qActionsSet
);
168
169
return
newVFunction
;
170
}
171
172
/* **************************************************************************************************
173
* **/
174
/* ** **/
175
/* ** Optimal Policy Evaluation Methods **/
176
/* ** **/
177
/* **************************************************************************************************
178
* **/
179
180
// ===========================================================================
181
// Evals the policy corresponding to the given value function
182
// ===========================================================================
183
void
AdaptiveRMaxPlaner
::
evalPolicy_
() {
184
// *****************************************************************************************
185
// Loop reset
186
MultiDimFunctionGraph
<
double
>*
newVFunction
=
operator_
->
getFunctionInstance
();
187
newVFunction
->
copyAndReassign
(*
vFunction_
,
fmdp_
->
mapMainPrime
());
188
189
std
::
vector
<
MultiDimFunctionGraph
<
ArgMaxSet
<
double
,
Idx
>,
SetTerminalNodePolicy
>* >
190
argMaxQActionsSet
;
191
// *****************************************************************************************
192
// For each action
193
for
(
auto
actionIter
=
fmdp_
->
beginActions
();
actionIter
!=
fmdp_
->
endActions
(); ++
actionIter
) {
194
MultiDimFunctionGraph
<
double
>*
qAction
=
this
->
evalQaction_
(
newVFunction
, *
actionIter
);
195
196
qAction
=
this
->
addReward_
(
qAction
, *
actionIter
);
197
198
qAction
=
this
->
operator_
->
maximize
(
199
_actionsRMaxTable_
[*
actionIter
],
200
this
->
operator_
->
multiply
(
qAction
,
_actionsBoolTable_
[*
actionIter
], 1),
201
2);
202
203
argMaxQActionsSet
.
push_back
(
makeArgMax_
(
qAction
, *
actionIter
));
204
}
205
delete
newVFunction
;
206
207
// *****************************************************************************************
208
// Next to evaluate main value function, we take maximise over all action
209
// value, ...
210
MultiDimFunctionGraph
<
ArgMaxSet
<
double
,
Idx
>,
SetTerminalNodePolicy
>*
argMaxVFunction
211
=
argmaximiseQactions_
(
argMaxQActionsSet
);
212
213
// *****************************************************************************************
214
// Next to evaluate main value function, we take maximise over all action
215
// value, ...
216
extractOptimalPolicy_
(
argMaxVFunction
);
217
}
218
219
// ===========================================================================
220
//
221
// ===========================================================================
222
void
AdaptiveRMaxPlaner
::
_makeRMaxFunctionGraphs_
() {
223
_rThreshold_
=
_fmdpLearner_
->
modaMax
() * 5 > 30 ?
_fmdpLearner_
->
modaMax
() * 5 : 30;
224
_rmax_
=
_fmdpLearner_
->
rMax
() / (1.0 -
this
->
discountFactor_
);
225
226
for
(
auto
actionIter
=
this
->
fmdp
()->
beginActions
();
actionIter
!=
this
->
fmdp
()->
endActions
();
227
++
actionIter
) {
228
std
::
vector
<
MultiDimFunctionGraph
<
double
>* >
rmaxs
;
229
std
::
vector
<
MultiDimFunctionGraph
<
double
>* >
boolQs
;
230
231
for
(
auto
varIter
=
this
->
fmdp
()->
beginVariables
();
varIter
!=
this
->
fmdp
()->
endVariables
();
232
++
varIter
) {
233
const
IVisitableGraphLearner
*
visited
=
_counterTable_
[*
actionIter
];
234
235
MultiDimFunctionGraph
<
double
>*
varRMax
=
this
->
operator_
->
getFunctionInstance
();
236
MultiDimFunctionGraph
<
double
>*
varBoolQ
=
this
->
operator_
->
getFunctionInstance
();
237
238
visited
->
insertSetOfVars
(
varRMax
);
239
visited
->
insertSetOfVars
(
varBoolQ
);
240
241
std
::
pair
<
NodeId
,
NodeId
>
rooty
242
=
_visitLearner_
(
visited
,
visited
->
root
(),
varRMax
,
varBoolQ
);
243
varRMax
->
manager
()->
setRootNode
(
rooty
.
first
);
244
varRMax
->
manager
()->
reduce
();
245
varRMax
->
manager
()->
clean
();
246
varBoolQ
->
manager
()->
setRootNode
(
rooty
.
second
);
247
varBoolQ
->
manager
()->
reduce
();
248
varBoolQ
->
manager
()->
clean
();
249
250
rmaxs
.
push_back
(
varRMax
);
251
boolQs
.
push_back
(
varBoolQ
);
252
253
// std::cout << RECASTED(this->fmdp_->transition(*actionIter,
254
// *varIter))->toDot() << std::endl;
255
// for( auto varIter2 =
256
// RECASTED(this->fmdp_->transition(*actionIter,
257
// *varIter))->variablesSequence().beginSafe(); varIter2 !=
258
// RECASTED(this->fmdp_->transition(*actionIter,
259
// *varIter))->variablesSequence().endSafe(); ++varIter2 )
260
// std::cout << (*varIter2)->name() << " | ";
261
// std::cout << std::endl;
262
263
// std::cout << varRMax->toDot() << std::endl;
264
// for( auto varIter =
265
// varRMax->variablesSequence().beginSafe(); varIter !=
266
// varRMax->variablesSequence().endSafe(); ++varIter )
267
// std::cout << (*varIter)->name() << " | ";
268
// std::cout << std::endl;
269
270
// std::cout << varBoolQ->toDot() << std::endl;
271
// for( auto varIter =
272
// varBoolQ->variablesSequence().beginSafe(); varIter !=
273
// varBoolQ->variablesSequence().endSafe(); ++varIter )
274
// std::cout << (*varIter)->name() << " | ";
275
// std::cout << std::endl;
276
}
277
278
// std::cout << "Maximising" << std::endl;
279
_actionsRMaxTable_
.
insert
(*
actionIter
,
this
->
maximiseQactions_
(
rmaxs
));
280
_actionsBoolTable_
.
insert
(*
actionIter
,
this
->
minimiseFunctions_
(
boolQs
));
281
}
282
}
283
284
// ===========================================================================
285
//
286
// ===========================================================================
287
std
::
pair
<
NodeId
,
NodeId
>
288
AdaptiveRMaxPlaner
::
_visitLearner_
(
const
IVisitableGraphLearner
*
visited
,
289
NodeId
currentNodeId
,
290
MultiDimFunctionGraph
<
double
>*
rmax
,
291
MultiDimFunctionGraph
<
double
>*
boolQ
) {
292
std
::
pair
<
NodeId
,
NodeId
>
rep
;
293
if
(
visited
->
isTerminal
(
currentNodeId
)) {
294
rep
.
first
=
rmax
->
manager
()->
addTerminalNode
(
295
visited
->
nodeNbObservation
(
currentNodeId
) <
_rThreshold_
?
_rmax_
: 0.0);
296
rep
.
second
=
boolQ
->
manager
()->
addTerminalNode
(
297
visited
->
nodeNbObservation
(
currentNodeId
) <
_rThreshold_
? 0.0 : 1.0);
298
return
rep
;
299
}
300
301
NodeId
*
rmaxsons
=
static_cast
<
NodeId
* >(
302
SOA_ALLOCATE
(
sizeof
(
NodeId
) *
visited
->
nodeVar
(
currentNodeId
)->
domainSize
()));
303
NodeId
*
bqsons
=
static_cast
<
NodeId
* >(
304
SOA_ALLOCATE
(
sizeof
(
NodeId
) *
visited
->
nodeVar
(
currentNodeId
)->
domainSize
()));
305
306
for
(
Idx
moda
= 0;
moda
<
visited
->
nodeVar
(
currentNodeId
)->
domainSize
(); ++
moda
) {
307
std
::
pair
<
NodeId
,
NodeId
>
sonp
308
=
_visitLearner_
(
visited
,
visited
->
nodeSon
(
currentNodeId
,
moda
),
rmax
,
boolQ
);
309
rmaxsons
[
moda
] =
sonp
.
first
;
310
bqsons
[
moda
] =
sonp
.
second
;
311
}
312
313
rep
.
first
=
rmax
->
manager
()->
addInternalNode
(
visited
->
nodeVar
(
currentNodeId
),
rmaxsons
);
314
rep
.
second
=
boolQ
->
manager
()->
addInternalNode
(
visited
->
nodeVar
(
currentNodeId
),
bqsons
);
315
return
rep
;
316
}
317
318
// ===========================================================================
319
//
320
// ===========================================================================
321
void
AdaptiveRMaxPlaner
::
_clearTables_
() {
322
for
(
auto
actionIter
=
this
->
fmdp
()->
beginActions
();
actionIter
!=
this
->
fmdp
()->
endActions
();
323
++
actionIter
) {
324
delete
_actionsBoolTable_
[*
actionIter
];
325
delete
_actionsRMaxTable_
[*
actionIter
];
326
}
327
_actionsRMaxTable_
.
clear
();
328
_actionsBoolTable_
.
clear
();
329
}
330
331
}
// end of namespace gum
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643
RECASTED
#define RECASTED(x)
For shorter line and hence more comprehensive code purposes only.
Definition:
adaptiveRMaxPlaner.cpp:48