aGrUM 0.20.2
a C++ library for (probabilistic) graphical models

sdyna.cpp
/**
 *
 *   Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 *   info_at_agrum_dot_org
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */

/**
 * @file
 * @brief Implementation of the SDYNA class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */

// =========================================================================
#include <cstdlib>
#include <random>
// =========================================================================
#include <agrum/FMDP/SDyna/sdyna.h>
// =========================================================================

namespace gum {

  // ==========================================================================
  // Constructor & destructor.
  // ==========================================================================

  // ###################################################################
  /*
   * Constructor
   *
   * @param observationPhaseLenght : the number of observations made before a
   * replanning is launched. If equal to 0, planning is done after each
   * structural change.
   * @param nbValueIterationStep : the number of value iteration steps done
   * during one planning
   * @return an instance of the SDyna architecture
   */
  // ###################################################################

  SDYNA::SDYNA(ILearningStrategy*         learner,
               IPlanningStrategy< double >* planer,
               IDecisionStrategy*         decider,
               Idx                        observationPhaseLenght,
               Idx                        nbValueIterationStep,
               bool                       actionReward,
               bool                       verbose) :
      learner__(learner),
      planer__(planer), decider__(decider),
      observationPhaseLenght__(observationPhaseLenght),
      nbValueIterationStep__(nbValueIterationStep), actionReward__(actionReward),
      verbose_(verbose) {
    GUM_CONSTRUCTOR(SDYNA);

    fmdp_ = new FMDP< double >();

    nbObservation__ = 1;
  }

  // ###################################################################
  // Destructor
  // ###################################################################
  SDYNA::~SDYNA() {
    delete decider__;

    delete learner__;

    delete planer__;

    for (auto obsIter = bin__.beginSafe(); obsIter != bin__.endSafe(); ++obsIter)
      delete *obsIter;

    delete fmdp_;

    GUM_DESTRUCTOR(SDYNA);
  }
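  // NOTE: as the destructor above shows, SDYNA owns the learning, planning and
  // decision strategies passed to its constructor, as well as every Observation
  // collected in bin__; callers must therefore hand over heap-allocated objects.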

  // ==========================================================================
  // Initialization
  // ==========================================================================

  void SDYNA::initialize() {
    learner__->initialize(fmdp_);
    planer__->initialize(fmdp_);
    decider__->initialize(fmdp_);
  }

  // ###################################################################
  /*
   * Initializes the SDyna instance.
   * @param initialState : the state of the studied system from which we will
   * begin the explore, learn and exploit process
   */
  // ###################################################################
  void SDYNA::initialize(const Instantiation& initialState) {
    initialize();
    setCurrentState(initialState);
  }

  // ==========================================================================
  /// Incremental methods
  // ==========================================================================

  // ###################################################################
  /*
   * Performs a feedback on the last transition, i.e. learns from it.
   * @param curState : the state we reached after the transition
   * @param prevState : the state we were in before the transition
   * @param lastAction : the action we performed
   * @param reward : the reward we obtained
   */
  // ###################################################################
  void SDYNA::feedback(const Instantiation& curState,
                       const Instantiation& prevState,
                       Idx                  lastAction,
                       double               reward) {
    lastAction__ = lastAction;
    lastState_   = prevState;
    feedback(curState, reward);
  }

  // ###################################################################
  /*
   * Performs a feedback on the last transition, i.e. learns from it.
   * @param newState : the state reached after the transition
   * @param reward : the reward obtained during the transition
   * @warning Uses the lastState_ and lastAction__ stored in cache.
   * If you want to specify the original state and the performed action, see
   * the four-argument overload above.
   */
  // ###################################################################
  void SDYNA::feedback(const Instantiation& newState, double reward) {
    Observation* obs = new Observation();

    // record the values of the variables in the state we started from
    for (auto varIter = lastState_.variablesSequence().beginSafe();
         varIter != lastState_.variablesSequence().endSafe();
         ++varIter)
      obs->setModality(*varIter, lastState_.val(**varIter));

    for (auto varIter = newState.variablesSequence().beginSafe();
         varIter != newState.variablesSequence().endSafe();
         ++varIter) {
      // record the values of the primed (post-transition) variables
      obs->setModality(fmdp_->main2prime(*varIter), newState.val(**varIter));

      // the reward modalities come from the state in which the action was
      // taken (actionReward__) or from the reached state
      if (this->actionReward__)
        obs->setRModality(*varIter, lastState_.val(**varIter));
      else
        obs->setRModality(*varIter, newState.val(**varIter));
    }

    obs->setReward(reward);

    learner__->addObservation(lastAction__, obs);
    bin__.insert(obs);

    setCurrentState(newState);
    decider__->checkState(lastState_, lastAction__);

    if (nbObservation__ % observationPhaseLenght__ == 0)
      makePlanning(nbValueIterationStep__);

    nbObservation__++;
  }

  // ###################################################################
  /*
   * Starts a new planning.
   * @param nbValueIterationStep : the maximal number of value iteration steps
   * performed in this planning
   */
  // ###################################################################
  void SDYNA::makePlanning(Idx nbValueIterationStep) {
    if (verbose_) std::cout << "Updating decision trees ..." << std::endl;
    learner__->updateFMDP();
    // std::cout << "Done" << std::endl;

    if (verbose_) std::cout << "Planning ..." << std::endl;
    planer__->makePlanning(nbValueIterationStep);
    // std::cout << "Done" << std::endl;

    decider__->setOptimalStrategy(planer__->optimalPolicy());
  }
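  // NOTE: makePlanning() above chains the three strategies of the SDyna
  // cycle: the learner folds its observations back into fmdp_, the planner
  // runs at most nbValueIterationStep steps of value iteration on it, and
  // the decider is handed the resulting optimal policy.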

  // ##################################################################
  /*
   * @return the id of the action the SDyna instance wishes to be performed
   * @param curState : the state in which we currently are
   */
  // ###################################################################
  Idx SDYNA::takeAction(const Instantiation& curState) {
    lastState_ = curState;
    return takeAction();
  }

  // ###################################################################
  /*
   * @return the id of the action the SDyna instance wishes to be performed
   */
  // ###################################################################
  Idx SDYNA::takeAction() {
    ActionSet actionSet = decider__->stateOptimalPolicy(lastState_);
    if (actionSet.size() == 1) {
      lastAction__ = actionSet[0];
    } else {
      Idx randy = (Idx)((double)std::rand() / (double)RAND_MAX * actionSet.size());
      lastAction__ = actionSet[randy == actionSet.size() ? 0 : randy];
    }
    return lastAction__;
  }
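  // NOTE: when several actions are equally optimal in the current state,
  // takeAction() above draws one of them uniformly at random; the ternary
  // guards against the corner case where std::rand() returns RAND_MAX and
  // randy would index one past the end of actionSet.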

  // ###################################################################
  //
  // ###################################################################
  std::string SDYNA::toString() {
    std::stringstream description;

    description << fmdp_->toString() << std::endl;
    description << planer__->optimalPolicy2String() << std::endl;

    return description.str();
  }

}   // End of namespace gum
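
A minimal usage sketch of the explore/learn/exploit loop this file implements, restricted to the methods defined above. The strategy pointers (myLearner, myPlaner, myDecider), the environment object env and its methods (currentState(), perform(), reward()) are hypothetical placeholders, not part of aGrUM's API; recall that SDYNA deletes the three strategies in its destructor, so they must be heap-allocated.

// hypothetical driver code, not part of aGrUM
gum::SDYNA sdyna(myLearner,   // ILearningStrategy*, heap-allocated
                 myPlaner,    // IPlanningStrategy< double >*
                 myDecider,   // IDecisionStrategy*
                 100,         // observationPhaseLenght: replan every 100 observations
                 42,          // nbValueIterationStep per planning phase
                 false,       // actionReward: rewards attached to reached states
                 true);       // verbose
sdyna.initialize(env.currentState());
for (gum::Idx i = 0; i < 1000; ++i) {
  gum::Idx action = sdyna.takeAction();               // follow the current policy
  env.perform(action);                                // hypothetical environment step
  sdyna.feedback(env.currentState(), env.reward());   // learn and, every
                                                      // observationPhaseLenght
                                                      // observations, replan
}
std::cout << sdyna.toString() << std::endl;           // dump the FMDP and policy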