aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
sdyna.cpp
Go to the documentation of this file.
1
/**
2
*
3
* Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Implementation of the SDYNA class.
25
*
26
* @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27
* GONZALES(@AMU)
28
*/
29
30
31
// =========================================================================
32
#
include
<
cstdlib
>
33
#
include
<
random
>
34
// =========================================================================
35
#
include
<
agrum
/
FMDP
/
SDyna
/
sdyna
.
h
>
36
// =========================================================================
37
38
namespace
gum
{
39
40
// ==========================================================================
41
// Constructor & destructor.
42
// ==========================================================================
43
44
// ###################################################################
45
/*
46
* Constructor
47
*
48
* @param observationPhaseLenght : the number of observation done before a
49
* replanning is launch. If equals 0, a planning is done after each structural
50
* change.
51
* @param nbValueIterationStep : the number of value iteration done during
52
* one planning
53
* @return an instance of SDyna architecture
54
*/
55
// ###################################################################
56
57
SDYNA::SDYNA(ILearningStrategy*           learner,
             IPlanningStrategy< double >* planer,
             IDecisionStrategy*           decider,
             Idx                          observationPhaseLenght,
             Idx                          nbValueIterationStep,
             bool                         actionReward,
             bool                         verbose) :
    // NOTE(review): SDYNA takes ownership of learner, planer and decider —
    // all three are deleted in ~SDYNA().
    _learner_(learner),
    _planer_(planer), _decider_(decider), _observationPhaseLenght_(observationPhaseLenght),
    _nbValueIterationStep_(nbValueIterationStep), _actionReward_(actionReward),
    verbose_(verbose) {
  GUM_CONSTRUCTOR(SDYNA);

  // The factored MDP model shared by the learning, planning and decision
  // strategies (handed to them in initialize()); owned and deleted by SDYNA.
  fmdp_ = new FMDP< double >();

  // Counter starts at 1: feedback() triggers a replanning whenever
  // _nbObservation_ % _observationPhaseLenght_ == 0.
  _nbObservation_ = 1;
}
74
75
// ###################################################################
76
// Destructor
77
// ###################################################################
78
SDYNA::~SDYNA() {
  // SDYNA owns the three strategies passed to its constructor.
  delete _decider_;

  delete _learner_;

  delete _planer_;

  // Free every Observation collected through feedback(); _bin_ stores the
  // owning pointers (the learner only keeps references to them).
  for (auto obsIter = _bin_.beginSafe(); obsIter != _bin_.endSafe(); ++obsIter)
    delete *obsIter;

  // The model itself was allocated in the constructor.
  delete fmdp_;

  GUM_DESTRUCTOR(SDYNA);
}
92
93
// ==========================================================================
94
// Initialization
95
// ==========================================================================
96
97
// Hands the (shared) factored MDP model to each strategy so that they all
// observe and act on the same model.
// NOTE(review): learner is initialized first, then planer, then decider —
// presumably later strategies rely on structures set up by earlier ones;
// keep this order.
void SDYNA::initialize() {
  _learner_->initialize(fmdp_);
  _planer_->initialize(fmdp_);
  _decider_->initialize(fmdp_);
}
102
103
// ###################################################################
104
/*
105
* Initializes the Sdyna instance.
106
* @param initialState : the state of the studied system from which we will
107
* begin the explore, learn and exploit process
108
*/
109
// ###################################################################
110
// Initializes the SDyna instance and sets the state from which the
// explore / learn / exploit process starts.
// @param initialState : the initial state of the studied system
void SDYNA::initialize(const Instantiation& initialState) {
  initialize();
  setCurrentState(initialState);
}
114
115
// ==========================================================================
116
/// Incremental methods
117
// ==========================================================================
118
119
// ###################################################################
120
/*
121
* Performs a feedback on the last transition.
122
* In extenso, learn from the transition.
123
* @param originalState : the state we were in before the transition
124
* @param reachedState : the state we reached after
125
* @param performedAction : the action we performed
126
* @param obtainedReward : the reward we obtained
127
*/
128
// ###################################################################
129
void
SDYNA
::
feedback
(
const
Instantiation
&
curState
,
130
const
Instantiation
&
prevState
,
131
Idx
lastAction
,
132
double
reward
) {
133
_lastAction_
=
lastAction
;
134
lastState_
=
prevState
;
135
feedback
(
curState
,
reward
);
136
}
137
138
// ###################################################################
139
/*
140
* Performs a feedback on the last transition.
141
* In extenso, learn from the transition.
142
* @param reachedState : the state reached after the transition
143
* @param obtainedReward : the reward obtained during the transition
144
* @warning Uses the _originalState_ and _performedAction_ stored in cache
145
* If you want to specify the original state and the performed action, see
146
* below
147
*/
148
// ###################################################################
149
void
SDYNA
::
feedback
(
const
Instantiation
&
newState
,
double
reward
) {
150
Observation
*
obs
=
new
Observation
();
151
152
for
(
auto
varIter
=
lastState_
.
variablesSequence
().
beginSafe
();
153
varIter
!=
lastState_
.
variablesSequence
().
endSafe
();
154
++
varIter
)
155
obs
->
setModality
(*
varIter
,
lastState_
.
val
(**
varIter
));
156
157
for
(
auto
varIter
=
newState
.
variablesSequence
().
beginSafe
();
158
varIter
!=
newState
.
variablesSequence
().
endSafe
();
159
++
varIter
) {
160
obs
->
setModality
(
fmdp_
->
main2prime
(*
varIter
),
newState
.
val
(**
varIter
));
161
162
if
(
this
->
_actionReward_
)
163
obs
->
setRModality
(*
varIter
,
lastState_
.
val
(**
varIter
));
164
else
165
obs
->
setRModality
(*
varIter
,
newState
.
val
(**
varIter
));
166
}
167
168
obs
->
setReward
(
reward
);
169
170
_learner_
->
addObservation
(
_lastAction_
,
obs
);
171
_bin_
.
insert
(
obs
);
172
173
setCurrentState
(
newState
);
174
_decider_
->
checkState
(
lastState_
,
_lastAction_
);
175
176
if
(
_nbObservation_
%
_observationPhaseLenght_
== 0)
makePlanning
(
_nbValueIterationStep_
);
177
178
_nbObservation_
++;
179
}
180
181
// ###################################################################
182
/*
183
* Starts a new planning
184
* @param Idx : the maximal number of value iteration performed in this
185
* planning
186
*/
187
// ###################################################################
188
// Starts a new planning.
// @param nbValueIterationStep : the maximal number of value iterations
// performed during this planning
void SDYNA::makePlanning(Idx nbValueIterationStep) {
  // Step 1: fold the observations collected so far into the FMDP model.
  if (verbose_) { std::cout << "Updating decision trees ..." << std::endl; }
  _learner_->updateFMDP();

  // Step 2: run (at most) nbValueIterationStep value-iteration sweeps.
  if (verbose_) { std::cout << "Planning ..." << std::endl; }
  _planer_->makePlanning(nbValueIterationStep);

  // Step 3: hand the freshly computed optimal policy to the decision maker.
  _decider_->setOptimalStrategy(_planer_->optimalPolicy());
}
199
200
// ##################################################################
201
/*
202
* @return the id of the action the SDyna instance wish to be performed
203
* @param the state in which we currently are
204
*/
205
// ###################################################################
206
// @param curState : the state in which the system currently is
// @return the id of the action the SDyna instance wishes to be performed
Idx SDYNA::takeAction(const Instantiation& curState) {
  // Update the cached state, then delegate to the no-argument overload.
  lastState_ = curState;
  return takeAction();
}
210
211
// ###################################################################
212
/*
213
* @return the id of the action the SDyna instance wish to be performed
214
*/
215
// ###################################################################
216
// @return the id of the action the SDyna instance wishes to be performed,
// chosen from the decider's optimal policy for the cached lastState_.
// Ties between equally optimal actions are broken uniformly at random.
Idx SDYNA::takeAction() {
  ActionSet actionSet = _decider_->stateOptimalPolicy(lastState_);
  if (actionSet.size() == 1) {
    _lastAction_ = actionSet[0];
  } else {
    // Draw an index uniformly in [0, actionSet.size() - 1].
    // std::uniform_int_distribution (from <random>, already included by this
    // file) is unbiased and cannot yield an out-of-range index, unlike the
    // former rand()/RAND_MAX scaling which was biased and needed a special
    // case for randy == actionSet.size().
    static std::mt19937                  generator{std::random_device{}()};
    std::uniform_int_distribution< Idx > pickAction(0, actionSet.size() - 1);
    _lastAction_ = actionSet[pickAction(generator)];
  }
  return _lastAction_;
}
226
227
// ###################################################################
228
//
229
// ###################################################################
230
std
::
string
SDYNA
::
toString
() {
231
std
::
stringstream
description
;
232
233
description
<<
fmdp_
->
toString
() <<
std
::
endl
;
234
description
<<
_planer_
->
optimalPolicy2String
() <<
std
::
endl
;
235
236
return
description
.
str
();
237
}
238
239
}
// End of namespace gum
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643