aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
E_GreedyDecider.cpp
Go to the documentation of this file.
1
/**
 *
 *   Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 *   info_at_agrum_dot_org
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
20
21
22
/**
 * @file
 * @brief Sources of the E_GreedyDecider class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */
29
30
// =========================================================================
31
#
include
<
cstdlib
>
32
#
include
<
random
>
33
// =========================================================================
34
#
include
<
agrum
/
FMDP
/
decision
/
E_GreedyDecider
.
h
>
35
// =========================================================================
36
37
namespace
gum
{
38
39
// ==========================================================================
40
// Constructor & destructor.
41
// ==========================================================================
42
43
// ###################################################################
44
/**
45
* Constructor
46
*/
47
// ###################################################################
48
E_GreedyDecider
::
E_GreedyDecider
() {
49
GUM_CONSTRUCTOR
(
E_GreedyDecider
);
50
51
_sss_
= 1.0;
52
}
53
54
55
// ###################################################################
56
/**
57
*
58
*/
59
// ###################################################################
60
E_GreedyDecider
::~
E_GreedyDecider
() {
61
GUM_DESTRUCTOR
(
E_GreedyDecider
);
62
;
63
}
64
65
66
// ==========================================================================
67
// Initialization
68
// ==========================================================================
69
70
// ###################################################################
71
/**
72
*
73
*/
74
// ###################################################################
75
void
E_GreedyDecider
::
initialize
(
const
FMDP
<
double
>*
fmdp
) {
76
IDecisionStrategy
::
initialize
(
fmdp
);
77
for
(
auto
varIter
=
fmdp
->
beginVariables
();
varIter
!=
fmdp
->
endVariables
(); ++
varIter
)
78
_sss_
*= (
double
)(*
varIter
)->
domainSize
();
79
}
80
81
82
// ==========================================================================
83
// Incremental methods
84
// ==========================================================================
85
86
// ###################################################################
87
/*
88
* Performs a feedback on the last transition.
89
* In extenso, learn from the transition.
90
* @param reachedState : the state reached after the transition
91
*/
92
// ###################################################################
93
void
E_GreedyDecider
::
checkState
(
const
Instantiation
&
reachedState
,
Idx
actionId
) {
94
if
(
_statecpt_
.
nbVisitedStates
() == 0)
95
_statecpt_
.
reset
(
reachedState
);
96
else
if
(!
_statecpt_
.
checkState
(
reachedState
))
97
_statecpt_
.
addState
(
reachedState
);
98
}
99
100
// ###################################################################
101
/*
102
* @param the state in which we currently are
103
* @return a set containing every optimal actions on that state
104
*/
105
// ###################################################################
106
ActionSet
E_GreedyDecider
::
stateOptimalPolicy
(
const
Instantiation
&
curState
) {
107
double
explo
= (
double
)
std
::
rand
() / (
double
)RAND_MAX;
108
double
temp
=
std
::
pow
((
_sss_
- (
double
)
_statecpt_
.
nbVisitedStates
()) /
_sss_
, 3.0);
109
double
exploThreshold
=
temp
< 0.1 ? 0.1 :
temp
;
110
111
// std::cout << exploThreshold << std::endl;
112
113
ActionSet
optimalSet
=
IDecisionStrategy
::
stateOptimalPolicy
(
curState
);
114
if
(
explo
>
exploThreshold
) {
115
// std::cout << "Exploit : " << optimalSet << std::endl;
116
return
optimalSet
;
117
}
118
119
if
(
allActions_
.
size
() >
optimalSet
.
size
()) {
120
ActionSet
ret
(
allActions_
);
121
ret
-=
optimalSet
;
122
// std::cout << "Explore : " << ret << std::endl;
123
return
ret
;
124
}
125
126
// std::cout << "Explore : " << allActions_ << std::endl;
127
return
allActions_
;
128
}
129
130
}
// End of namespace gum
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643