aGrUM
0.20.2
a C++ library for (probabilistic) graphical models
E_GreedyDecider.cpp
Go to the documentation of this file.
1
/**
2
*
3
* Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/**
23
* @file
24
* @brief Sources of the E_GreedyDecider class.
25
*
26
* @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
27
* GONZALES(@AMU)
28
*/
29
30
// =========================================================================
31
#
include
<
cstdlib
>
32
#
include
<
random
>
33
// =========================================================================
34
#
include
<
agrum
/
FMDP
/
decision
/
E_GreedyDecider
.
h
>
35
// =========================================================================
36
37
namespace
gum
{
38
39
// ==========================================================================
40
// Constructor & destructor.
41
// ==========================================================================
42
43
// ###################################################################
44
/**
45
* Constructor
46
*/
47
// ###################################################################
48
E_GreedyDecider::E_GreedyDecider() :
    sss__(1.0) {   // 1.0 is the neutral element of the product built in initialize()
  GUM_CONSTRUCTOR(E_GreedyDecider);
}
53
54
55
// ###################################################################
56
/**
57
* Destructor
58
*/
59
// ###################################################################
60
E_GreedyDecider::~E_GreedyDecider() {
  // Mirrors the GUM_CONSTRUCTOR call made in the constructor.
  GUM_DESTRUCTOR(E_GreedyDecider);
}
61
62
63
// ==========================================================================
64
// Initialization
65
// ==========================================================================
66
67
// ###################################################################
68
/**
69
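* Initialization: forwards the FMDP to IDecisionStrategy::initialize and
* accumulates the product of the domain sizes of its variables.
* @param fmdp : the FMDP whose variables are iterated over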
70
*/
71
// ###################################################################
72
void
E_GreedyDecider
::
initialize
(
const
FMDP
<
double
>*
fmdp
) {
73
IDecisionStrategy
::
initialize
(
fmdp
);
74
for
(
auto
varIter
=
fmdp
->
beginVariables
();
varIter
!=
fmdp
->
endVariables
();
75
++
varIter
)
76
sss__
*= (
double
)(*
varIter
)->
domainSize
();
77
}
78
79
80
// ==========================================================================
81
// Incremental methods
82
// ==========================================================================
83
84
// ###################################################################
85
/*
86
* Performs a feedback on the last transition.
87
* In extenso, learn from the transition.
88
* @param reachedState : the state reached after the transition
* @param actionId : id of an action; not used by this implementation
89
*/
90
// ###################################################################
91
void
E_GreedyDecider
::
checkState
(
const
Instantiation
&
reachedState
,
92
Idx
actionId
) {
93
if
(
statecpt__
.
nbVisitedStates
() == 0)
94
statecpt__
.
reset
(
reachedState
);
95
else
if
(!
statecpt__
.
checkState
(
reachedState
))
96
statecpt__
.
addState
(
reachedState
);
97
}
98
99
// ###################################################################
100
/*
101
* @param curState the state in which we currently are
102
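* @return the set of optimal actions for that state when exploiting; when
* exploring, the non-optimal actions (or every action when the optimal set
* is not smaller than the set of all actions)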
103
*/
104
// ###################################################################
105
// Chooses between exploiting and exploring for the state `curState`.
//
// A random draw in [0, 1] is compared with an exploration threshold equal to
// ((sss__ - nbVisitedStates) / sss__)^3, floored at 0.1, where sss__ is the
// product of variable domain sizes computed in initialize().
//
// @param curState the state forwarded to IDecisionStrategy::stateOptimalPolicy.
// @return the optimal set when the draw exceeds the threshold; otherwise
//         allActions_ minus the optimal set, or allActions_ itself when the
//         optimal set is not smaller than allActions_.
ActionSet E_GreedyDecider::stateOptimalPolicy(const Instantiation& curState) {
  // std::rand is kept as the random source so the draw sequence is unchanged.
  const double explo
     = static_cast< double >(std::rand()) / static_cast< double >(RAND_MAX);

  // Cube computed by plain multiplication: the exponent is a fixed small
  // integer, so a general-purpose std::pow call (and <cmath>) is unnecessary.
  const double unvisitedRatio
     = (sss__ - static_cast< double >(statecpt__.nbVisitedStates())) / sss__;
  const double temp = unvisitedRatio * unvisitedRatio * unvisitedRatio;

  // The threshold never drops below 0.1.
  const double exploThreshold = temp < 0.1 ? 0.1 : temp;

  ActionSet optimalSet = IDecisionStrategy::stateOptimalPolicy(curState);

  // Exploit: hand back the optimal actions unchanged.
  if (explo > exploThreshold) { return optimalSet; }

  // Explore: prefer the actions that are not in the optimal set, if any.
  if (allActions_.size() > optimalSet.size()) {
    ActionSet ret(allActions_);
    ret -= optimalSet;
    return ret;
  }

  // No action outside the optimal set: fall back on every action.
  return allActions_;
}
129
130
}
// End of namespace gum
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:669