aGrUM  0.14.2
E_GreedyDecider.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 // =========================================================================
28 #include <cstdlib>
29 #include <random>
30 // =========================================================================
32 // =========================================================================
33 
34 namespace gum {
35 
36  // ==========================================================================
37  // Constructor & destructor.
38  // ==========================================================================
39 
40  // ###################################################################
44  // ###################################################################
46  GUM_CONSTRUCTOR(E_GreedyDecider);
47 
48  __sss = 1.0;
49  }
50 
51 
52  // ###################################################################
56  // ###################################################################
58 
59 
60  // ==========================================================================
61  // Initialization
62  // ==========================================================================
63 
64  // ###################################################################
68  // ###################################################################
71  for (auto varIter = fmdp->beginVariables(); varIter != fmdp->endVariables();
72  ++varIter)
73  __sss *= (double)(*varIter)->domainSize();
74  }
75 
76 
77  // ==========================================================================
78  // Incremental methods
79  // ==========================================================================
80 
81  // ###################################################################
82  /*
83  * Performs a feedback on the last transition.
84  * In extenso, learn from the transition.
    * Concretely, the reached state is recorded in __statecpt: while
    * __statecpt.nbVisitedStates() is 0 the state is passed to reset();
    * afterwards it is passed to addState() only when
    * __statecpt.checkState() returns false for it.
85  * @param reachedState : the state reached after the transition
    * @param actionId : not used by this implementation
86  */
87  // ###################################################################
88  void E_GreedyDecider::checkState(const Instantiation& reachedState,
89  Idx actionId) {
    // Nothing has been recorded yet: hand the very first state to reset().
90  if (__statecpt.nbVisitedStates() == 0)
91  __statecpt.reset(reachedState);
    // Otherwise add the state only when the checker does not report it
    // (presumably "already visited" -- confirm against StatesChecker).
92  else if (!__statecpt.checkState(reachedState))
93  __statecpt.addState(reachedState);
94  }
95 
96  // ###################################################################
97  /*
98  * @param curState the state in which we currently are
99  * @return the optimal actions for that state when exploiting; when exploring, the non-optimal actions (or all actions if every action is optimal)
100  */
101  // ###################################################################
103  double explo = (double)std::rand() / (double)RAND_MAX;
104  double temp =
105  std::pow((__sss - (double)__statecpt.nbVisitedStates()) / __sss, 3.0);
106  double exploThreshold = temp < 0.1 ? 0.1 : temp;
107 
108  // std::cout << exploThreshold << std::endl;
109 
110  ActionSet optimalSet = IDecisionStrategy::stateOptimalPolicy(curState);
111  if (explo > exploThreshold) {
112  // std::cout << "Exploit : " << optimalSet << std::endl;
113  return optimalSet;
114  }
115 
116  if (_allActions.size() > optimalSet.size()) {
117  ActionSet ret(_allActions);
118  ret -= optimalSet;
119  // std::cout << "Explore : " << ret << std::endl;
120  return ret;
121  }
122 
123  // std::cout << "Explore : " << _allActions << std::endl;
124  return _allActions;
125  }
126 
127 } // End of namespace gum
bool checkState(const Instantiation &state)
Definition: statesChecker.h:70
Size size() const
Gives the size.
Definition: actionSet.h:208
void initialize(const FMDP< double > *fmdp)
Initializes the learner.
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
virtual ActionSet stateOptimalPolicy(const Instantiation &curState)
A class to store the optimal actions.
Definition: actionSet.h:85
ActionSet stateOptimalPolicy(const Instantiation &curState)
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92
StatesChecker __statecpt
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
void checkState(const Instantiation &newState, Idx actionId)
void addState(const Instantiation &)
E_GreedyDecider()
Constructor.
<agrum/FMDP/decision/E_GreedyDecider.h>
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:80
void reset(const Instantiation &)
Size Idx
Type for indexes.
Definition: types.h:50
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99
~E_GreedyDecider()
Destructor.
Headers of the epsilon-greedy decision maker class.