aGrUM  0.16.0
E_GreedyDecider.cpp
Go to the documentation of this file.
1 
30 // =========================================================================
31 #include <cstdlib>
32 #include <random>
33 // =========================================================================
35 // =========================================================================
36 
37 namespace gum {
38 
39  // ==========================================================================
40  // Constructor & destructor.
41  // ==========================================================================
42 
43  // ###################################################################
47  // ###################################################################
49  GUM_CONSTRUCTOR(E_GreedyDecider);
50 
51  __sss = 1.0;
52  }
53 
54 
55  // ###################################################################
59  // ###################################################################
61 
62 
63  // ==========================================================================
64  // Initialization
65  // ==========================================================================
66 
67  // ###################################################################
71  // ###################################################################
74  for (auto varIter = fmdp->beginVariables(); varIter != fmdp->endVariables();
75  ++varIter)
76  __sss *= (double)(*varIter)->domainSize();
77  }
78 
79 
80  // ==========================================================================
81  // Incremental methods
82  // ==========================================================================
83 
84  // ###################################################################
85  /*
86  * Performs a feedback on the last transition.
87  * In extenso, learn from the transition.
88  * @param reachedState : the state reached after the transition
89  */
90  // ###################################################################
91  void E_GreedyDecider::checkState(const Instantiation& reachedState,
92  Idx actionId) {
93  if (__statecpt.nbVisitedStates() == 0)
94  __statecpt.reset(reachedState);
95  else if (!__statecpt.checkState(reachedState))
96  __statecpt.addState(reachedState);
97  }
98 
99  // ###################################################################
100  /*
101  * @param curState the state in which we currently are
102  * @return the optimal actions for that state when exploiting; when exploring, the remaining actions (or all actions if every action is optimal)
103  */
104  // ###################################################################
106  double explo = (double)std::rand() / (double)RAND_MAX;
107  double temp =
108  std::pow((__sss - (double)__statecpt.nbVisitedStates()) / __sss, 3.0);
109  double exploThreshold = temp < 0.1 ? 0.1 : temp;
110 
111  // std::cout << exploThreshold << std::endl;
112 
113  ActionSet optimalSet = IDecisionStrategy::stateOptimalPolicy(curState);
114  if (explo > exploThreshold) {
115  // std::cout << "Exploit : " << optimalSet << std::endl;
116  return optimalSet;
117  }
118 
119  if (_allActions.size() > optimalSet.size()) {
120  ActionSet ret(_allActions);
121  ret -= optimalSet;
122  // std::cout << "Explore : " << ret << std::endl;
123  return ret;
124  }
125 
126  // std::cout << "Explore : " << _allActions << std::endl;
127  return _allActions;
128  }
129 
130 } // End of namespace gum
bool checkState(const Instantiation &state)
Definition: statesChecker.h:73
Size size() const
Gives the size.
Definition: actionSet.h:211
void initialize(const FMDP< double > *fmdp)
Initializes the learner.
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
virtual ActionSet stateOptimalPolicy(const Instantiation &curState)
A class to store the optimal actions.
Definition: actionSet.h:88
ActionSet stateOptimalPolicy(const Instantiation &curState)
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:95
StatesChecker __statecpt
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
void checkState(const Instantiation &newState, Idx actionId)
void addState(const Instantiation &)
E_GreedyDecider()
Constructor.
<agrum/FMDP/decision/E_GreedyDecider.h>
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:83
void reset(const Instantiation &)
Size Idx
Type for indexes.
Definition: types.h:53
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:102
~E_GreedyDecider()
Destructor.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.