d0/db0/E__GreedyDecider_8cpp_source.html

 /***************************************************************************
  *  Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN   *
  *  {prenom.nom}_at_lip6.fr                                                *
  *                                                                         *
  *  This program is free software; you can redistribute it and/or modify   *
  *  it under the terms of the GNU General Public License as published by   *
  *  the Free Software Foundation; either version 2 of the License, or      *
  *  (at your option) any later version.                                    *
  *                                                                         *
  *  This program is distributed in the hope that it will be useful,        *
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of         *
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           *
  *  GNU General Public License for more details.                           *
  *                                                                         *
  *  You should have received a copy of the GNU General Public License      *
  *  along with this program; if not, write to the                          *
  *  Free Software Foundation, Inc.,                                        *
  *  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.               *
  ***************************************************************************/
 // =========================================================================
 #include <cstdlib>
 #include <random>
 // =========================================================================
 #include <agrum/FMDP/decision/E_GreedyDecider.h>
 // =========================================================================

 namespace gum {

   // ==========================================================================
   // Constructor & destructor.
   // ==========================================================================

   // ###################################################################
   // Default constructor.
   // __sss starts at 1.0, the neutral element of the product that
   // initialize() accumulates into it.
   // ###################################################################
   E_GreedyDecider::E_GreedyDecider() : __sss(1.0) {
     GUM_CONSTRUCTOR(E_GreedyDecider);
   }


   // ###################################################################
   // Destructor.
   // Only invokes the GUM_DESTRUCTOR macro; no other cleanup is done here.
   // ###################################################################
   E_GreedyDecider::~E_GreedyDecider() {
     GUM_DESTRUCTOR(E_GreedyDecider);
   }


   // ==========================================================================
   // Initialization
   // ==========================================================================

   // ###################################################################
   /*
    * Initializes the decider for the given FMDP.
    * Delegates to IDecisionStrategy::initialize, then recomputes __sss as the
    * product of the domain sizes of all the variables of the FMDP
    * (presumably the total number of distinct states -- to be confirmed).
    * The product is restarted from 1.0 on every call, so initializing the
    * same decider more than once does not compound the previous value.
    * @param fmdp : the factored MDP whose variables are enumerated;
    *               it is dereferenced, so it must not be null
    */
   // ###################################################################
   void E_GreedyDecider::initialize(const FMDP< double >* fmdp) {
     IDecisionStrategy::initialize(fmdp);

     // Restart the accumulator: without this, a second call would multiply
     // the new domain sizes into the product left by the previous call.
     __sss = 1.0;
     for (auto varIter = fmdp->beginVariables(); varIter != fmdp->endVariables();
          ++varIter)
       __sss *= static_cast< double >((*varIter)->domainSize());
   }


   // ==========================================================================
   // Incremental methods
   // ==========================================================================

   // ###################################################################
   /*
    * Records the state reached after the last transition.
    * The state is handed to the internal states checker: it is used to
    * reset the checker when no state has been visited yet, and it is added
    * to the checker when the checker does not already report it.
    * @param reachedState : the state reached after the transition
    * @param actionId : not used by this implementation
    */
   // ###################################################################
   void E_GreedyDecider::checkState(const Instantiation& reachedState,
                                    Idx                  actionId) {
     // No state visited yet: the checker is reset with this first state.
     if (__statecpt.nbVisitedStates() == 0) {
       __statecpt.reset(reachedState);
       return;
     }

     // A state the checker already reports needs no further bookkeeping.
     if (__statecpt.checkState(reachedState)) return;

     __statecpt.addState(reachedState);
   }

   // ###################################################################
   /*
    * Chooses between exploiting and exploring for the given state.
    *
    * A random number in [0, 1] is drawn with std::rand and compared to an
    * exploration threshold equal to the cube of
    *   (__sss - number of visited states) / __sss,
    * floored at 0.1.
    *  - Above the threshold, the optimal actions returned by
    *    IDecisionStrategy::stateOptimalPolicy are returned as is.
    *  - Otherwise, the non-optimal actions are returned when there are any,
    *    and the whole set of actions is returned when every action is
    *    optimal.
    *
    * @param curState : the state in which we currently are
    * @return the set of actions among which the next action is to be picked
    */
   // ###################################################################
   ActionSet E_GreedyDecider::stateOptimalPolicy(const Instantiation& curState) {
     const double explo =
        static_cast< double >(std::rand()) / static_cast< double >(RAND_MAX);

     // Share of __sss not covered by the visited states. It is cubed by plain
     // multiplication: cheaper than std::pow and needs no <cmath>.
     const double unvisitedRatio =
        (__sss - static_cast< double >(__statecpt.nbVisitedStates())) / __sss;
     const double cubedRatio = unvisitedRatio * unvisitedRatio * unvisitedRatio;

     // The threshold never drops below 0.1.
     const double exploThreshold = cubedRatio < 0.1 ? 0.1 : cubedRatio;

     ActionSet optimalSet = IDecisionStrategy::stateOptimalPolicy(curState);

     // Exploit: stick to the optimal actions.
     if (explo > exploThreshold) return optimalSet;

     // Explore: prefer the actions that are not optimal, when there are any.
     if (_allActions.size() > optimalSet.size()) {
       ActionSet ret(_allActions);
       ret -= optimalSet;
       return ret;
     }

     // Every action is optimal: nothing to exclude.
     return _allActions;
   }

 }   // End of namespace gum
gum::StatesChecker::checkState
bool checkState(const Instantiation &state)
Definition: statesChecker.h:70

gum::ActionSet::size
Size size() const
Gives the size.
Definition: actionSet.h:208

gum::E_GreedyDecider::initialize
void initialize(const FMDP< double > *fmdp)
Initializes the learner.
Definition: E_GreedyDecider.cpp:69

gum::IDecisionStrategy::initialize
virtual void initialize(const FMDP< double > *fmdp)
Initializes the learner.
Definition: IDecisionStrategy.h:69

gum::IDecisionStrategy::stateOptimalPolicy
virtual ActionSet stateOptimalPolicy(const Instantiation &curState)
Definition: IDecisionStrategy.h:94

gum::ActionSet
A class to store the optimal actions.
Definition: actionSet.h:85

gum::E_GreedyDecider::stateOptimalPolicy
ActionSet stateOptimalPolicy(const Instantiation &curState)
Definition: E_GreedyDecider.cpp:102

gum::E_GreedyDecider::__sss
double __sss
Definition: E_GreedyDecider.h:97

double

gum::FMDP::beginVariables
SequenceIteratorSafe< const DiscreteVariable *> beginVariables() const
Returns an iterator reference to the beginning of the list of variables.
Definition: fmdp.h:92

gum::E_GreedyDecider::__statecpt
StatesChecker __statecpt
Definition: E_GreedyDecider.h:96

gum::FMDP< double >

gum
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25

gum::E_GreedyDecider::checkState
void checkState(const Instantiation &newState, Idx actionId)
Definition: E_GreedyDecider.cpp:88

gum::StatesChecker::addState
void addState(const Instantiation &)
Definition: statesChecker.cpp:53

gum::E_GreedyDecider::E_GreedyDecider
E_GreedyDecider()
Constructor.
Definition: E_GreedyDecider.cpp:45

gum::E_GreedyDecider
<agrum/FMDP/decision/E_GreedyDecider.h>
Definition: E_GreedyDecider.h:53

gum::StatesChecker::nbVisitedStates
Idx nbVisitedStates()
Definition: statesChecker.h:74

gum::Instantiation
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:80

gum::StatesChecker::reset
void reset(const Instantiation &)
Definition: statesChecker.cpp:38

gum::IDecisionStrategy::_allActions
ActionSet _allActions
Definition: IDecisionStrategy.h:104

gum::Idx
Size Idx
Type for indexes.
Definition: types.h:50

gum::FMDP::endVariables
SequenceIteratorSafe< const DiscreteVariable *> endVariables() const
Returns an iterator reference to the end of the list of variables.
Definition: fmdp.h:99

gum::E_GreedyDecider::~E_GreedyDecider
~E_GreedyDecider()
Destructor.
Definition: E_GreedyDecider.cpp:57

E_GreedyDecider.h
Headers of the epsilon-greedy decision maker class.