aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
adaptiveRMaxPlaner.h
/**
 *
 * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
 * info_at_agrum_dot_org
 *
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */

/**
 * @file
 * @brief Headers of the RMax planner class.
 *
 * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
 * GONZALES(@AMU)
 */

// =========================================================================
#ifndef GUM_ADAPTIVE_RMAX_PLANER_H
#define GUM_ADAPTIVE_RMAX_PLANER_H
// =========================================================================
#include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>
#include <agrum/FMDP/simulation/statesCounter.h>
// =========================================================================

namespace gum {

  /**
   * @class AdaptiveRMaxPlaner adaptiveRMaxPlaner.h
   * <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
   * @brief A class to find an optimal policy for a given FMDP.
   * @ingroup fmdp_group
   *
   * Performs RMax planning on the factored Markov decision process given
   * as parameter.
   *
   */
  class AdaptiveRMaxPlaner:
      public StructuredPlaner< double >,
      public IDecisionStrategy {
    // ###################################################################
    /// @name
    // ###################################################################
    /// @{
    public:
    // ==========================================================================
    /// Builds an instance that uses a MDDOperatorStrategy (reduced and
    /// ordered function graphs)
    // ==========================================================================
    static AdaptiveRMaxPlaner*
       ReducedAndOrderedInstance(const ILearningStrategy* learner,
                                 double                   discountFactor = 0.9,
                                 double                   epsilon = 0.00001,
                                 bool                     verbose = true) {
      return new AdaptiveRMaxPlaner(new MDDOperatorStrategy< double >(),
                                    discountFactor,
                                    epsilon,
                                    learner,
                                    verbose);
    }

    // ==========================================================================
    /// Builds an instance that uses a TreeOperatorStrategy (decision trees)
    // ==========================================================================
    static AdaptiveRMaxPlaner* TreeInstance(const ILearningStrategy* learner,
                                            double discountFactor = 0.9,
                                            double epsilon        = 0.00001,
                                            bool   verbose        = true) {
      return new AdaptiveRMaxPlaner(new TreeOperatorStrategy< double >(),
                                    discountFactor,
                                    epsilon,
                                    learner,
                                    verbose);
    }

    /// @}

    // ###################################################################
    /// @name Constructor & destructor.
    // ###################################################################
    /// @{
    private:
    // ==========================================================================
    /// Default constructor
    // ==========================================================================
    AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
                       double                       discountFactor,
                       double                       epsilon,
                       const ILearningStrategy*     learner,
                       bool                         verbose);

    // ==========================================================================
    /// Default destructor
    // ==========================================================================
    public:
    ~AdaptiveRMaxPlaner();

    /// @}


    // ###################################################################
    /// @name Planning Methods
    // ###################################################################
    /// @{

    public:
    // ==========================================================================
    /**
     * Initializes the data structures needed for planning.
     * @warning Not calling this method before the first call to makePlanning()
     * will result in a crash.
     */
    // ==========================================================================
    void initialize(const FMDP< double >* fmdp);


    // ==========================================================================
    /**
     * Performs value iteration.
     *
     * @param nbStep : the maximum number of value-iteration steps to perform.
     * makePlanning stops either when the optimal value function has been
     * reached or when nbStep iterations have been performed.
     */
    // ==========================================================================
    void makePlanning(Idx nbStep = 1000000);

    /// @}

    // ###################################################################
    /// @name Value Iteration Methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Initializes the value function
    // ==========================================================================
    virtual void initVFunction_();

    // ==========================================================================
    /// Performs a single step of value iteration
    // ==========================================================================
    virtual MultiDimFunctionGraph< double >* valueIteration_();

    /// @}

    // ###################################################################
    /// @name Optimal policy extraction methods
    // ###################################################################
    /// @{

    protected:
    // ==========================================================================
    /// Perform the required tasks to extract an optimal policy
    // ==========================================================================
    virtual void evalPolicy_();

    /// @}

    private:
    std::pair< NodeId, NodeId > visitLearner__(const IVisitableGraphLearner*,
                                               NodeId currentNodeId,
                                               MultiDimFunctionGraph< double >*,
                                               MultiDimFunctionGraph< double >*);
    void clearTables__();

    private:
    const ILearningStrategy*                           fmdpLearner__;
    HashTable< Idx, MultiDimFunctionGraph< double >* > actionsBoolTable__;

    double rThreshold__;
    double rmax__;


    // ###################################################################
    /// @name Incremental methods
    // ###################################################################
    /// @{
    public:
    /// Tells the planner that newState has been reached after performing
    /// action actionId, and updates the per-action state counters accordingly.
    void checkState(const Instantiation& newState, Idx actionId) {
      if (!initializedTable__[actionId]) {
        counterTable__[actionId]->reset(newState);
        initializedTable__[actionId] = true;
      } else
        counterTable__[actionId]->incState(newState);
    }

    private:
    HashTable< Idx, StatesCounter* > counterTable__;
    HashTable< Idx, bool >           initializedTable__;

    /// @}
  };

}   /* namespace gum */

#endif   // GUM_ADAPTIVE_RMAX_PLANER_H
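
A minimal usage sketch, not taken from the library's documentation: it assumes a learning strategy and a learned FMDP are already available (for instance from the SDYNA machinery whose headers are included above); the helper name planWithRMax and the step count are illustrative only.

#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>

// Hypothetical helper: runs adaptive RMax planning on an already learned FMDP.
void planWithRMax(const gum::ILearningStrategy* learner,
                  const gum::FMDP< double >*    fmdp) {
  // Tree-based planner; 0.9 is the discount factor, 1e-5 the value-iteration
  // convergence threshold.
  gum::AdaptiveRMaxPlaner* planner
     = gum::AdaptiveRMaxPlaner::TreeInstance(learner, 0.9, 0.00001);

  planner->initialize(fmdp);   // must be called before makePlanning()
  planner->makePlanning(100);  // perform at most 100 value-iteration steps

  delete planner;   // the factory returns a heap-allocated instance
}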