d5/de8/adaptiveRMaxPlaner_8h_source.html

 /**
  *
  *   Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
  *   info_at_agrum_dot_org
  *
  *  This library is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Lesser General Public License as published by
  *  the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This library is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Lesser General Public License for more details.
  *
  *  You should have received a copy of the GNU Lesser General Public License
  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
  *
  */


 /**
  * @file
  * @brief Headers of the RMax planer class.
  *
  * @author Pierre-Henri WUILLEMIN(@LIP6) and Jean-Christophe MAGNAN and Christophe
  * GONZALES(@AMU)
  */

 // =========================================================================
 #ifndef GUM_ADAPTIVE_RMAX_PLANER_H
 #define GUM_ADAPTIVE_RMAX_PLANER_H
 // =========================================================================
 #include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
 #include <agrum/FMDP/fmdp.h>
 #include <agrum/FMDP/learning/fmdpLearner.h>
 #include <agrum/FMDP/planning/structuredPlaner.h>
 #include <agrum/FMDP/simulation/statesCounter.h>
 // =========================================================================

 namespace gum {

   /**
    * @class AdaptiveRMaxPlaner adaptiveRMaxPlaner.h
    * <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
    * @brief A class to find an optimal policy for a given FMDP.
    * @ingroup fmdp_group
    *
    * Performs RMax planning on the factored Markov decision process given as
    * a parameter.
    *
    */
   class AdaptiveRMaxPlaner: public StructuredPlaner< double >, public IDecisionStrategy {
     /// @name Factory methods
     /// @{
     public:
     /**
      * Creates a planer working through a MDDOperatorStrategy< double >.
      *
      * The operator strategy is allocated here and handed, together with the
      * remaining arguments, to the private constructor.
      *
      * @param learner the learning strategy forwarded to the constructor
      * @param discountFactor forwarded to the constructor (defaults to 0.9)
      * @param epsilon forwarded to the constructor (defaults to 0.00001)
      * @param verbose forwarded to the constructor (defaults to true)
      * @return a pointer to a planer allocated with new
      */
     static AdaptiveRMaxPlaner* ReducedAndOrderedInstance(const ILearningStrategy* learner,
                                                          double discountFactor = 0.9,
                                                          double epsilon        = 0.00001,
                                                          bool   verbose        = true) {
       IOperatorStrategy< double >* mddOperators = new MDDOperatorStrategy< double >();
       return new AdaptiveRMaxPlaner(mddOperators, discountFactor, epsilon, learner, verbose);
     }

     /**
      * Creates a planer working through a TreeOperatorStrategy< double >.
      *
      * The operator strategy is allocated here and handed, together with the
      * remaining arguments, to the private constructor.
      *
      * @param learner the learning strategy forwarded to the constructor
      * @param discountFactor forwarded to the constructor (defaults to 0.9)
      * @param epsilon forwarded to the constructor (defaults to 0.00001)
      * @param verbose forwarded to the constructor (defaults to true)
      * @return a pointer to a planer allocated with new
      */
     static AdaptiveRMaxPlaner* TreeInstance(const ILearningStrategy* learner,
                                             double                   discountFactor = 0.9,
                                             double                   epsilon        = 0.00001,
                                             bool                     verbose        = true) {
       IOperatorStrategy< double >* treeOperators = new TreeOperatorStrategy< double >();
       return new AdaptiveRMaxPlaner(treeOperators, discountFactor, epsilon, learner, verbose);
     }

     /// @}

     /// @name Constructor & destructor.
     /// @{
     private:
     /**
      * Constructor.
      *
      * It is private: inside this header, instances are only built by
      * ReducedAndOrderedInstance() and TreeInstance().
      *
      * @param opi the operator strategy chosen by the calling factory
      * @param discountFactor the discount factor passed to the factory
      * @param epsilon the epsilon passed to the factory
      * @param learner the learning strategy passed to the factory
      * @param verbose the verbosity flag passed to the factory
      */
     AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
                        double                       discountFactor,
                        double                       epsilon,
                        const ILearningStrategy*     learner,
                        bool                         verbose);

     public:
     /// Destructor (defined out of line).
     ~AdaptiveRMaxPlaner();

     /// @}


     /// @name Planning Methods
     /// @{

     public:
     /**
      * Initializes the data structures needed for planning.
      *
      * @warning Not calling this method before the first makePlanning() will
      * result in a crash.
      *
      * @param fmdp the factored Markov decision process to plan on
      */
     void initialize(const FMDP< double >* fmdp);


     /**
      * Performs a value iteration.
      *
      * @param nbStep the maximal number of value iterations to perform:
      * makePlanning stops either when the optimal value function is reached
      * or when nbStep iterations have been performed.
      */
     void makePlanning(Idx nbStep = 1000000);

     /// @}


     /// @name Value Iteration Methods
     /// @{

     protected:
     /// NOTE(review): presumably sets up the initial value function -- the
     /// original header leaves this undocumented; confirm against the .cpp.
     virtual void initVFunction_();

     /// Performs a single step of value iteration.
     virtual MultiDimFunctionGraph< double >* valueIteration_();

     /// @}


     /// @name Optimal policy extraction methods
     /// @{

     protected:
     /// Performs the tasks required to extract an optimal policy.
     virtual void evalPolicy_();

     /// @}

     private:
     // Internal helpers, all defined out of line.
     // NOTE(review): judging by their names they build, traverse and clear
     // the two function-graph tables declared below -- confirm in the .cpp.
     void _makeRMaxFunctionGraphs_();

     std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner*,
                                                NodeId currentNodeId,
                                                MultiDimFunctionGraph< double >*,
                                                MultiDimFunctionGraph< double >*);
     void                        _clearTables_();

     // Function graphs indexed by an Idx key (presumably the action id, as for
     // the counter tables further down -- TODO confirm).
     HashTable< Idx, MultiDimFunctionGraph< double >* > _actionsRMaxTable_;
     HashTable< Idx, MultiDimFunctionGraph< double >* > _actionsBoolTable_;

     // Learning strategy (the type of the constructor's learner argument).
     const ILearningStrategy* _fmdpLearner_;

     double _rThreshold_;
     double _rmax_;


     /// @name Incremental methods
     /// @{
     public:
     /**
      * Registers the state newState for the action actionId.
      *
      * While the action is not flagged in the initialization table, its
      * StatesCounter is reset with newState and the action gets flagged;
      * once flagged, the counter's incState() is called with newState.
      *
      * @param newState the state to register
      * @param actionId the key used in both the counter table and the
      * initialization table
      */
     void checkState(const Instantiation& newState, Idx actionId) {
       if (_initializedTable_[actionId]) {
         _counterTable_[actionId]->incState(newState);
         return;
       }

       // First state seen for this action: restart its counter from newState.
       _counterTable_[actionId]->reset(newState);
       _initializedTable_[actionId] = true;
     }

     private:
     // One StatesCounter and one "already reset" flag per action id.
     HashTable< Idx, StatesCounter* > _counterTable_;
     HashTable< Idx, bool >           _initializedTable_;

     bool _initialized_;
     /// @}
   };

 } /* namespace gum */

 #endif   // GUM_ADAPTIVE_RMAX_PLANER_H
gum::AdaptiveRMaxPlaner::makePlanning
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
Definition: adaptiveRMaxPlaner.cpp:111

gum::AdaptiveRMaxPlaner::~AdaptiveRMaxPlaner
~AdaptiveRMaxPlaner()
Default destructor.
Definition: adaptiveRMaxPlaner.cpp:76

gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643

gum::AdaptiveRMaxPlaner::_initialized_
bool _initialized_
Definition: adaptiveRMaxPlaner.h:210

gum::AdaptiveRMaxPlaner::ReducedAndOrderedInstance
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Definition: adaptiveRMaxPlaner.h:62

gum::AdaptiveRMaxPlaner::initVFunction_
virtual void initVFunction_()
Performs a single step of value iteration.
Definition: adaptiveRMaxPlaner.cpp:130

gum::AdaptiveRMaxPlaner::_rmax_
double _rmax_
Definition: adaptiveRMaxPlaner.h:190

gum::AdaptiveRMaxPlaner::AdaptiveRMaxPlaner
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
Definition: adaptiveRMaxPlaner.cpp:63

gum::AdaptiveRMaxPlaner::_counterTable_
HashTable< Idx, StatesCounter *> _counterTable_
Definition: adaptiveRMaxPlaner.h:207

gum::AdaptiveRMaxPlaner::_fmdpLearner_
const ILearningStrategy * _fmdpLearner_
Definition: adaptiveRMaxPlaner.h:187

gum::AdaptiveRMaxPlaner::_visitLearner_
std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
Definition: adaptiveRMaxPlaner.cpp:288

gum::AdaptiveRMaxPlaner::TreeInstance
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Definition: adaptiveRMaxPlaner.h:76

gum::AdaptiveRMaxPlaner::valueIteration_
virtual MultiDimFunctionGraph< double > * valueIteration_()
Performs a single step of value iteration.
Definition: adaptiveRMaxPlaner.cpp:139

gum::AdaptiveRMaxPlaner::evalPolicy_
virtual void evalPolicy_()
Perform the required tasks to extract an optimal policy.
Definition: adaptiveRMaxPlaner.cpp:183

gum::AdaptiveRMaxPlaner::_initializedTable_
HashTable< Idx, bool > _initializedTable_
Definition: adaptiveRMaxPlaner.h:208

gum::AdaptiveRMaxPlaner::_rThreshold_
double _rThreshold_
Definition: adaptiveRMaxPlaner.h:189

gum::AdaptiveRMaxPlaner::initialize
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
Definition: adaptiveRMaxPlaner.cpp:96

gum::AdaptiveRMaxPlaner::_makeRMaxFunctionGraphs_
void _makeRMaxFunctionGraphs_()
Definition: adaptiveRMaxPlaner.cpp:222

gum::AdaptiveRMaxPlaner::_clearTables_
void _clearTables_()
Definition: adaptiveRMaxPlaner.cpp:321

gum::AdaptiveRMaxPlaner::checkState
void checkState(const Instantiation &newState, Idx actionId)
Definition: adaptiveRMaxPlaner.h:198

gum::AdaptiveRMaxPlaner::_actionsBoolTable_
HashTable< Idx, MultiDimFunctionGraph< double > *> _actionsBoolTable_
Definition: adaptiveRMaxPlaner.h:186