d5/de8/adaptiveRMaxPlaner_8h_source.html

 /***************************************************************************
  *   Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN  *
  *   {prenom.nom}_at_lip6.fr                                               *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   (at your option) any later version.                                   *
  *                                                                         *
  *   This program is distributed in the hope that it will be useful,       *
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
  *   along with this program; if not, write to the                         *
  *   Free Software Foundation, Inc.,                                       *
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  ***************************************************************************/
 // =========================================================================
 #ifndef GUM_ADAPTIVE_RMAX_PLANER_H
 #define GUM_ADAPTIVE_RMAX_PLANER_H
 // =========================================================================
 #include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
 #include <agrum/FMDP/fmdp.h>
 #include <agrum/FMDP/learning/fmdpLearner.h>
 #include <agrum/FMDP/planning/structuredPlaner.h>
 #include <agrum/FMDP/simulation/statesCounter.h>
 // =========================================================================

 namespace gum {

   /**
    * @class AdaptiveRMaxPlaner
    * @brief Planer deriving from StructuredPlaner< double > that also
    *        implements the IDecisionStrategy interface.
    *
    * The constructor is private: within this header, instances are only
    * created through the two static factories, which differ solely by the
    * operator strategy (MDD-based or tree-based) handed to the constructor.
    */
   class AdaptiveRMaxPlaner
       : public StructuredPlaner< double >
       , public IDecisionStrategy {
     // ###################################################################
     /// @name Factories
     // ###################################################################
     public:
     /**
      * @brief Allocates (with new) a planer built on a
      *        MDDOperatorStrategy< double >.
      * @param learner forwarded as-is to the private constructor
      * @param discountFactor forwarded to the constructor (default 0.9)
      * @param epsilon forwarded to the constructor (default 0.00001)
      * @param verbose forwarded to the constructor (default true)
      * @return a raw pointer on the freshly allocated planer
      * @note NOTE(review): nothing here releases the returned object;
      *       presumably the caller is expected to delete it -- confirm.
      */
     static AdaptiveRMaxPlaner*
        ReducedAndOrderedInstance(const ILearningStrategy* learner,
                                  double                   discountFactor = 0.9,
                                  double                   epsilon = 0.00001,
                                  bool                     verbose = true) {
       return new AdaptiveRMaxPlaner(new MDDOperatorStrategy< double >(),
                                     discountFactor,
                                     epsilon,
                                     learner,
                                     verbose);
     }

     /**
      * @brief Allocates (with new) a planer built on a
      *        TreeOperatorStrategy< double >.
      * @param learner forwarded as-is to the private constructor
      * @param discountFactor forwarded to the constructor (default 0.9)
      * @param epsilon forwarded to the constructor (default 0.00001)
      * @param verbose forwarded to the constructor (default true)
      * @return a raw pointer on the freshly allocated planer
      * @note NOTE(review): nothing here releases the returned object;
      *       presumably the caller is expected to delete it -- confirm.
      */
     static AdaptiveRMaxPlaner* TreeInstance(const ILearningStrategy* learner,
                                             double discountFactor = 0.9,
                                             double epsilon = 0.00001,
                                             bool   verbose = true) {
       return new AdaptiveRMaxPlaner(new TreeOperatorStrategy< double >(),
                                     discountFactor,
                                     epsilon,
                                     learner,
                                     verbose);
     }


     // ###################################################################
     /// @name Constructor & destructor
     // ###################################################################
     private:
     /// Default constructor (private: go through the factories above).
     AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
                        double                       discountFactor,
                        double                       epsilon,
                        const ILearningStrategy*     learner,
                        bool                         verbose);

     public:
     /// Default destructor.
     ~AdaptiveRMaxPlaner();


     // ###################################################################
     /// @name Planning methods
     // ###################################################################

     /// Initializes data structure needed for making the planning.
     void initialize(const FMDP< double >* fmdp);

     /// Performs a value iteration.
     /// @param nbStep defaults to 1000000; presumably an upper bound on the
     ///        number of iterations -- TODO confirm against the .cpp.
     void makePlanning(Idx nbStep = 1000000);


     // ###################################################################
     /// @name Value iteration and policy extraction steps
     // ###################################################################
     protected:
     /// NOTE(review): judging by its name, sets up the value function before
     /// iterating -- confirm against the implementation.
     virtual void _initVFunction();

     /// Performs a single step of value iteration.
     virtual MultiDimFunctionGraph< double >* _valueIteration();

     /// Perform the required tasks to extract an optimal policy.
     virtual void _evalPolicy();


     // ###################################################################
     /// @name Internal helpers and function graph tables
     // ###################################################################
     private:
     // Helpers defined in the implementation file; their exact roles are not
     // visible from this header.
     void __makeRMaxFunctionGraphs();

     std::pair< NodeId, NodeId > __visitLearner(const IVisitableGraphLearner*,
                                                NodeId currentNodeId,
                                                MultiDimFunctionGraph< double >*,
                                                MultiDimFunctionGraph< double >*);
     void                        __clearTables();

     // Function graphs indexed by an Idx (presumably the action id -- TODO
     // confirm), and the learner handed over at construction.
     HashTable< Idx, MultiDimFunctionGraph< double >* > __actionsRMaxTable;
     HashTable< Idx, MultiDimFunctionGraph< double >* > __actionsBoolTable;
     const ILearningStrategy*                           __fmdpLearner;

     double __rThreshold;
     double __rmax;


     // ###################################################################
     /// @name State visit bookkeeping
     // ###################################################################
     public:
     /**
      * @brief Records a visit of newState for the given action.
      *
      * While the flag stored for actionId in __initializedTable is false, the
      * action's StatesCounter is reset with newState and the flag is raised;
      * on every later call the counter's incState(newState) is invoked.
      *
      * @param newState the state handed to the action's StatesCounter
      * @param actionId key used in both __initializedTable and __counterTable
      */
     void checkState(const Instantiation& newState, Idx actionId) {
       // Counter already set up for this action: just account for the state.
       if (__initializedTable[actionId]) {
         __counterTable[actionId]->incState(newState);
         return;
       }

       // First state seen for this action.
       __counterTable[actionId]->reset(newState);
       __initializedTable[actionId] = true;
     }

     private:
     // One StatesCounter and one "already reset" flag per action id, as used
     // by checkState().
     HashTable< Idx, StatesCounter* > __counterTable;
     HashTable< Idx, bool >           __initializedTable;

     bool __initialized;
   };

 } /* namespace gum */

 #endif   // GUM_ADAPTIVE_RMAX_PLANER_H
gum::AdaptiveRMaxPlaner::makePlanning
void makePlanning(Idx nbStep=1000000)
Performs a value iteration.
Definition: adaptiveRMaxPlaner.cpp:111

gum::AdaptiveRMaxPlaner::__counterTable
HashTable< Idx, StatesCounter *> __counterTable
Definition: adaptiveRMaxPlaner.h:207

gum::AdaptiveRMaxPlaner::__actionsBoolTable
HashTable< Idx, MultiDimFunctionGraph< double > *> __actionsBoolTable
Definition: adaptiveRMaxPlaner.h:186

gum::StructuredPlaner
<agrum/FMDP/planning/structuredPlaner.h>
Definition: structuredPlaner.h:67

gum::AdaptiveRMaxPlaner::~AdaptiveRMaxPlaner
~AdaptiveRMaxPlaner()
Default destructor.
Definition: adaptiveRMaxPlaner.cpp:73

gum::AdaptiveRMaxPlaner::_valueIteration
virtual MultiDimFunctionGraph< double > * _valueIteration()
Performs a single step of value iteration.
Definition: adaptiveRMaxPlaner.cpp:143

statesCounter.h
Headers of the States Counter class.

gum::IDecisionStrategy
<agrum/FMDP/SDyna/IDecisionStrategy.h>
Definition: IDecisionStrategy.h:48

gum::AdaptiveRMaxPlaner::ReducedAndOrderedInstance
static AdaptiveRMaxPlaner * ReducedAndOrderedInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Definition: adaptiveRMaxPlaner.h:62

gum::AdaptiveRMaxPlaner::__visitLearner
std::pair< NodeId, NodeId > __visitLearner(const IVisitableGraphLearner *, NodeId currentNodeId, MultiDimFunctionGraph< double > *, MultiDimFunctionGraph< double > *)
Definition: adaptiveRMaxPlaner.cpp:306

gum::IOperatorStrategy< double >

gum::IVisitableGraphLearner
<agrum/FMDP/SDyna/IVisitableGraphLearner.h>
Definition: IVisitableGraphLearner.h:50

gum::AdaptiveRMaxPlaner::__clearTables
void __clearTables()
Definition: adaptiveRMaxPlaner.cpp:342

gum::AdaptiveRMaxPlaner::__fmdpLearner
const ILearningStrategy * __fmdpLearner
Definition: adaptiveRMaxPlaner.h:187

gum::AdaptiveRMaxPlaner::AdaptiveRMaxPlaner
AdaptiveRMaxPlaner(IOperatorStrategy< double > *opi, double discountFactor, double epsilon, const ILearningStrategy *learner, bool verbose)
Default constructor.
Definition: adaptiveRMaxPlaner.cpp:60

gum::FMDP< double >

gum::MDDOperatorStrategy
<agrum/FMDP/planning/mddOperatorStrategy.h>
Definition: mddOperatorStrategy.h:47

gum
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25

gum::HashTable
The class for generic Hash Tables.
Definition: hashTable.h:676

gum::AdaptiveRMaxPlaner::__initializedTable
HashTable< Idx, bool > __initializedTable
Definition: adaptiveRMaxPlaner.h:208

structuredPlaner.h
Headers of the StructuredPlaner planer class.

IDecisionStrategy.h
Headers of the Decision Strategy interface.

fmdp.h
Class for implementation of factored markov decision process.

gum::ILearningStrategy
<agrum/FMDP/SDyna/ILearningStrategy.h>
Definition: ILearningStrategy.h:52

gum::AdaptiveRMaxPlaner::__makeRMaxFunctionGraphs
void __makeRMaxFunctionGraphs()
Definition: adaptiveRMaxPlaner.cpp:235

gum::TreeOperatorStrategy
<agrum/FMDP/planning/treeOperatorStrategy.h>
Definition: treeOperatorStrategy.h:47

gum::AdaptiveRMaxPlaner::__actionsRMaxTable
HashTable< Idx, MultiDimFunctionGraph< double > *> __actionsRMaxTable
Definition: adaptiveRMaxPlaner.h:185

gum::AdaptiveRMaxPlaner::TreeInstance
static AdaptiveRMaxPlaner * TreeInstance(const ILearningStrategy *learner, double discountFactor=0.9, double epsilon=0.00001, bool verbose=true)
Definition: adaptiveRMaxPlaner.h:76

gum::MultiDimFunctionGraph< double >

gum::AdaptiveRMaxPlaner::__rThreshold
double __rThreshold
Definition: adaptiveRMaxPlaner.h:189

gum::Instantiation
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:80

gum::AdaptiveRMaxPlaner
<agrum/FMDP/planning/adaptiveRMaxPlaner.h>
Definition: adaptiveRMaxPlaner.h:50

gum::AdaptiveRMaxPlaner::initialize
void initialize(const FMDP< double > *fmdp)
Initializes data structure needed for making the planning.
Definition: adaptiveRMaxPlaner.cpp:94

gum::StructuredPlaner< double >::fmdp
INLINE const FMDP< double > * fmdp()
Returns a const ptr on the Factored Markov Decision Process on which we're planning.
Definition: structuredPlaner.h:134

gum::AdaptiveRMaxPlaner::__rmax
double __rmax
Definition: adaptiveRMaxPlaner.h:190

gum::AdaptiveRMaxPlaner::_initVFunction
virtual void _initVFunction()
Initializes the value function prior to value iteration.
Definition: adaptiveRMaxPlaner.cpp:130

gum::Idx
Size Idx
Type for indexes.
Definition: types.h:50

gum::AdaptiveRMaxPlaner::checkState
void checkState(const Instantiation &newState, Idx actionId)
Definition: adaptiveRMaxPlaner.h:198

fmdpLearner.h
Headers of the FMDPLearner class.

gum::NodeId
Size NodeId
Type for node ids.
Definition: graphElements.h:97

gum::AdaptiveRMaxPlaner::__initialized
bool __initialized
Definition: adaptiveRMaxPlaner.h:210

gum::AdaptiveRMaxPlaner::_evalPolicy
virtual void _evalPolicy()
Performs the required tasks to extract an optimal policy.
Definition: adaptiveRMaxPlaner.cpp:191