#include <agrum/agrum.h>

#include <agrum/tools/multidim/instantiation.h>
#include <agrum/tools/variables/discreteVariable.h>

#include <agrum/FMDP/SDyna/Strategies/IDecisionStrategy.h>
#include <agrum/FMDP/SDyna/Strategies/ILearningStrategy.h>
#include <agrum/FMDP/SDyna/Strategies/IPlanningStrategy.h>
#include <agrum/FMDP/decision/E_GreedyDecider.h>
#include <agrum/FMDP/decision/lazyDecider.h>
#include <agrum/FMDP/decision/randomDecider.h>
#include <agrum/FMDP/decision/statisticalLazyDecider.h>
#include <agrum/FMDP/fmdp.h>
#include <agrum/FMDP/learning/fmdpLearner.h>
#include <agrum/FMDP/learning/observation.h>
#include <agrum/FMDP/planning/actionSet.h>
#include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
#include <agrum/FMDP/planning/structuredPlaner.h>

// Static instantiation methods of the SDYNA class:

static SDYNA* spitiInstance(double attributeSelectionThreshold = 0.99,
                            double discountFactor              = 0.9,
                            double epsilon                     = 1,
                            Idx    observationPhaseLenght      = 100,
                            Idx    nbValueIterationStep        = 10) {
  bool               actionReward = false;
  ILearningStrategy* ls
     = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new E_GreedyDecider();
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
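// A minimal usage sketch (not part of the header), assuming SDYNA lives in the
// usual gum namespace: every parameter of spitiInstance() has a default, and
// the returned object is heap-allocated, so the caller is responsible for
// deleting it.
//
//   gum::SDYNA* agent = gum::SDYNA::spitiInstance();
//   // ... interact with the environment through the agent ...
//   delete agent;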
static SDYNA* spimddiInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold         = 0.3,
                              double discountFactor              = 0.9,
                              double epsilon                     = 1,
                              Idx    observationPhaseLenght      = 100,
                              Idx    nbValueIterationStep        = 10) {
  bool               actionReward = false;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
                                                     actionReward,
                                                     similarityThreshold);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon, false);
  IDecisionStrategy* ds = new E_GreedyDecider();
  return new SDYNA(ls,
                   ps,
                   ds,
                   observationPhaseLenght,
                   nbValueIterationStep,
                   actionReward,
                   false);
}
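// Usage sketch: spimddiInstance() exposes the similarity threshold of the
// MDD-based learner as its second parameter, so a call overriding only that
// value while keeping the default attribute-selection threshold looks like:
//
//   gum::SDYNA* agent = gum::SDYNA::spimddiInstance(0.99, 0.2);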
static SDYNA* RMaxMDDInstance(double attributeSelectionThreshold = 0.99,
                              double similarityThreshold         = 0.3,
                              double discountFactor              = 0.9,
                              double epsilon                     = 1,
                              Idx    observationPhaseLenght      = 100,
                              Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
                                                     actionReward,
                                                     similarityThreshold);
  AdaptiveRMaxPlaner* rm
     = AdaptiveRMaxPlaner::ReducedAndOrderedInstance(ls, discountFactor, epsilon);
  IPlanningStrategy< double >* ps = rm;
  IDecisionStrategy*           ds = rm;
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
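// Usage sketch: in the RMax variants the AdaptiveRMaxPlaner instance is used
// both as the planning strategy and as the decision strategy, so no separate
// decider is created:
//
//   gum::SDYNA* agent = gum::SDYNA::RMaxMDDInstance();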
static SDYNA* RMaxTreeInstance(double attributeSelectionThreshold = 0.99,
                               double discountFactor              = 0.9,
                               double epsilon                     = 1,
                               Idx    observationPhaseLenght      = 100,
                               Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, ITILEARNER >(attributeSelectionThreshold, actionReward);
  AdaptiveRMaxPlaner*          rm = AdaptiveRMaxPlaner::TreeInstance(ls, discountFactor, epsilon);
  IPlanningStrategy< double >* ps = rm;
  IDecisionStrategy*           ds = rm;
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
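// Usage sketch: the same RMax scheme with tree-based representations; the
// second and third parameters are the discount factor and epsilon:
//
//   gum::SDYNA* agent = gum::SDYNA::RMaxTreeInstance(0.99, 0.8, 0.5);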
static SDYNA* RandomMDDInstance(double attributeSelectionThreshold = 0.99,
                                double similarityThreshold         = 0.3,
                                double discountFactor              = 0.9,
                                double epsilon                     = 1,
                                Idx    observationPhaseLenght      = 100,
                                Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< GTEST, GTEST, IMDDILEARNER >(attributeSelectionThreshold,
                                                     actionReward,
                                                     similarityThreshold);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::spumddInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new RandomDecider();
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
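// Usage sketch: RandomMDDInstance() keeps the MDD-based learner and SPUMDD
// planner but picks actions through a RandomDecider instead of an
// epsilon-greedy one:
//
//   gum::SDYNA* agent = gum::SDYNA::RandomMDDInstance();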
static SDYNA* RandomTreeInstance(double attributeSelectionThreshold = 0.99,
                                 double discountFactor              = 0.9,
                                 double epsilon                     = 1,
                                 Idx    observationPhaseLenght      = 100,
                                 Idx    nbValueIterationStep        = 10) {
  bool               actionReward = true;
  ILearningStrategy* ls
     = new FMDPLearner< CHI2TEST, CHI2TEST, ITILEARNER >(attributeSelectionThreshold,
                                                         actionReward);
  IPlanningStrategy< double >* ps
     = StructuredPlaner< double >::sviInstance(discountFactor, epsilon);
  IDecisionStrategy* ds = new RandomDecider();
  return new SDYNA(ls, ps, ds, observationPhaseLenght, nbValueIterationStep, actionReward);
}
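// Usage sketch: the tree-based counterpart of RandomMDDInstance(). The last two
// parameters control how often planning is triggered and how many value
// iteration steps each planning phase runs:
//
//   gum::SDYNA* agent = gum::SDYNA::RandomTreeInstance(0.99, 0.9, 1, 50, 20);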
SDYNA(ILearningStrategy*           learner,
      IPlanningStrategy< double >* planer,
      IDecisionStrategy*           decider,
      Idx                          observationPhaseLenght,
      Idx                          nbValueIterationStep,
      bool                         actionReward,
      bool                         verbose = true);
void addAction(const Idx actionId, const std::string& actionName) {
  fmdp_->addAction(actionId, actionName);
}

void addVariable(const DiscreteVariable* var) { fmdp_->addVariable(var); }

void initialize(const Instantiation& initialState);

void setCurrentState(const Instantiation& currentState) { lastState_ = currentState; }

void feedback(const Instantiation& originalState,
              const Instantiation& reachedState,
              Idx                  performedAction,
              double               obtainedReward);

void feedback(const Instantiation& reachedState, double obtainedReward);
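// A model setup sketch using the members above (hypothetical variable and
// action names, and assuming agrum/tools/variables/labelizedVariable.h is
// also included):
//
//   gum::SDYNA* agent = gum::SDYNA::spitiInstance();
//   auto* light = new gum::LabelizedVariable("light", "is the light on", 2);
//   agent->addVariable(light);
//   agent->addAction(1, "toggle");
//
//   gum::Instantiation start;
//   start.add(*light);
//   start.chgVal(*light, 0);
//   agent->initialize(start);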
std::string optimalPolicy2String()
Returns a string representation of the learnt optimal policy.
std::string toString()
Returns a string representation of the learnt FMDP.
void setCurrentState(const Instantiation &currentState)
Sets the last visited state to the given state.
SDYNA(ILearningStrategy *learner, IPlanningStrategy< double > *planer, IDecisionStrategy *decider, Idx observationPhaseLenght, Idx nbValueIterationStep, bool actionReward, bool verbose=true)
Constructor.
static SDYNA * RMaxTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining a tree-based FMDPLearner (G-test) with an adaptive RMax planner, which also serves as the decision strategy.
void feedback(const Instantiation &originalState, const Instantiation &reachedState, Idx performedAction, double obtainedReward)
Gives feedback on the last transition.
Idx _observationPhaseLenght_
The number of observations made before the planner is called again.
void addVariable(const DiscreteVariable *var)
Inserts a new variable in the SDyna instance.
static SDYNA * RandomTreeInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining a tree-based FMDPLearner (chi2 test) with structured value iteration and a random decision strategy.
Instantiation lastState_
The state in which the system is before we perform a new action.
FMDP< double > * fmdp_
The learnt Markov Decision Process.
SDYNA
The general SDyna architecture abstract class.
IDecisionStrategy * _decider_
The decision strategy used to select actions.
Idx _nbObservation_
The total number of observations made so far.
Size learnerSize()
Returns the size of the learner's internal data structures.
Set< Observation* > _bin_
Observations made by SDYNA; it has to delete them on exit.
Size valueFunctionSize()
Returns the size of the learnt value function.
void feedback(const Instantiation &reachedState, double obtainedReward)
Gives feedback on the last transition, using the last visited state and the last performed action.
void addAction(const Idx actionId, const std::string &actionName)
Inserts a new action in the SDyna instance.
IPlanningStrategy< double > * _planer_
The planner used to compute an optimal strategy.
static SDYNA * RMaxMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining an MDD-based FMDPLearner (G-test) with an adaptive RMax planner, which also serves as the decision strategy.
Idx _nbValueIterationStep_
The number of value iteration steps performed at each planning phase.
static SDYNA * spitiInstance(double attributeSelectionThreshold=0.99, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance using the SPITI instantiation: tree-based FMDPLearner (chi2 test), structured value iteration, and an epsilon-greedy decision strategy.
void makePlanning(Idx nbStep)
Starts a new planning phase.
Idx takeAction(const Instantiation &curState)
Returns the id of the action SDYNA chooses to perform in the given state.
ILearningStrategy * _learner_
The learner used to learn the FMDP.
void initialize()
Initializes the SDyna instance.
static SDYNA * spimddiInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance using the SPIMDDI instantiation: MDD-based FMDPLearner (G-test), SPUMDD structured planning, and an epsilon-greedy decision strategy.
void initialize(const Instantiation &initialState)
Initializes the SDyna instance at the given state.
Size optimalPolicySize()
Returns the size of the learnt optimal policy.
static SDYNA * RandomMDDInstance(double attributeSelectionThreshold=0.99, double similarityThreshold=0.3, double discountFactor=0.9, double epsilon=1, Idx observationPhaseLenght=100, Idx nbValueIterationStep=10)
Builds an SDYNA instance combining an MDD-based FMDPLearner (G-test) with SPUMDD structured planning and a random decision strategy.
Idx _lastAction_
The last performed action.
Size modelSize()
Returns the size of the learnt model (FMDP).
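Continuing the setup sketch above, a minimal observe-act loop could look as
follows; nextStateOf() and rewardOf() are hypothetical placeholders for the
application's environment, not aGrUM functions, and planning is triggered
internally every observationPhaseLenght observations:

  gum::Instantiation current = start;
  for (gum::Idx step = 0; step < 1000; ++step) {
    gum::Idx action = agent->takeAction(current);
    gum::Instantiation reached = nextStateOf(current, action);   // environment transition
    double reward = rewardOf(reached);                           // environment reward
    agent->feedback(current, reached, action, reward);
    current = reached;
  }
  // Diagnostics on what has been learnt so far (needs <iostream>):
  std::cout << agent->modelSize() << " " << agent->optimalPolicySize() << std::endl;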