d9/d40/paramEstimator_8h_source.html

 /**
  *
  *   Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
  *   info_at_agrum_dot_org
  *
  *  This library is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Lesser General Public License as published by
  *  the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This library is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Lesser General Public License for more details.
  *
  *  You should have received a copy of the GNU Lesser General Public License
  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
  *
  */


 /** @file
  * @brief the base class for estimating parameters of CPTs
  *
  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
  */
 #ifndef GUM_LEARNING_PARAM_ESTIMATOR_H
 #define GUM_LEARNING_PARAM_ESTIMATOR_H

 #include <type_traits>

 #include <agrum/agrum.h>
 #include <agrum/tools/database/databaseTable.h>
 #include <agrum/BN/learning/aprioris/apriori.h>
 #include <agrum/tools/stattests/recordCounter.h>
 #include <agrum/tools/multidim/potential.h>

 namespace gum {

   namespace learning {


     /** @class ParamEstimator
      * @brief The base class for estimating parameters of CPTs
      * @headerfile paramEstimator.h <agrum/BN/learning/paramUtils/paramEstimator.h>
      * @ingroup learning_param_utils
      *
      * The class is abstract: both clone() methods and the two-argument
      * parameters() method are pure virtual and must be supplied by derived
      * estimators. The copy and move assignment operators are protected.
      *
      * @tparam ALLOC the allocator template used to instantiate the containers
      * and helper classes that appear in the interface of the estimator.
      */
     template < template < typename > class ALLOC = std::allocator >
     class ParamEstimator {
       public:
       /// type for the allocators passed in arguments of methods
       using allocator_type = ALLOC< NodeId >;

       // ##########################################################################
       /// @name Constructors / Destructors
       // ##########################################################################
       /// @{

       /// default constructor (with explicit database ranges)
       /** @param parser the parser used to parse the database
        * @param external_apriori An apriori that we add to the computation
        * of the score
        * @param score_internal__apriori The apriori within the score used
        * to learn the data structure (might be a NoApriori)
        * @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
        * indices. The countings are then performed only on the union of the
        * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
        * cross validation tasks, in which part of the database should be ignored.
        * An empty set of ranges is equivalent to an interval [X,Y) ranging over
        * the whole database.
        * @param nodeId2columns a mapping from the ids of the nodes in the
        * graphical model to the corresponding column in the DatabaseTable
        * parsed by the parser. This enables estimating from a database in
        * which variable A corresponds to the 2nd column the parameters of a BN
        * in which variable A has a NodeId of 5. An empty nodeId2columns
        * bijection means that the mapping is an identity, i.e., the value of a
        * NodeId is equal to the index of the column in the DatabaseTable.
        * @param alloc the allocator used to allocate the structures within the
        * estimator.
        * @warning If nodeId2columns is not empty, then only the ids belonging
        * to this bijection can be processed.
        * NOTE(review): the previous wording referred to a score() method, which
        * this class does not declare; presumably parameters() raises exception
        * NotFound for other ids -- confirm against the implementation. */
       ParamEstimator(
          const DBRowGeneratorParser< ALLOC >& parser,
          const Apriori< ALLOC >&              external_apriori,
          const Apriori< ALLOC >&              score_internal__apriori,
          const std::vector< std::pair< std::size_t, std::size_t >,
                             ALLOC< std::pair< std::size_t, std::size_t > > >&
             ranges,
          const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
             nodeId2columns
          = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
          const allocator_type& alloc = allocator_type());

       /// default constructor (without database ranges)
       /** Same arguments as the other default constructor, except that no
        * ranges are passed (presumably the countings are then performed over
        * the whole database -- confirm against the implementation).
        *
        * @param parser the parser used to parse the database
        * @param external_apriori An apriori that we add to the computation
        * of the score
        * @param score_internal__apriori The apriori within the score used
        * to learn the data structure (might be a NoApriori)
        * @param nodeId2columns a mapping from the ids of the nodes in the
        * graphical model to the corresponding column in the DatabaseTable
        * parsed by the parser. This enables estimating from a database in
        * which variable A corresponds to the 2nd column the parameters of a BN
        * in which variable A has a NodeId of 5. An empty nodeId2columns
        * bijection means that the mapping is an identity, i.e., the value of a
        * NodeId is equal to the index of the column in the DatabaseTable.
        * @param alloc the allocator used to allocate the structures within the
        * estimator.
        * @warning If nodeId2columns is not empty, then only the ids belonging
        * to this bijection can be processed.
        * NOTE(review): the previous wording referred to a score() method, which
        * this class does not declare; presumably parameters() raises exception
        * NotFound for other ids -- confirm against the implementation. */
       ParamEstimator(const DBRowGeneratorParser< ALLOC >& parser,
                      const Apriori< ALLOC >&              external_apriori,
                      const Apriori< ALLOC >&              score_internal__apriori,
                      const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
                         nodeId2columns
                      = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
                      const allocator_type& alloc = allocator_type());

       /// copy constructor
       /** @param from the estimator to copy */
       ParamEstimator(const ParamEstimator< ALLOC >& from);

       /// copy constructor with a given allocator
       /** @param from the estimator to copy
        * @param alloc the allocator passed to the new estimator */
       ParamEstimator(const ParamEstimator< ALLOC >& from,
                      const allocator_type&          alloc);

       /// move constructor
       /** @param from the estimator to move from */
       ParamEstimator(ParamEstimator< ALLOC >&& from);

       /// move constructor with a given allocator
       /** @param from the estimator to move from
        * @param alloc the allocator passed to the new estimator */
       ParamEstimator(ParamEstimator< ALLOC >&& from, const allocator_type& alloc);

       /// virtual copy constructor
       /** Pure virtual: to be implemented by the derived estimators.
        * @return a pointer to the copy (presumably owned by the caller --
        * confirm against the implementations) */
       virtual ParamEstimator< ALLOC >* clone() const = 0;

       /// virtual copy constructor with a given allocator
       /** Pure virtual: to be implemented by the derived estimators.
        * @param alloc the allocator passed to the copy
        * @return a pointer to the copy (presumably owned by the caller --
        * confirm against the implementations) */
       virtual ParamEstimator< ALLOC >*
          clone(const allocator_type& alloc) const = 0;

       /// destructor
       virtual ~ParamEstimator();

       /// @}


       // ##########################################################################
       /// @name Accessors / Modifiers
       // ##########################################################################
       /// @{

       /// clears all the data structures from memory
       virtual void clear();

       /// changes the max number of threads used to parse the database
       /** @param nb the new maximal number of threads
        * @note the method is declared const although it is a setter */
       virtual void setMaxNbThreads(std::size_t nb) const;

       /// returns the number of threads used to parse the database
       virtual std::size_t nbThreads() const;

       /** @brief changes the number min of rows a thread should process in a
        * multithreading context
        *
        * When computing score, several threads are used by record counters to
        * perform countings on the rows of the database, the MinNbRowsPerThread
        * method indicates how many rows each thread should at least process.
        * This is used to compute the number of threads actually run. This number
        * is equal to the min between the max number of threads allowed and the
        * number of records in the database divided by nb.
        *
        * @param nb the minimal number of rows each thread should process
        * @note the method is declared const although it is a setter */
       virtual void setMinNbRowsPerThread(const std::size_t nb) const;

       /// returns the minimum of rows that each thread should process
       virtual std::size_t minNbRowsPerThread() const;

       /// sets new ranges to perform the countings used by the parameter estimator
       /** @tparam XALLOC the allocator template of the vector passed in
        * argument; it may differ from the estimator's own ALLOC.
        * @param new_ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's
        * rows indices. The countings are then performed only on the union of the
        * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
        * cross validation tasks, in which part of the database should be ignored.
        * An empty set of ranges is equivalent to an interval [X,Y) ranging over
        * the whole database. */
       template < template < typename > class XALLOC >
       void setRanges(
          const std::vector< std::pair< std::size_t, std::size_t >,
                             XALLOC< std::pair< std::size_t, std::size_t > > >&
             new_ranges);

       /// reset the ranges to the one range corresponding to the whole database
       void clearRanges();

       /// returns the current ranges
       /** @return a const reference on the vector of pairs of row indices */
       const std::vector< std::pair< std::size_t, std::size_t >,
                          ALLOC< std::pair< std::size_t, std::size_t > > >&
          ranges() const;

       /// returns the CPT's parameters corresponding to a given target node
       /** @param target_node the id of the node whose parameters are estimated
        * (presumably without any conditioning node -- confirm against the
        * implementation)
        * @return the vector of the estimated parameters */
       std::vector< double, ALLOC< double > > parameters(const NodeId target_node);

       /// returns the CPT's parameters corresponding to a given nodeset
       /** The vector contains the parameters of an n-dimensional CPT. The
        * distribution of the dimensions of the CPT within the vector is as
        * follows:
        * first, there is the target node, then the conditioning nodes (in the
        * order in which they were specified).
        *
        * Pure virtual: to be implemented by the derived estimators.
        *
        * @param target_node the id of the node on the left side of the
        * conditioning bar
        * @param conditioning_nodes the ids of the nodes on the right side of
        * the conditioning bar
        * @return the vector of the estimated parameters */
       virtual std::vector< double, ALLOC< double > > parameters(
          const NodeId                                  target_node,
          const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes)
          = 0;

       /// sets the CPT's parameters corresponding to a given Potential
       /** The potential is assumed to be a conditional probability, the first
        * variable of its variablesSequence() being the target variable, the
        * other ones being on the right side of the conditioning bar.
        *
        * @tparam GUM_SCALAR the scalar type of the potential
        * @param target_node the id of the target node
        * @param conditioning_nodes the ids of the conditioning nodes
        * @param pot the potential to fill (passed by non-const reference) */
       template < typename GUM_SCALAR >
       void setParameters(
          const NodeId                                  target_node,
          const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes,
          Potential< GUM_SCALAR >&                      pot);

       /// returns the mapping from ids to column positions in the database
       /** @warning An empty nodeId2Columns bijection means that the mapping is
        * an identity, i.e., the value of a NodeId is equal to the index of the
        * column in the DatabaseTable. */
       const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
          nodeId2Columns() const;

       /// returns the database on which we perform the counts
       const DatabaseTable< ALLOC >& database() const;

       /// assign a new Bayes net to all the counter's generators depending on a BN
       /** Typically, generators based on EM or K-means depend on a model to
        * compute correctly their outputs. Method setBayesNet enables to
        * update their BN model.
        *
        * @tparam GUM_SCALAR the scalar type of the Bayes net
        * @param new_bn the new Bayes net to assign */
       template < typename GUM_SCALAR >
       void setBayesNet(const BayesNet< GUM_SCALAR >& new_bn);

       /// returns the allocator used by the estimator
       allocator_type getAllocator() const;

       /// @}

       protected:
       /// an external a priori (null pointer by default)
       Apriori< ALLOC >* external_apriori_{nullptr};

       /** @brief if a score was used for learning the structure of the PGM, this
        * is the a priori internal to the score (null pointer by default) */
       Apriori< ALLOC >* score_internal_apriori_{nullptr};

       /// the record counter used to parse the database
       RecordCounter< ALLOC > counter_;

       /// an empty vector of nodes, used for empty conditioning
       const std::vector< NodeId, ALLOC< NodeId > > empty_nodevect_;


       /// copy operator (protected: not callable by users of the class)
       ParamEstimator< ALLOC >& operator=(const ParamEstimator< ALLOC >& from);

       /// move operator (protected: not callable by users of the class)
       ParamEstimator< ALLOC >& operator=(ParamEstimator< ALLOC >&& from);

       private:
 #ifndef DOXYGEN_SHOULD_SKIP_THIS

       /** @brief check the coherency between the parameters passed to
        * the setParameters functions
        *
        * @param target_node the id of the target node
        * @param conditioning_nodes the ids of the conditioning nodes
        * @param pot the potential whose coherency with the nodes is checked */
       template < typename GUM_SCALAR >
       void checkParameters__(
          const NodeId                                  target_node,
          const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes,
          Potential< GUM_SCALAR >&                      pot);

       // sets the CPT's parameters corresponding to a given Potential
       // when the potential belongs to a BayesNet<GUM_SCALAR> when
       // GUM_SCALAR is different from a double.
       // The enable_if return type removes this overload from overload
       // resolution when GUM_SCALAR is double.
       template < typename GUM_SCALAR >
       typename std::enable_if< !std::is_same< GUM_SCALAR, double >::value,
                                void >::type
          setParameters__(
             const NodeId                                  target_node,
             const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes,
             Potential< GUM_SCALAR >&                      pot);

       // sets the CPT's parameters corresponding to a given Potential
       // when the potential belongs to a BayesNet<GUM_SCALAR> when
       // GUM_SCALAR is equal to double (the code is optimized for doubles).
       // The enable_if return type keeps this overload in overload
       // resolution only when GUM_SCALAR is double.
       template < typename GUM_SCALAR >
       typename std::enable_if< std::is_same< GUM_SCALAR, double >::value,
                                void >::type
          setParameters__(
             const NodeId                                  target_node,
             const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes,
             Potential< GUM_SCALAR >&                      pot);

 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
     };

   } /* namespace learning */

 } /* namespace gum */

 /// include the template implementation
 #include <agrum/BN/learning/paramUtils/paramEstimator_tpl.h>

 #endif /* GUM_LEARNING_PARAM_ESTIMATOR_H */
gum::learning::ParamEstimator::operator=
ParamEstimator< ALLOC > & operator=(const ParamEstimator< ALLOC > &from)
copy operator

gum::learning::ParamEstimator::parameters
virtual std::vector< double, ALLOC< double > > parameters(const NodeId target_node, const std::vector< NodeId, ALLOC< NodeId > > &conditioning_nodes)=0
returns the CPT's parameters corresponding to a given nodeset

gum::learning::ParamEstimator::nodeId2Columns
const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > & nodeId2Columns() const
returns the mapping from ids to column positions in the database

gum::learning::ParamEstimator::setParameters
void setParameters(const NodeId target_node, const std::vector< NodeId, ALLOC< NodeId > > &conditioning_nodes, Potential< GUM_SCALAR > &pot)
sets the CPT's parameters corresponding to a given Potential

gum::learning::ParamEstimator::ParamEstimator
ParamEstimator(const ParamEstimator< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator

gum::learning::ParamEstimator::ParamEstimator
ParamEstimator(const ParamEstimator< ALLOC > &from)
copy constructor

gum::learning::ParamEstimator::clearRanges
void clearRanges()
reset the ranges to the one range corresponding to the whole database

gum::learning::ParamEstimator::ParamEstimator
ParamEstimator(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Apriori< ALLOC > &score_internal__apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor

gum::learning::ParamEstimator::setRanges
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the parameter estimator

gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669

gum::learning::ParamEstimator::operator=
ParamEstimator< ALLOC > & operator=(ParamEstimator< ALLOC > &&from)
move operator

gum::learning::ParamEstimator::ParamEstimator
ParamEstimator(ParamEstimator< ALLOC > &&from)
move constructor

gum::learning::ParamEstimator::setBayesNet
void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to all the counter's generators depending on a BN

gum::learning::ParamEstimator::ParamEstimator
ParamEstimator(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Apriori< ALLOC > &score_internal__apriori, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor

gum::learning::ParamEstimator::score_internal_apriori_
Apriori< ALLOC > * score_internal_apriori_
if a score was used for learning the structure of the PGM, this is the a priori internal to the score...
Definition: paramEstimator.h:248

gum::learning::ParamEstimator::getAllocator
allocator_type getAllocator() const
returns the allocator used by the score

gum::learning::ParamEstimator::setMinNbRowsPerThread
virtual void setMinNbRowsPerThread(const std::size_t nb) const
changes the number min of rows a thread should process in a multithreading context ...

gum::learning::ParamEstimator::clone
virtual ParamEstimator< ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator

gum::learning::ParamEstimator::~ParamEstimator
virtual ~ParamEstimator()
destructor

gum::learning::ParamEstimator::database
const DatabaseTable< ALLOC > & database() const
returns the database on which we perform the counts

gum::learning::ParamEstimator::parameters
std::vector< double, ALLOC< double > > parameters(const NodeId target_node)
returns the CPT's parameters corresponding to a given target node

gum::learning::ParamEstimator::clear
virtual void clear()
clears all the data structures from memory

gum::learning::ParamEstimator::nbThreads
virtual std::size_t nbThreads() const
returns the number of threads used to parse the database

gum::learning::ParamEstimator::clone
virtual ParamEstimator< ALLOC > * clone() const =0
virtual copy constructor

gum::learning::genericBNLearner::Database::Database
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
Definition: genericBNLearner_tpl.h:31

gum::learning::ParamEstimator::setMaxNbThreads
virtual void setMaxNbThreads(std::size_t nb) const
changes the max number of threads used to parse the database

gum::learning::ParamEstimator::minNbRowsPerThread
virtual std::size_t minNbRowsPerThread() const
returns the minimum of rows that each thread should process

gum::learning::ParamEstimator::counter_
RecordCounter< ALLOC > counter_
the record counter used to parse the database
Definition: paramEstimator.h:251

gum::learning::ParamEstimator::external_apriori_
Apriori< ALLOC > * external_apriori_
an external a priori
Definition: paramEstimator.h:244

gum::learning::ParamEstimator::ParamEstimator
ParamEstimator(ParamEstimator< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator