dd/d11/correctedMutualInformation__tpl_8h_source.html

 #ifndef DOXYGEN_SHOULD_SKIP_THIS

 #include <memory>

 namespace gum {

   namespace learning {

     /// returns the allocator used by the score
     /** The value returned is the one reported by the internal score __NH. */
     template < template < typename > class ALLOC >
     typename CorrectedMutualInformation< ALLOC >::allocator_type
        CorrectedMutualInformation< ALLOC >::getAllocator() const {
       return __NH.getAllocator();
     }


     /// default constructor, with explicit database ranges
     /** All the arguments are forwarded unchanged to the three internal
      * scores (__NH, __k_NML and __score_MDL); the two caches __ICache and
      * __KCache only receive the allocator.
      * @param parser the database parser handed to the internal scores
      * @param apriori the a priori handed to the internal scores
      * @param ranges the database ranges handed to the internal scores
      * @param nodeId2columns the NodeId/column mapping handed to the internal
      * scores
      * @param alloc the allocator handed to the internal scores and the caches
      * @note __kmode and the __use_XXX flags are not set here; presumably they
      * have default member initializers in the class declaration -- TODO
      * confirm. */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::CorrectedMutualInformation(
        const DBRowGeneratorParser< ALLOC >& parser,
        const Apriori< ALLOC >&              apriori,
        const std::vector< std::pair< std::size_t, std::size_t >,
                           ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
        const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
                                                                            nodeId2columns,
        const typename CorrectedMutualInformation< ALLOC >::allocator_type& alloc) :
         __NH(parser, apriori, ranges, nodeId2columns, alloc),
         __k_NML(parser, apriori, ranges, nodeId2columns, alloc),
         __score_MDL(parser, apriori, ranges, nodeId2columns, alloc),
         __ICache(alloc), __KCache(alloc) {
       // presumably an object-tracking debugging macro, like the
       // GUM_DESTRUCTOR used in the destructor -- TODO confirm
       GUM_CONSTRUCTOR(CorrectedMutualInformation);
     }


     /// default constructor, without explicit database ranges
     /** Same as the constructor taking ranges, except that no range is passed
      * to the three internal scores (__NH, __k_NML and __score_MDL).
      * @param parser the database parser handed to the internal scores
      * @param apriori the a priori handed to the internal scores
      * @param nodeId2columns the NodeId/column mapping handed to the internal
      * scores
      * @param alloc the allocator handed to the internal scores and the caches
      * @note __kmode and the __use_XXX flags are not set here; presumably they
      * have default member initializers in the class declaration -- TODO
      * confirm. */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::CorrectedMutualInformation(
        const DBRowGeneratorParser< ALLOC >& parser,
        const Apriori< ALLOC >&              apriori,
        const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
                                                                            nodeId2columns,
        const typename CorrectedMutualInformation< ALLOC >::allocator_type& alloc) :
         __NH(parser, apriori, nodeId2columns, alloc),
         __k_NML(parser, apriori, nodeId2columns, alloc),
         __score_MDL(parser, apriori, nodeId2columns, alloc), __ICache(alloc),
         __KCache(alloc) {
       // presumably an object-tracking debugging macro, like the
       // GUM_DESTRUCTOR used in the destructor -- TODO confirm
       GUM_CONSTRUCTOR(CorrectedMutualInformation);
     }


     /// copy constructor with a given allocator
     /** Copies the three internal scores and the two caches of `from`, each
      * of them being given `alloc`, and copies the penalty mode and the four
      * cache-usage flags as plain values.
      * @param from the score to copy
      * @param alloc the allocator handed to the copied scores and caches */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::CorrectedMutualInformation(
        const CorrectedMutualInformation< ALLOC >&                          from,
        const typename CorrectedMutualInformation< ALLOC >::allocator_type& alloc) :
         __NH(from.__NH, alloc),
         __k_NML(from.__k_NML, alloc), __score_MDL(from.__score_MDL, alloc),
         __kmode(from.__kmode), __use_ICache(from.__use_ICache),
         __use_HCache(from.__use_HCache), __use_KCache(from.__use_KCache),
         __use_CnrCache(from.__use_CnrCache), __ICache(from.__ICache, alloc),
         __KCache(from.__KCache, alloc) {
       // copy-constructor counterpart of GUM_CONSTRUCTOR; presumably a
       // debugging macro -- TODO confirm
       GUM_CONS_CPY(CorrectedMutualInformation);
     }


     /// copy constructor
     /** Delegates to the allocator-aware copy constructor, using the
      * allocator returned by from.getAllocator(). */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::CorrectedMutualInformation(
        const CorrectedMutualInformation< ALLOC >& from) :
         CorrectedMutualInformation(from, from.getAllocator()) {}


     /// move constructor with a given allocator
     /** Moves the three internal scores and the two caches out of `from`,
      * each of them being given `alloc`; the penalty mode and the four
      * cache-usage flags are copied as plain values.
      * @param from the score whose content is moved
      * @param alloc the allocator handed to the moved scores and caches */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::CorrectedMutualInformation(
        CorrectedMutualInformation< ALLOC >&&                               from,
        const typename CorrectedMutualInformation< ALLOC >::allocator_type& alloc) :
         __NH(std::move(from.__NH), alloc),
         __k_NML(std::move(from.__k_NML), alloc),
         __score_MDL(std::move(from.__score_MDL), alloc), __kmode(from.__kmode),
         __use_ICache(from.__use_ICache), __use_HCache(from.__use_HCache),
         __use_KCache(from.__use_KCache), __use_CnrCache(from.__use_CnrCache),
         __ICache(std::move(from.__ICache), alloc),
         __KCache(std::move(from.__KCache), alloc) {
       // move-constructor counterpart of GUM_CONSTRUCTOR; presumably a
       // debugging macro -- TODO confirm
       GUM_CONS_MOV(CorrectedMutualInformation);
     }


     /// move constructor
     /** Delegates to the allocator-aware move constructor. std::move is only
      * a cast, so `from` has not been modified yet when from.getAllocator()
      * is evaluated, whatever the evaluation order of the two arguments. */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::CorrectedMutualInformation(
        CorrectedMutualInformation< ALLOC >&& from) :
         CorrectedMutualInformation(std::move(from), from.getAllocator()) {}


     /// virtual copy constructor with a given allocator
     /** Allocates room for one CorrectedMutualInformation with an allocator
      * rebound from `alloc` and copy-constructs *this into it, passing
      * `alloc` to the copy constructor.
      * @param alloc the allocator used both to get the memory and to build
      * the copy
      * @return a pointer to the newly built copy. Nothing in this method
      * releases it: this is left to the caller.
      * @throws whatever the allocation or the copy constructor throws; if the
      * construction fails, the memory is deallocated before rethrowing. */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >*
        CorrectedMutualInformation< ALLOC >::clone(
           const typename CorrectedMutualInformation< ALLOC >::allocator_type&
              alloc) const {
       using ScoreAllocator = ALLOC< CorrectedMutualInformation< ALLOC > >;

       ScoreAllocator                       allocator(alloc);
       CorrectedMutualInformation< ALLOC >* new_score = allocator.allocate(1);
       try {
         // construct() is an optional allocator member (deprecated in C++17
         // and removed from std::allocator in C++20): go through
         // allocator_traits, which calls it when it exists and falls back on
         // a placement new otherwise
         std::allocator_traits< ScoreAllocator >::construct(
            allocator, new_score, *this, alloc);
       } catch (...) {
         // the object was never constructed: only give the memory back
         allocator.deallocate(new_score, 1);
         throw;
       }

       return new_score;
     }


     /// virtual copy constructor
     /** Same as clone(alloc), called with the allocator currently used by
      * this score. */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >*
        CorrectedMutualInformation< ALLOC >::clone() const {
       const auto current_alloc = this->getAllocator();
       return clone(current_alloc);
     }


     /// destructor
     /** Nothing is released explicitly here: the body only contains the
      * debugging macro. */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >::~CorrectedMutualInformation() {
       // for debugging purposes
       GUM_DESTRUCTOR(CorrectedMutualInformation);
     }


     /// copy operator
     /** Copies the internal scores, the penalty mode, the cache-usage flags
      * and the caches of `from`. Self-assignment is a no-op.
      * @return a reference on *this */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >& CorrectedMutualInformation< ALLOC >::
                                          operator=(const CorrectedMutualInformation< ALLOC >& from) {
       // nothing to do when assigning an object to itself
       if (this == &from) { return *this; }

       // the internal scores
       this->__NH = from.__NH;
       this->__k_NML = from.__k_NML;
       this->__score_MDL = from.__score_MDL;

       // the penalty mode and the cache-usage flags
       this->__kmode = from.__kmode;
       this->__use_ICache = from.__use_ICache;
       this->__use_HCache = from.__use_HCache;
       this->__use_KCache = from.__use_KCache;
       this->__use_CnrCache = from.__use_CnrCache;

       // the caches
       this->__ICache = from.__ICache;
       this->__KCache = from.__KCache;

       return *this;
     }


     /// move operator
     /** Moves the internal scores and the caches out of `from`; the penalty
      * mode and the cache-usage flags are copied as plain values.
      * Self-assignment is a no-op.
      * @return a reference on *this */
     template < template < typename > class ALLOC >
     CorrectedMutualInformation< ALLOC >& CorrectedMutualInformation< ALLOC >::
                                          operator=(CorrectedMutualInformation< ALLOC >&& from) {
       // nothing to do when assigning an object to itself
       if (this == &from) { return *this; }

       // the internal scores
       this->__NH = std::move(from.__NH);
       this->__k_NML = std::move(from.__k_NML);
       this->__score_MDL = std::move(from.__score_MDL);

       // the penalty mode and the cache-usage flags
       this->__kmode = from.__kmode;
       this->__use_ICache = from.__use_ICache;
       this->__use_HCache = from.__use_HCache;
       this->__use_KCache = from.__use_KCache;
       this->__use_CnrCache = from.__use_CnrCache;

       // the caches
       this->__ICache = std::move(from.__ICache);
       this->__KCache = std::move(from.__KCache);

       return *this;
     }


     /// turn on/off the use of all the caches
     /** Forwards `on_off` to the four individual switches: useICache,
      * useHCache, useKCache and useCnrCache.
      * @param on_off true to use the caches, false to stop using them */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::useCache(bool on_off) {
       useICache(on_off);
       useHCache(on_off);
       useKCache(on_off);
       useCnrCache(on_off);
     }


     /// turn on/off the use of the ICache (the mutual information cache)
     /** When the cache is turned off, its current content is dropped.
      * @param on_off true to use the ICache, false to stop using it */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::useICache(bool on_off) {
       const bool turning_off = !on_off;
       if (turning_off) { this->__ICache.clear(); }

       this->__use_ICache = on_off;
     }


     /// turn on/off the use of the HCache (the cache for the entropies)
     /** The cache itself belongs to the internal score __NH: it is cleared
      * when turned off, and __NH is told whether to use it.
      * @param on_off true to use the HCache, false to stop using it */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::useHCache(bool on_off) {
       const bool turning_off = !on_off;
       if (turning_off) { this->__NH.clearCache(); }

       this->__use_HCache = on_off;
       this->__NH.useCache(on_off);
     }


     /// turn on/off the use of the KCache (the cache for the penalties)
     /** When the cache is turned off, its current content is dropped.
      * @param on_off true to use the KCache, false to stop using it */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::useKCache(bool on_off) {
       const bool turning_off = !on_off;
       if (turning_off) { this->__KCache.clear(); }

       this->__use_KCache = on_off;
     }


     /// turn on/off the use of the CnrCache (the cache for the Cnr formula)
     /** The cache itself belongs to the internal score __k_NML: it is cleared
      * when turned off, and __k_NML is told whether to use it.
      * @param on_off true to use the CnrCache, false to stop using it */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::useCnrCache(bool on_off) {
       const bool turning_off = !on_off;
       if (turning_off) { this->__k_NML.clearCache(); }

       this->__use_CnrCache = on_off;
       this->__k_NML.useCache(on_off);
     }


     /// clears all the data structures from memory
     /** Calls clear() on the three internal scores (__NH, __k_NML and
      * __score_MDL), then clearCache() on this object. */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::clear() {
       __NH.clear();
       __k_NML.clear();
       __score_MDL.clear();
       clearCache();
     }


     /// clears all the current caches
     /** Clears the caches held by __NH and __k_NML as well as __ICache and
      * __KCache.
      * NOTE(review): __score_MDL is not touched here -- confirm that it holds
      * no cache of its own that should be cleared too. */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::clearCache() {
       __NH.clearCache();
       __k_NML.clearCache();
       __ICache.clear();
       __KCache.clear();
     }


     /// clears the ICache (the mutual information cache)
     /** Only the content of __ICache is dropped; __use_ICache is left
      * unchanged. */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::clearICache() {
       __ICache.clear();
     }


     /// clears the HCache (the cache for the entropies)
     /** The cache is the one held by the internal score __NH; __use_HCache is
      * left unchanged. */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::clearHCache() {
       __NH.clearCache();
     }


     /// clears the KCache (the cache for the penalties)
     /** Only the content of __KCache is dropped; __use_KCache is left
      * unchanged. */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::clearKCache() {
       __KCache.clear();
     }


     /// clears the CnrCache (the cache for the Cnr formula)
     /** The cache is the one held by the internal score __k_NML;
      * __use_CnrCache is left unchanged. */
     template < template < typename > class ALLOC >
     INLINE void CorrectedMutualInformation< ALLOC >::clearCnrCache() {
       __k_NML.clearCache();
     }


     /// changes the max number of threads used to parse the database
     /** The value is forwarded to the three internal scores (__NH, __k_NML
      * and __score_MDL).
      * @param nb the new maximal number of threads
      * NOTE(review): this method is const although it changes a setting of
      * the internal scores; presumably their setMaxNbThreads is const as
      * well -- confirm. */
     template < template < typename > class ALLOC >
     void
        CorrectedMutualInformation< ALLOC >::setMaxNbThreads(std::size_t nb) const {
       __NH.setMaxNbThreads(nb);
       __k_NML.setMaxNbThreads(nb);
       __score_MDL.setMaxNbThreads(nb);
     }


     /// returns the number of threads used to parse the database
     /** The value is read from __NH only; setMaxNbThreads gives the same
      * value to the three internal scores. */
     template < template < typename > class ALLOC >
     std::size_t CorrectedMutualInformation< ALLOC >::nbThreads() const {
       return __NH.nbThreads();
     }


     /// changes the minimum number of rows a thread should process
     /** The value is forwarded to the three internal scores (__NH, __k_NML
      * and __score_MDL).
      * @param nb the new minimum number of rows per thread */
     template < template < typename > class ALLOC >
     void CorrectedMutualInformation< ALLOC >::setMinNbRowsPerThread(
        const std::size_t nb) const {
       __NH.setMinNbRowsPerThread(nb);
       __k_NML.setMinNbRowsPerThread(nb);
       __score_MDL.setMinNbRowsPerThread(nb);
     }


     /// returns the minimum number of rows that each thread should process
     /** The value is read from __NH only; setMinNbRowsPerThread gives the
      * same value to the three internal scores. */
     template < template < typename > class ALLOC >
     INLINE std::size_t
            CorrectedMutualInformation< ALLOC >::minNbRowsPerThread() const {
       return __NH.minNbRowsPerThread();
     }


     /// sets new ranges to perform the countings used by the mutual information
     /** The new ranges are forwarded to the three internal scores. If the
      * ranges reported by ranges() differ before and after the update, all
      * the data structures are cleared.
      * @param new_ranges the ranges to use from now on */
     template < template < typename > class ALLOC >
     template < template < typename > class XALLOC >
     void CorrectedMutualInformation< ALLOC >::setRanges(
        const std::vector< std::pair< std::size_t, std::size_t >,
                           XALLOC< std::pair< std::size_t, std::size_t > > >&
           new_ranges) {
       // keep a copy of the ranges in force before the update
       auto previous_ranges = this->ranges();

       this->__NH.setRanges(new_ranges);
       this->__k_NML.setRanges(new_ranges);
       this->__score_MDL.setRanges(new_ranges);

       // what was computed on other ranges is no longer valid
       const bool ranges_changed = (previous_ranges != this->ranges());
       if (ranges_changed) { this->clear(); }
     }


     /// reset the ranges to the one range corresponding to the whole database
     /** clearRanges() is called on the three internal scores. If the ranges
      * reported by ranges() differ before and after this call, all the data
      * structures are cleared. */
     template < template < typename > class ALLOC >
     void CorrectedMutualInformation< ALLOC >::clearRanges() {
       // keep a copy of the ranges in force before the reset
       auto previous_ranges = this->ranges();

       this->__NH.clearRanges();
       this->__k_NML.clearRanges();
       this->__score_MDL.clearRanges();

       // what was computed on other ranges is no longer valid
       const bool ranges_changed = (previous_ranges != this->ranges());
       if (ranges_changed) { this->clear(); }
     }


     /// returns the current ranges
     /** The ranges are those reported by __NH; setRanges and clearRanges
      * apply the same change to the three internal scores.
      * @return a const reference on the vector of ranges held by __NH */
     template < template < typename > class ALLOC >
     INLINE const std::vector< std::pair< std::size_t, std::size_t >,
                               ALLOC< std::pair< std::size_t, std::size_t > > >&
                  CorrectedMutualInformation< ALLOC >::ranges() const {
       return __NH.ranges();
     }


     /// use the MDL penalty function
     /** The caches are cleared before the mode is changed: __KCache is
      * indexed by the variables only, so a penalty stored under another mode
      * would otherwise be returned as is by __K_score.
      * NOTE(review): clearCache() also drops the entropy and mutual
      * information caches, which the code shown here never makes depend on
      * __kmode -- confirm whether clearing them is really needed. */
     template < template < typename > class ALLOC >
     void CorrectedMutualInformation< ALLOC >::useMDL() {
       clearCache();
       __kmode = KModeTypes::MDL;
     }


     /// use the kNML penalty function
     /** The caches are cleared before the mode is changed: __KCache is
      * indexed by the variables only, so a penalty stored under another mode
      * would otherwise be returned as is by __K_score.
      * NOTE(review): clearCache() also drops the entropy and mutual
      * information caches, which the code shown here never makes depend on
      * __kmode -- confirm whether clearing them is really needed. */
     template < template < typename > class ALLOC >
     void CorrectedMutualInformation< ALLOC >::useNML() {
       clearCache();
       __kmode = KModeTypes::NML;
     }


     /// use no correction/penalty function
     /** The caches are cleared before the mode is changed. In this mode,
      * __K_score returns 0 without looking at __KCache.
      * NOTE(review): clearCache() also drops the entropy and mutual
      * information caches, which the code shown here never makes depend on
      * __kmode -- confirm whether clearing them is really needed. */
     template < template < typename > class ALLOC >
     void CorrectedMutualInformation< ALLOC >::useNoCorr() {
       clearCache();
       __kmode = KModeTypes::NoCorr;
     }


     /// returns the 2-point mutual information corresponding to a given nodeset
     /** Same as the conditional version, called with the member
      * __empty_conditioning_set as conditioning set.
      * @param var1 the first variable
      * @param var2 the second variable */
     template < template < typename > class ALLOC >
     INLINE double CorrectedMutualInformation< ALLOC >::score(NodeId var1,
                                                              NodeId var2) {
       return score(var1, var2, __empty_conditioning_set);
     }


     /// returns the corrected 2-point mutual information of var1 and var2
     /// given a conditioning set
     /** The result is the mutual information term computed by __NI_score
      * minus the penalty computed by __K_score.
      * @param var1 the first variable
      * @param var2 the second variable
      * @param conditioning_ids the variables of the conditioning set */
     template < template < typename > class ALLOC >
     INLINE double CorrectedMutualInformation< ALLOC >::score(
        NodeId                                        var1,
        NodeId                                        var2,
        const std::vector< NodeId, ALLOC< NodeId > >& conditioning_ids) {
       const double information = this->__NI_score(var1, var2, conditioning_ids);
       const double penalty = this->__K_score(var1, var2, conditioning_ids);
       return information - penalty;
     }


     /// returns the corrected 3-point mutual information of var1, var2, var3
     /** Same as the conditional version, called with the member
      * __empty_conditioning_set as conditioning set.
      * @param var1 the first variable
      * @param var2 the second variable
      * @param var3 the third variable */
     template < template < typename > class ALLOC >
     INLINE double CorrectedMutualInformation< ALLOC >::score(NodeId var1,
                                                              NodeId var2,
                                                              NodeId var3) {
       return score(var1, var2, var3, __empty_conditioning_set);
     }


     /// returns the corrected 3-point mutual information of var1, var2, var3
     /// given a conditioning set
     /** The result is the 3-point term computed by __NI_score plus the
      * 3-point penalty computed by __K_score. The penalty is added here
      * because the 3-point __K_score is k(x;y|ui,z) - k(x;y|ui), i.e., the
      * opposite of the difference of the two 2-point penalties.
      * @param var1 the first variable
      * @param var2 the second variable
      * @param var3 the third variable
      * @param conditioning_ids the variables of the conditioning set */
     template < template < typename > class ALLOC >
     INLINE double CorrectedMutualInformation< ALLOC >::score(
        NodeId                                        var1,
        NodeId                                        var2,
        NodeId                                        var3,
        const std::vector< NodeId, ALLOC< NodeId > >& conditioning_ids) {
       const double information =
          this->__NI_score(var1, var2, var3, conditioning_ids);
       const double penalty = this->__K_score(var1, var2, var3, conditioning_ids);
       return information + penalty;
     }


     /// computes N times the 2-point mutual information I(x;y|z)
     /** The value is obtained from the scores returned by __NH:
      *   - without conditioning set: N.I(x;y)   = NHx + NHy - NHxy
      *   - with a conditioning set:  N.I(x;y|z) = NHxz + NHyz - NHz - NHxyz
      * where each NHxxx is the opposite of the corresponding __NH score
      * (entropy H is 1/N times the log2-likelihood, N being the size of the
      * database).
      *
      * When the two sides of the subtraction differ by a relative amount
      * smaller than __threshold, they are made equal, so that rounding errors
      * do not produce a spurious non-zero score.
      *
      * If the ICache is in use, the result is read from / stored into it.
      * @param var_x the first variable
      * @param var_y the second variable
      * @param vars_z the conditioning set (possibly empty) */
     template < template < typename > class ALLOC >
     double CorrectedMutualInformation< ALLOC >::__NI_score(
        NodeId                                        var_x,
        NodeId                                        var_y,
        const std::vector< NodeId, ALLOC< NodeId > >& vars_z) {
       // the key under which the score is stored in the ICache
       const IdSet< ALLOC > idset_xyz(var_x, var_y, vars_z, false, false);

       // if the score has already been computed, return the cached value
       if (__use_ICache) {
         try {
           return __ICache.score(idset_xyz);
         } catch (const NotFound&) {
           // not in the cache yet: compute it below
         }
       }

       // indicates whether minuend - subtrahend is only due to rounding
       // errors, i.e., whether their relative difference is below __threshold
       const auto only_rounding_noise = [&](const double minuend,
                                            const double subtrahend) -> bool {
         double relative_gap = 1;
         if (minuend > 0) {
           relative_gap = (minuend - subtrahend) / minuend;
         } else if (subtrahend > 0) {
           relative_gap = (minuend - subtrahend) / subtrahend;
         }
         if (relative_gap < 0) relative_gap = -relative_gap;
         return relative_gap < __threshold;
       };

       double mutual_info;
       if (vars_z.empty()) {
         // no conditioning node: N.I(x;y) = NHx + NHy - NHxy
         const double NHxy = -__NH.score(
            IdSet< ALLOC >(var_x, var_y, __empty_conditioning_set, true, false));
         const double NHx = -__NH.score(var_x);
         const double NHy = -__NH.score(var_y);

         double NHx_NHy = NHx + NHy;
         if (only_rounding_noise(NHx_NHy, NHxy)) {
           NHx_NHy = NHxy;   // ensure that the score is equal to 0
         }

         mutual_info = NHx_NHy - NHxy;
       } else {
         // conditioning nodes: N.I(x;y|z) = NHxz + NHyz - NHz - NHxyz
         // the id sets are built by pushing/popping x and y at the back of z

         // z, x, y
         std::vector< NodeId, ALLOC< NodeId > > ids(vars_z);
         ids.push_back(var_x);
         ids.push_back(var_y);
         const double NHxyz = -__NH.score(IdSet< ALLOC >(ids, false, true));

         // z, x
         ids.pop_back();
         const double NHxz = -__NH.score(IdSet< ALLOC >(ids, false, true));

         // z, y
         ids.pop_back();
         ids.push_back(var_y);
         const double NHyz = -__NH.score(IdSet< ALLOC >(ids, false, true));

         // z
         ids.pop_back();
         const double NHz = -__NH.score(IdSet< ALLOC >(ids, false, true));

         const double NHxz_NHyz = NHxz + NHyz;
         double       NHz_NHxyz = NHz + NHxyz;
         if (only_rounding_noise(NHxz_NHyz, NHz_NHxyz)) {
           NHz_NHxyz = NHxz_NHyz;   // ensure that the score is equal to 0
         }

         mutual_info = NHxz_NHyz - NHz_NHxyz;
       }

       // store the score into the cache if the latter is in use
       if (__use_ICache) { __ICache.insert(idset_xyz, mutual_info); }

       return mutual_info;
     }


     /// computes the 3-point mutual information term of x, y, z given {ui}
     /** Conditional 3-point mutual information formula:
      *   I(x;y;z|{ui}) = I(x;y|{ui}) - I(x;y|z,{ui})
      * Both terms are computed by the 2-point __NI_score.
      * @param var_x the first variable
      * @param var_y the second variable
      * @param var_z the third variable
      * @param ui_ids the conditioning set {ui} */
     template < template < typename > class ALLOC >
     INLINE double CorrectedMutualInformation< ALLOC >::__NI_score(
        NodeId                                        var_x,
        NodeId                                        var_y,
        NodeId                                        var_z,
        const std::vector< NodeId, ALLOC< NodeId > >& ui_ids) {
       // the conditioning set {ui} extended with z
       std::vector< NodeId, ALLOC< NodeId > > ui_and_z(ui_ids);
       ui_and_z.push_back(var_z);

       const double info_given_ui = this->__NI_score(var_x, var_y, ui_ids);
       const double info_given_ui_z = this->__NI_score(var_x, var_y, ui_and_z);
       return info_given_ui - info_given_ui_z;
     }


     /// computes the 2-point penalty K(var1;var2|conditioning_ids)
     /** The penalty depends on the current mode __kmode:
      *   - NoCorr: the penalty is 0;
      *   - MDL: 0.5 * (rx - 1) * (ry - 1) * rui * log2(N), where rx and ry
      *     are the domain sizes of var1 and var2, rui is the product of the
      *     domain sizes of the conditioning variables, and N is the value
      *     returned by __score_MDL.N for these variables;
      *   - NML: the score returned by __k_NML.
      *
      * If the KCache is in use, the result is read from / stored into it
      * (except in NoCorr mode, which never touches the cache).
      * @param var1 the first variable
      * @param var2 the second variable
      * @param conditioning_ids the conditioning set (possibly empty)
      * @throws NotImplementedYet if __kmode is none of the modes above */
     template < template < typename > class ALLOC >
     double CorrectedMutualInformation< ALLOC >::__K_score(
        NodeId                                        var1,
        NodeId                                        var2,
        const std::vector< NodeId, ALLOC< NodeId > >& conditioning_ids) {
       // if no penalty, return 0
       if (__kmode == KModeTypes::NoCorr) return 0.0;


       // If using the K cache, verify whether the set isn't already known
       IdSet< ALLOC > idset;
       if (__use_KCache) {
         // the temporary is already an rvalue: no std::move is needed
         idset = IdSet< ALLOC >(var1, var2, conditioning_ids, false);
         try {
           return __KCache.score(idset);
         } catch (const NotFound&) {
           // not in the cache yet: compute it below
         }
       }

       // compute the score
       double score = 0.0;
       switch (__kmode) {
         case KModeTypes::MDL: {
           const auto& database = __NH.database();
           const auto& node2cols = __NH.nodeId2Columns();

           // domain sizes of var1, of var2 and of the whole conditioning set
           std::size_t rx = 0;
           std::size_t ry = 0;
           std::size_t rui = 1;
           if (!node2cols.empty()) {
             // the node ids must be translated into database columns
             rx = database.domainSize(node2cols.second(var1));
             ry = database.domainSize(node2cols.second(var2));
             for (const NodeId i : conditioning_ids) {
               rui *= database.domainSize(node2cols.second(i));
             }
           } else {
             // no mapping: the node ids are used as columns
             rx = database.domainSize(var1);
             ry = database.domainSize(var2);
             for (const NodeId i : conditioning_ids) {
               rui *= database.domainSize(i);
             }
           }

           // compute the size of the database, including the a priori
           // (when the KCache is in use, idset has already been filled above)
           if (!__use_KCache) {
             idset = IdSet< ALLOC >(var1, var2, conditioning_ids, false);
           }
           const double N = __score_MDL.N(idset);

           score = 0.5 * (rx - 1) * (ry - 1) * rui * std::log2(N);
         } break;

         case KModeTypes::NML:
           score = __k_NML.score(var1, var2, conditioning_ids);
           break;

         default:
           GUM_ERROR(NotImplementedYet,
                     "CorrectedMutualInformation mode does "
                     "not support yet this correction");
       }

       // shall we put the score into the cache?
       if (__use_KCache) { __KCache.insert(idset, score); }
       return score;
     }


     /// computes the 3-point penalty of var1, var2, var3 given {ui}
     /** k(x;y;z|ui) = k(x;y|ui,z) - k(x;y|ui)
      * Both terms are computed by the 2-point __K_score.
      * @param var1 the first variable (x)
      * @param var2 the second variable (y)
      * @param var3 the third variable (z)
      * @param ui_ids the conditioning set {ui} */
     template < template < typename > class ALLOC >
     INLINE double CorrectedMutualInformation< ALLOC >::__K_score(
        NodeId                                        var1,
        NodeId                                        var2,
        NodeId                                        var3,
        const std::vector< NodeId, ALLOC< NodeId > >& ui_ids) {
       // the conditioning set {ui} extended with z
       std::vector< NodeId, ALLOC< NodeId > > ui_and_z(ui_ids);
       ui_and_z.push_back(var3);

       const double penalty_given_ui_z = this->__K_score(var1, var2, ui_and_z);
       const double penalty_given_ui = this->__K_score(var1, var2, ui_ids);
       return penalty_given_ui_z - penalty_given_ui;
     }


   } /* namespace learning */

 } /* namespace gum */

 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
gum::learning::CorrectedMutualInformation::useNML
void useNML()
use the kNML penalty function

gum::learning::CorrectedMutualInformation::clearRanges
void clearRanges()
reset the ranges to the one range corresponding to the whole database

gum::learning::CorrectedMutualInformation::KModeTypes::NoCorr

gum::learning::CorrectedMutualInformation::useCnrCache
void useCnrCache(bool on_off)
turn on/off the use of the CnrCache (the cache for the Cnr formula)

gum::learning::CorrectedMutualInformation::useCache
virtual void useCache(bool on_off)
turn on/off the use of all the caches

gum::learning::CorrectedMutualInformation::useHCache
void useHCache(bool on_off)
turn on/off the use of the HCache (the cache for the entropies)

gum::learning::CorrectedMutualInformation::~CorrectedMutualInformation
virtual ~CorrectedMutualInformation()
destructor

gum::learning::CorrectedMutualInformation::clear
virtual void clear()
clears all the data structures from memory

gum::learning::CorrectedMutualInformation::clearICache
void clearICache()
clears the ICache (the mutual information cache)

gum::learning::CorrectedMutualInformation::operator=
CorrectedMutualInformation< ALLOC > & operator=(const CorrectedMutualInformation< ALLOC > &from)
copy operator

gum::learning::CorrectedMutualInformation::allocator_type
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
Definition: correctedMutualInformation.h:59

gum::learning::CorrectedMutualInformation::getAllocator
allocator_type getAllocator() const
returns the allocator used by the score

std
STL namespace.

gum::learning::CorrectedMutualInformation::useKCache
void useKCache(bool on_off)
turn on/off the use of the KCache (the cache for the penalties)

gum::learning::CorrectedMutualInformation::clone
virtual CorrectedMutualInformation< ALLOC > * clone() const
virtual copy constructor

gum::learning::CorrectedMutualInformation::clearCache
virtual void clearCache()
clears all the current caches

gum
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25

gum::learning::CorrectedMutualInformation::minNbRowsPerThread
virtual std::size_t minNbRowsPerThread() const
returns the minimum number of rows that each thread should process

gum::learning::CorrectedMutualInformation::ranges
const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > & ranges() const
returns the current ranges

gum::learning::CorrectedMutualInformation::score
double score(NodeId var1, NodeId var2)
returns the 2-point mutual information corresponding to a given nodeset

gum::learning::CorrectedMutualInformation::KModeTypes::MDL

gum::learning::CorrectedMutualInformation::setRanges
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the mutual information

gum::learning::CorrectedMutualInformation::KModeTypes::NML

gum::learning::CorrectedMutualInformation::CorrectedMutualInformation
CorrectedMutualInformation(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor

gum::learning::CorrectedMutualInformation::clearKCache
void clearKCache()
clears the KCache (the cache for the penalties)

gum::learning::CorrectedMutualInformation::clearHCache
void clearHCache()
clears the HCache (the cache for the entropies)

gum::learning::CorrectedMutualInformation::nbThreads
virtual std::size_t nbThreads() const
returns the number of threads used to parse the database

gum::learning::CorrectedMutualInformation::setMinNbRowsPerThread
virtual void setMinNbRowsPerThread(const std::size_t nb) const
changes the minimum number of rows a thread should process in a multithreading context ...

gum::learning::CorrectedMutualInformation::useMDL
void useMDL()
use the MDL penalty function

gum::learning::CorrectedMutualInformation::useICache
void useICache(bool on_off)
turn on/off the use of the ICache (the mutual information cache)

gum::learning::CorrectedMutualInformation::clearCnrCache
void clearCnrCache()
clears the CnrCache (the cache for the Cnr formula)

gum::NodeId
Size NodeId
Type for node ids.
Definition: graphElements.h:98

GUM_ERROR
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55

gum::learning::CorrectedMutualInformation::useNoCorr
void useNoCorr()
use no correction/penalty function

gum::learning::CorrectedMutualInformation::setMaxNbThreads
virtual void setMaxNbThreads(std::size_t nb) const
changes the max number of threads used to parse the database