![]() |
aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
|
The base class for all the independence tests used for learning. More...
#include <agrum/BN/learning/scores_and_tests/independenceTest.h>
Public Member Functions | |
Constructors / Destructors | |
IndependenceTest (const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type()) | |
default constructor More... | |
IndependenceTest (const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type()) | |
default constructor More... | |
virtual IndependenceTest< ALLOC > * | clone () const =0 |
virtual copy constructor More... | |
virtual IndependenceTest< ALLOC > * | clone (const allocator_type &alloc) const =0 |
virtual copy constructor with a given allocator More... | |
virtual | ~IndependenceTest () |
destructor More... | |
Accessors / Modifiers | |
virtual void | setMaxNbThreads (std::size_t nb) const |
changes the max number of threads used to parse the database More... | |
virtual std::size_t | nbThreads () const |
returns the number of threads used to parse the database More... | |
virtual void | setMinNbRowsPerThread (const std::size_t nb) const |
changes the minimum number of rows a thread should process in a multithreading context More... | |
virtual std::size_t | minNbRowsPerThread () const |
returns the minimum number of rows that each thread should process More... | |
template<template< typename > class XALLOC> | |
void | setRanges (const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges) |
sets new ranges to perform the countings used by the independence test More... | |
void | clearRanges () |
reset the ranges to the one range corresponding to the whole database More... | |
const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > & | ranges () const |
returns the current ranges More... | |
double | score (const NodeId var1, const NodeId var2) |
returns the score of a pair of nodes More... | |
double | score (const NodeId var1, const NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids) |
returns the score of a pair of nodes given some other nodes More... | |
virtual void | clear () |
clears all the data structures from memory, including the cache More... | |
virtual void | clearCache () |
clears the current cache More... | |
virtual void | useCache (const bool on_off) |
turns on/off the use of a cache of the previously computed scores More... | |
const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > & | nodeId2Columns () const |
returns the mapping between the columns of the database and the node ids More... | |
const DatabaseTable< ALLOC > & | database () const |
returns the database used by the score More... | |
allocator_type | getAllocator () const |
returns the allocator used by the score More... | |
Public Types | |
using | allocator_type = ALLOC< NodeId > |
type for the allocators passed in arguments of methods More... | |
Protected Attributes | |
const double | one_log2_ {M_LOG2E} |
1 / log(2) More... | |
Apriori< ALLOC > * | apriori_ {nullptr} |
the expert knowledge a priori we add to the contingency tables More... | |
RecordCounter< ALLOC > | counter_ |
the record counter used for the countings over discrete variables More... | |
ScoringCache< ALLOC > | cache_ |
the scoring cache More... | |
bool | use_cache_ {true} |
a Boolean indicating whether we wish to use the cache More... | |
const std::vector< NodeId, ALLOC< NodeId > > | empty_ids_ |
an empty vector More... | |
Protected Member Functions | |
IndependenceTest (const IndependenceTest< ALLOC > &from) | |
copy constructor More... | |
IndependenceTest (const IndependenceTest< ALLOC > &from, const allocator_type &alloc) | |
copy constructor with a given allocator More... | |
IndependenceTest (IndependenceTest< ALLOC > &&from) | |
move constructor More... | |
IndependenceTest (IndependenceTest< ALLOC > &&from, const allocator_type &alloc) | |
move constructor with a given allocator More... | |
IndependenceTest< ALLOC > & | operator= (const IndependenceTest< ALLOC > &from) |
copy operator More... | |
IndependenceTest< ALLOC > & | operator= (IndependenceTest< ALLOC > &&from) |
move operator More... | |
virtual double | score_ (const IdCondSet< ALLOC > &idset)=0 |
returns the score for a given IdCondSet More... | |
std::vector< double, ALLOC< double > > | marginalize_ (const std::size_t node_2_marginalize, const std::size_t X_size, const std::size_t Y_size, const std::size_t Z_size, const std::vector< double, ALLOC< double > > &N_xyz) const |
returns a counting vector where variables are marginalized from N_xyz More... | |
The base class for all the independence tests used for learning.
Definition at line 51 of file independenceTest.h.
using gum::learning::IndependenceTest< ALLOC >::allocator_type = ALLOC< NodeId > |
type for the allocators passed in arguments of methods
Definition at line 54 of file independenceTest.h.
gum::learning::IndependenceTest< ALLOC >::IndependenceTest | ( | const DBRowGeneratorParser< ALLOC > & | parser, |
const Apriori< ALLOC > & | external_apriori, | ||
const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > & | ranges, | ||
const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > & | nodeId2columns = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >() , |
||
const allocator_type & | alloc = allocator_type() |
||
) |
default constructor
parser | the parser used to parse the database |
external_apriori | An apriori that we add to the computation of the score (this should come from expert knowledge): this consists in adding numbers to countings in the contingency tables |
ranges | a set of pairs {(X1,Y1),...,(Xn,Yn)} of database row indices. The countings are then performed only on the union of the rows [Xi,Yi), i in {1,...,n}. This is useful, e.g., when performing cross validation tasks, in which part of the database should be ignored. An empty set of ranges is equivalent to an interval [X,Y) ranging over the whole database. |
nodeId2columns | a mapping from the ids of the nodes in the graphical model to the corresponding column in the DatabaseTable parsed by the parser. This enables estimating from a database in which variable A corresponds to the 2nd column the parameters of a BN in which variable A has a NodeId of 5. An empty nodeId2columns bijection means that the mapping is an identity, i.e., the value of a NodeId is equal to the index of the column in the DatabaseTable. |
alloc | the allocator used to allocate the structures within the IndependenceTest. |
gum::learning::IndependenceTest< ALLOC >::IndependenceTest | ( | const DBRowGeneratorParser< ALLOC > & | parser, |
const Apriori< ALLOC > & | external_apriori, | ||
const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > & | nodeId2columns = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >() , |
||
const allocator_type & | alloc = allocator_type() |
||
) |
default constructor
parser | the parser used to parse the database |
external_apriori | An apriori that we add to the computation of the score (this should come from expert knowledge): this consists in adding numbers to countings in the contingency tables |
nodeId2columns | a mapping from the ids of the nodes in the graphical model to the corresponding column in the DatabaseTable parsed by the parser. This enables estimating from a database in which variable A corresponds to the 2nd column the parameters of a BN in which variable A has a NodeId of 5. An empty nodeId2columns bijection means that the mapping is an identity, i.e., the value of a NodeId is equal to the index of the column in the DatabaseTable. |
alloc | the allocator used to allocate the structures within the IndependenceTest. |
|
virtual |
destructor
|
protected |
copy constructor
|
protected |
copy constructor with a given allocator
|
protected |
move constructor
|
protected |
move constructor with a given allocator
|
virtual |
clears all the data structures from memory, including the cache
Reimplemented in gum::learning::KNML< ALLOC >.
|
virtual |
clears the current cache
Reimplemented in gum::learning::KNML< ALLOC >.
void gum::learning::IndependenceTest< ALLOC >::clearRanges | ( | ) |
reset the ranges to the one range corresponding to the whole database
|
pure virtual |
virtual copy constructor
Implemented in gum::learning::KNML< ALLOC >, gum::learning::IndepTestChi2< ALLOC >, and gum::learning::IndepTestG2< ALLOC >.
|
pure virtual |
virtual copy constructor with a given allocator
Implemented in gum::learning::KNML< ALLOC >, gum::learning::IndepTestChi2< ALLOC >, and gum::learning::IndepTestG2< ALLOC >.
const DatabaseTable< ALLOC >& gum::learning::IndependenceTest< ALLOC >::database | ( | ) | const |
returns the database used by the score
allocator_type gum::learning::IndependenceTest< ALLOC >::getAllocator | ( | ) | const |
returns the allocator used by the score
|
protected |
returns a counting vector where variables are marginalized from N_xyz
node_2_marginalize | indicates which node(s) shall be marginalized:
|
X_size | the domain size of variable X |
Y_size | the domain size of variable Y |
Z_size | the domain size of the set of conditioning variables Z |
N_xyz | a counting vector of dimension X * Y * Z (in this order) |
|
virtual |
returns the minimum number of rows that each thread should process
|
virtual |
returns the number of threads used to parse the database
const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >& gum::learning::IndependenceTest< ALLOC >::nodeId2Columns | ( | ) | const |
returns the mapping between the columns of the database and the node ids
|
protected |
copy operator
|
protected |
move operator
const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > >& gum::learning::IndependenceTest< ALLOC >::ranges | ( | ) | const |
returns the current ranges
double gum::learning::IndependenceTest< ALLOC >::score | ( | const NodeId | var1, |
const NodeId | var2 | ||
) |
returns the score of a pair of nodes
double gum::learning::IndependenceTest< ALLOC >::score | ( | const NodeId | var1, |
const NodeId | var2, | ||
const std::vector< NodeId, ALLOC< NodeId > > & | rhs_ids | ||
) |
returns the score of a pair of nodes given some other nodes
var1 | the first variable on the left side of the conditioning bar |
var2 | the second variable on the left side of the conditioning bar |
rhs_ids | the set of variables on the right side of the conditioning bar |
|
protected pure virtual |
returns the score for a given IdCondSet
OperationNotAllowed | is raised if the score does not support calling method score on such an idset (due to too many/too few variables in the left hand side or the right hand side of the idset). |
Implemented in gum::learning::KNML< ALLOC >, gum::learning::IndepTestChi2< ALLOC >, and gum::learning::IndepTestG2< ALLOC >.
|
virtual |
changes the max number of threads used to parse the database
|
virtual |
changes the minimum number of rows a thread should process in a multithreading context
When computing scores, several threads are used by record counters to perform countings on the rows of the database; the MinNbRowsPerThread method indicates the minimum number of rows each thread should process. This is used to compute the number of threads actually run. This number is equal to the minimum of the maximum number of threads allowed and the number of records in the database divided by nb.
void gum::learning::IndependenceTest< ALLOC >::setRanges | ( | const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > & | new_ranges | ) |
sets new ranges to perform the countings used by the independence test
new_ranges | a set of pairs {(X1,Y1),...,(Xn,Yn)} of database row indices. The countings are then performed only on the union of the rows [Xi,Yi), i in {1,...,n}. This is useful, e.g., when performing cross validation tasks, in which part of the database should be ignored. An empty set of ranges is equivalent to an interval [X,Y) ranging over the whole database. |
|
virtual |
turns on/off the use of a cache of the previously computed scores
Reimplemented in gum::learning::KNML< ALLOC >.
|
protected |
the expert knowledge a priori we add to the contingency tables
Definition at line 215 of file independenceTest.h.
|
protected |
the scoring cache
Definition at line 221 of file independenceTest.h.
|
protected |
the record counter used for the countings over discrete variables
Definition at line 218 of file independenceTest.h.
|
protected |
an empty vector
Definition at line 227 of file independenceTest.h.
|
protected |
1 / log(2)
Definition at line 212 of file independenceTest.h.
|
protected |
a Boolean indicating whether we wish to use the cache
Definition at line 224 of file independenceTest.h.