![]() |
aGrUM
0.16.0
|
a helper to easily read databases More...
#include <genericBNLearner.h>
Public Member Functions | |
template<typename GUM_SCALAR > | |
Database (const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols) | |
Constructors / Destructors | |
Database (const std::string &file, const std::vector< std::string > &missing_symbols) | |
constructor reading the database from a CSV file More... | |
Database (const DatabaseTable<> &db) | |
constructor filling the Database from an already initialized database table More... | |
Database (const std::string &filename, Database &score_database, const std::vector< std::string > &missing_symbols) | |
constructor for the aprioris More... | |
template<typename GUM_SCALAR > | |
Database (const std::string &filename, const gum::BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols) | |
constructor with a BN providing the variables of interest More... | |
Database (const Database &from) | |
copy constructor More... | |
Database (Database &&from) | |
move constructor More... | |
~Database () | |
destructor More... | |
Operators | |
Database & | operator= (const Database &from) |
copy operator More... | |
Database & | operator= (Database &&from) |
move operator More... | |
Accessors / Modifiers | |
DBRowGeneratorParser & | parser () |
returns the parser for the database More... | |
const std::vector< std::size_t > & | domainSizes () const |
returns the domain sizes of the variables More... | |
const std::vector< std::string > & | names () const |
returns the names of the variables in the database More... | |
NodeId | idFromName (const std::string &var_name) const |
returns the node id corresponding to a variable name More... | |
const std::string & | nameFromId (NodeId id) const |
returns the variable name corresponding to a given node id More... | |
const DatabaseTable & | databaseTable () const |
returns the internal database table More... | |
void | setDatabaseWeight (const double new_weight) |
assign a weight to all the rows of the database so that the sum of their weights is equal to new_weight More... | |
const Bijection< NodeId, std::size_t > & | nodeId2Columns () const |
returns the mapping between node ids and their columns in the database More... | |
const std::vector< std::string > & | missingSymbols () const |
returns the set of missing symbols taken into account More... | |
std::size_t | nbRows () const |
returns the number of records in the database More... | |
std::size_t | size () const |
returns the number of records in the database More... | |
void | setWeight (const std::size_t i, const double weight) |
sets the weight of the ith record More... | |
double | weight (const std::size_t i) const |
returns the weight of the ith record More... | |
double | weight () const |
returns the weight of the whole database More... | |
Protected Attributes | |
DatabaseTable | __database |
the database itself More... | |
DBRowGeneratorParser * | __parser {nullptr} |
the parser used for reading the database More... | |
std::vector< std::size_t > | __domain_sizes |
the domain sizes of the variables (useful to speed-up computations) More... | |
Bijection< NodeId, std::size_t > | __nodeId2cols |
a bijection assigning to each NodeId the index of its column in the database More... | |
Size | __max_threads_number {1} |
the max number of threads authorized More... | |
Size | __min_nb_rows_per_thread {100} |
the minimal number of rows to parse (on average) per thread More... | |
a helper to easily read databases
Definition at line 135 of file genericBNLearner.h.
|
explicit |
constructor reading the database from a CSV file
file | the name of the CSV file containing the data |
missing_symbols | the set of symbols in the CSV file that correspond to missing data |
Definition at line 68 of file genericBNLearner.cpp.
|
explicit |
constructor filling the Database from an already initialized database table
db | an already initialized database table that is used to fill the Database |
Definition at line 51 of file genericBNLearner.cpp.
References __database, __domain_sizes, __nodeId2cols, __parser, gum::learning::DatabaseTable< ALLOC >::domainSizes(), gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler(), gum::BijectionImplementation< T1, T2, Alloc, Gen >::insert(), and gum::learning::IDatabaseTable< T_DATA, ALLOC >::variableNames().
gum::learning::genericBNLearner::Database::Database | ( | const std::string & | filename, |
Database & | score_database, | ||
const std::vector< std::string > & | missing_symbols | ||
) |
constructor for the aprioris
We must ensure that the variables of the Database are identical to those of the score database (otherwise the counts used by the scores might be erroneous). However, we allow the variables to be ordered differently in the two databases: variables with the same name in both databases are assumed to be the same.
filename | the name of the CSV file containing the data |
score_database | the main database used for the learning |
missing_symbols | the set of symbols in the CSV file that correspond to missing data |
Definition at line 74 of file genericBNLearner.cpp.
References gum::learning::genericBNLearner::__checkFileName(), __database, __domain_sizes, __nodeId2cols, __parser, databaseTable(), gum::learning::DatabaseTable< ALLOC >::domainSizes(), gum::learning::IDBInitializer< ALLOC >::fillDatabase(), GUM_ERROR, gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler(), gum::HashTable< Key, Val, Alloc >::insert(), gum::learning::DatabaseTable< ALLOC >::insertTranslator(), gum::learning::IDatabaseTable< T_DATA, ALLOC >::nbVariables(), nodeId2Columns(), gum::learning::DatabaseTable< ALLOC >::variable(), gum::learning::IDBInitializer< ALLOC >::variableNames(), and gum::learning::IDatabaseTable< T_DATA, ALLOC >::variableNames().
gum::learning::genericBNLearner::Database::Database | ( | const std::string & | filename, |
const gum::BayesNet< GUM_SCALAR > & | bn, | ||
const std::vector< std::string > & | missing_symbols | ||
) |
constructor with a BN providing the variables of interest
filename | the name of the CSV file containing the data |
bn | a Bayesian network indicating which variables of the CSV file are used for learning |
missing_symbols | the set of symbols in the CSV file that correspond to missing data |
gum::learning::genericBNLearner::Database::Database | ( | const Database & | from | ) |
copy constructor
Definition at line 135 of file genericBNLearner.cpp.
References __database, __parser, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler().
gum::learning::genericBNLearner::Database::Database | ( | Database && | from | ) |
move constructor
Definition at line 144 of file genericBNLearner.cpp.
References __database, __parser, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler().
gum::learning::genericBNLearner::Database::~Database | ( | ) |
destructor
Definition at line 154 of file genericBNLearner.cpp.
References __parser, and operator=().
gum::learning::genericBNLearner::Database::Database | ( | const std::string & | filename, |
const BayesNet< GUM_SCALAR > & | bn, | ||
const std::vector< std::string > & | missing_symbols | ||
) |
Definition at line 31 of file genericBNLearner_tpl.h.
References gum::learning::genericBNLearner::__checkFileName(), __database, __domain_sizes, __nodeId2cols, __parser, gum::DAGmodel::dag(), gum::learning::DatabaseTable< ALLOC >::domainSizes(), gum::learning::IDBInitializer< ALLOC >::fillDatabase(), GUM_ERROR, gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler(), gum::BijectionImplementation< T1, T2, Alloc, Gen >::insert(), gum::HashTable< Key, Val, Alloc >::insert(), gum::learning::DatabaseTable< ALLOC >::insertTranslator(), gum::Variable::name(), gum::BayesNet< GUM_SCALAR >::variable(), and gum::learning::IDBInitializer< ALLOC >::variableNames().
|
private |
Definition at line 76 of file genericBNLearner_tpl.h.
References __database, gum::BayesNet< GUM_SCALAR >::add(), gum::learning::IDatabaseTable< T_DATA, ALLOC >::nbVariables(), and gum::learning::DatabaseTable< ALLOC >::variable().
INLINE const DatabaseTable & gum::learning::genericBNLearner::Database::databaseTable | ( | ) | const |
returns the internal database table
Definition at line 94 of file genericBNLearner_inl.h.
References __database.
Referenced by gum::learning::genericBNLearner::__createApriori(), gum::learning::genericBNLearner::__learnDAG(), gum::learning::genericBNLearner::__prepare_miic_3off2(), Database(), gum::learning::genericBNLearner::database(), gum::learning::genericBNLearner::genericBNLearner(), gum::learning::genericBNLearner::hasMissingValues(), gum::learning::genericBNLearner::learnMixedStructure(), gum::learning::genericBNLearner::nbRows(), and gum::learning::genericBNLearner::useCrossValidationFold().
INLINE const std::vector< std::size_t > & gum::learning::genericBNLearner::Database::domainSizes | ( | ) | const |
returns the domain sizes of the variables
Definition at line 47 of file genericBNLearner_inl.h.
References __domain_sizes.
Referenced by gum::learning::genericBNLearner::domainSizes(), and gum::learning::genericBNLearner::nbCols().
INLINE NodeId gum::learning::genericBNLearner::Database::idFromName | ( | const std::string & | var_name | ) | const |
returns the node id corresponding to a variable name
Definition at line 67 of file genericBNLearner_inl.h.
References __database, __nodeId2cols, gum::learning::IDatabaseTable< T_DATA, ALLOC >::columnsFromVariableName(), gum::BijectionImplementation< T1, T2, Alloc, Gen >::first(), and GUM_ERROR.
Referenced by gum::learning::genericBNLearner::addForbiddenArc(), gum::learning::genericBNLearner::addMandatoryArc(), gum::learning::genericBNLearner::addPossibleEdge(), gum::learning::genericBNLearner::eraseForbiddenArc(), gum::learning::genericBNLearner::eraseMandatoryArc(), gum::learning::genericBNLearner::erasePossibleEdge(), gum::learning::genericBNLearner::idFromName(), and gum::learning::genericBNLearner::setSliceOrder().
INLINE const std::vector< std::string > & gum::learning::genericBNLearner::Database::missingSymbols | ( | ) | const |
returns the set of missing symbols taken into account
Definition at line 101 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::missingSymbols().
Referenced by gum::learning::genericBNLearner::__createApriori().
INLINE const std::string & gum::learning::genericBNLearner::Database::nameFromId | ( | NodeId | id | ) | const |
returns the variable name corresponding to a given node id
Definition at line 81 of file genericBNLearner_inl.h.
References __database, __nodeId2cols, GUM_ERROR, gum::BijectionImplementation< T1, T2, Alloc, Gen >::second(), and gum::learning::IDatabaseTable< T_DATA, ALLOC >::variableName().
Referenced by gum::learning::genericBNLearner::nameFromId().
INLINE const std::vector< std::string > & gum::learning::genericBNLearner::Database::names | ( | ) | const |
returns the names of the variables in the database
Definition at line 53 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::variableNames().
Referenced by gum::learning::genericBNLearner::names().
INLINE std::size_t gum::learning::genericBNLearner::Database::nbRows | ( | ) | const |
returns the number of records in the database
Definition at line 114 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::nbRows().
INLINE const Bijection< NodeId, std::size_t > & gum::learning::genericBNLearner::Database::nodeId2Columns | ( | ) | const |
returns the mapping between node ids and their columns in the database
Definition at line 108 of file genericBNLearner_inl.h.
References __nodeId2cols.
Referenced by gum::learning::genericBNLearner::__createApriori(), gum::learning::genericBNLearner::__createCorrectedMutualInformation(), gum::learning::genericBNLearner::__createParamEstimator(), gum::learning::genericBNLearner::__createScore(), and Database().
genericBNLearner::Database & gum::learning::genericBNLearner::Database::operator= | ( | const Database & | from | ) |
copy operator
Definition at line 157 of file genericBNLearner.cpp.
References __database, __domain_sizes, __nodeId2cols, __parser, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler().
Referenced by ~Database().
genericBNLearner::Database & gum::learning::genericBNLearner::Database::operator= | ( | Database && | from | ) |
move operator
Definition at line 173 of file genericBNLearner.cpp.
References __database, __domain_sizes, __nodeId2cols, __parser, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::handler().
INLINE DBRowGeneratorParser & gum::learning::genericBNLearner::Database::parser | ( | ) |
returns the parser for the database
Definition at line 41 of file genericBNLearner_inl.h.
References __parser.
Referenced by gum::learning::genericBNLearner::__createApriori(), gum::learning::genericBNLearner::__createCorrectedMutualInformation(), gum::learning::genericBNLearner::__createScore(), gum::learning::genericBNLearner::chi2(), gum::learning::genericBNLearner::G2(), gum::learning::genericBNLearner::logLikelihood(), and gum::learning::genericBNLearner::useDatabaseRanges().
INLINE void gum::learning::genericBNLearner::Database::setDatabaseWeight | ( | const double | new_weight | ) |
assign a weight to all the rows of the database so that the sum of their weights is equal to new_weight
assign new weight to the rows of the learning database
Definition at line 59 of file genericBNLearner_inl.h.
References __database, gum::learning::IDatabaseTable< T_DATA, ALLOC >::nbRows(), gum::learning::IDatabaseTable< T_DATA, ALLOC >::setAllRowsWeight(), and weight().
Referenced by gum::learning::genericBNLearner::setDatabaseWeight().
INLINE void gum::learning::genericBNLearner::Database::setWeight | ( | const std::size_t | i, |
const double | weight | ||
) |
sets the weight of the ith record
OutOfBounds | if i is outside the set of indices of the records or if the weight is negative |
Definition at line 126 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::setWeight().
Referenced by gum::learning::genericBNLearner::setRecordWeight().
INLINE std::size_t gum::learning::genericBNLearner::Database::size | ( | ) | const |
returns the number of records in the database
Definition at line 120 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::size().
INLINE double gum::learning::genericBNLearner::Database::weight | ( | const std::size_t | i | ) | const |
returns the weight of the ith record
OutOfBounds | if i is outside the set of indices of the records |
Definition at line 133 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::weight().
Referenced by gum::learning::genericBNLearner::databaseWeight(), and gum::learning::genericBNLearner::recordWeight().
INLINE double gum::learning::genericBNLearner::Database::weight | ( | ) | const |
returns the weight of the whole database
Definition at line 139 of file genericBNLearner_inl.h.
References __database, and gum::learning::IDatabaseTable< T_DATA, ALLOC >::weight().
Referenced by gum::learning::genericBNLearner::__setAprioriWeight(), and setDatabaseWeight().
|
protected |
the database itself
Definition at line 263 of file genericBNLearner.h.
Referenced by __BNVars(), Database(), databaseTable(), idFromName(), missingSymbols(), nameFromId(), names(), nbRows(), operator=(), setDatabaseWeight(), setWeight(), size(), and weight().
|
protected |
the domain sizes of the variables (useful to speed-up computations)
Definition at line 269 of file genericBNLearner.h.
Referenced by Database(), domainSizes(), and operator=().
|
protected |
the max number of threads authorized
Definition at line 278 of file genericBNLearner.h.
|
protected |
the minimal number of rows to parse (on average) per thread
Definition at line 282 of file genericBNLearner.h.
a bijection assigning to each NodeId the index of its column in the database
Definition at line 272 of file genericBNLearner.h.
Referenced by Database(), idFromName(), nameFromId(), nodeId2Columns(), and operator=().
|
protected |
the parser used for reading the database
Definition at line 266 of file genericBNLearner.h.
Referenced by Database(), operator=(), parser(), and ~Database().