![]() |
aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
|
the class for packing together the translators used to preprocess the datasets More...
#include <agrum/tools/database/DBTranslatorSet.h>
Public Member Functions | |
Constructors / Destructors | |
DBTranslatorSet (const allocator_type &alloc=allocator_type()) | |
default constructor More... | |
DBTranslatorSet (const DBTranslatorSet< ALLOC > &from) | |
copy constructor More... | |
DBTranslatorSet (const DBTranslatorSet< ALLOC > &from, const allocator_type &alloc) | |
copy constructor with a given allocator More... | |
DBTranslatorSet (DBTranslatorSet< ALLOC > &&from) | |
move constructor More... | |
DBTranslatorSet (DBTranslatorSet< ALLOC > &&from, const allocator_type &alloc) | |
move constructor with a given allocator More... | |
virtual DBTranslatorSet< ALLOC > * | clone () const |
virtual copy constructor More... | |
virtual DBTranslatorSet< ALLOC > * | clone (const allocator_type &alloc) const |
virtual copy constructor with a given allocator More... | |
virtual | ~DBTranslatorSet () |
destructor More... | |
Operators | |
DBTranslatorSet< ALLOC > & | operator= (const DBTranslatorSet< ALLOC > &from) |
copy operator More... | |
DBTranslatorSet< ALLOC > & | operator= (DBTranslatorSet< ALLOC > &&from) |
move operator More... | |
DBTranslator< ALLOC > & | operator[] (const std::size_t k) |
returns the kth translator More... | |
const DBTranslator< ALLOC > & | operator[] (const std::size_t k) const |
returns the kth translator More... | |
Accessors / Modifiers | |
template<template< template< typename > class > class Translator> | |
std::size_t | insertTranslator (const Translator< ALLOC > &translator, const std::size_t column, const bool unique_column=true) |
inserts a new translator at the end of the translator set More... | |
template<template< typename > class XALLOC> | |
std::size_t | insertTranslator (const Variable &var, const std::size_t column, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool unique_column=true) |
inserts a new translator for a given variable at the end of the translator set More... | |
std::size_t | insertTranslator (const Variable &var, const std::size_t column, const bool unique_column=true) |
inserts a new translator for a given variable at the end of the translator set More... | |
void | eraseTranslator (const std::size_t k, const bool k_is_input_col=false) |
erases either the kth translator or those parsing the kth column of the input database More... | |
DBTranslator< ALLOC > & | translator (const std::size_t k) |
returns the kth translator More... | |
const DBTranslator< ALLOC > & | translator (const std::size_t k) const |
returns the kth translator More... | |
DBTranslator< ALLOC > & | translatorSafe (const std::size_t k) |
returns the kth translator More... | |
const DBTranslator< ALLOC > & | translatorSafe (const std::size_t k) const |
returns the kth translator More... | |
template<template< typename > class OTHER_ALLOC> | |
DBTranslatedValue | translate (const std::vector< std::string, OTHER_ALLOC< std::string > > &row, const std::size_t k) const |
ask the kth translator to translate a string in a row of the database More... | |
template<template< typename > class OTHER_ALLOC> | |
DBTranslatedValue | translateSafe (const std::vector< std::string, OTHER_ALLOC< std::string > > &row, const std::size_t k) const |
similar to method translate, except that it checks that the kth translator exists More... | |
std::string | translateBack (const DBTranslatedValue translated_val, const std::size_t k) const |
returns the original string that was translated into translated_val More... | |
std::string | translateBackSafe (const DBTranslatedValue translated_val, const std::size_t k) const |
similar to method translateBack, except that it checks that the kth translator exists More... | |
bool | isMissingValue (const DBTranslatedValue translated_val, const std::size_t k) const |
indicates whether the kth translator considers a translated_val as a missing value More... | |
bool | isMissingValueSafe (const DBTranslatedValue translated_val, const std::size_t k) const |
similar to method isMissingValue, except that it checks that the kth translator exists More... | |
std::size_t | domainSize (const std::size_t k) const |
returns the domain size of the variable stored into the kth translator More... | |
std::size_t | domainSizeSafe (const std::size_t k) const |
returns the domain size of the variable stored into the kth translator More... | |
const Variable & | variable (const std::size_t k) const |
returns the variable stored into the kth translator More... | |
const Variable & | variableSafe (const std::size_t k) const |
returns the variable stored into the kth translator More... | |
bool | needsReordering (const std::size_t k) const |
indicates whether a reordering is needed to make the kth translator sorted More... | |
bool | needsReorderingSafe (const std::size_t k) const |
same as method needsReordering but checks that the kth translator exists More... | |
HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > | reorder (const std::size_t k) |
performs a reordering of the dictionary and returns a mapping from the old translated values to the new ones. More... | |
HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > | reorderSafe (const std::size_t k) |
same as method reorder but checks that the kth translator exists More... | |
std::size_t | inputColumn (const std::size_t k) const |
returns the column of the input database that will be read by the kth translator More... | |
std::size_t | inputColumnSafe (const std::size_t k) const |
returns the column of the input database that will be read by the kth translator More... | |
std::size_t | highestInputColumn () const |
returns the largest input database column index read by the translators More... | |
void | clear () |
remove all the translators More... | |
std::size_t | nbTranslators () const |
returns the number of translators stored into the set More... | |
std::size_t | size () const |
returns the number of translators stored into the set More... | |
allocator_type | getAllocator () const |
returns the allocator used by the translator set More... | |
const std::vector< DBTranslator< ALLOC > *, ALLOC< DBTranslator< ALLOC > *> > & | translators () const |
returns the set of translators More... | |
Public Types | |
using | allocator_type = typename DBTranslator< ALLOC >::allocator_type |
type for the allocators passed in arguments of methods More... | |
the class for packing together the translators used to preprocess the datasets
When learning Bayesian networks, the records of the training dataset are used to construct contingency tables that are either exploited in statistical conditional independence tests or in scores. In both cases, the values observed in the records must be translated into indices in the finite domain of the corresponding random variables. The DBTranslator classes are used for this purpose. To make the parsing of all the columns of the dataset easier, all the DBTranslator instances used are gathered into a DBTranslatorSet.
Definition at line 114 of file DBTranslatorSet.h.
using gum::learning::DBTranslatorSet< ALLOC >::allocator_type = typename DBTranslator< ALLOC >::allocator_type |
type for the allocators passed in arguments of methods
Definition at line 117 of file DBTranslatorSet.h.
gum::learning::DBTranslatorSet< ALLOC >::DBTranslatorSet | ( | const allocator_type & | alloc = allocator_type() | ) |
default constructor
gum::learning::DBTranslatorSet< ALLOC >::DBTranslatorSet | ( | const DBTranslatorSet< ALLOC > & | from | ) |
copy constructor
gum::learning::DBTranslatorSet< ALLOC >::DBTranslatorSet | ( | const DBTranslatorSet< ALLOC > & | from, |
const allocator_type & | alloc | ||
) |
copy constructor with a given allocator
gum::learning::DBTranslatorSet< ALLOC >::DBTranslatorSet | ( | DBTranslatorSet< ALLOC > && | from | ) |
move constructor
gum::learning::DBTranslatorSet< ALLOC >::DBTranslatorSet | ( | DBTranslatorSet< ALLOC > && | from, |
const allocator_type & | alloc | ||
) |
move constructor with a given allocator
|
virtual |
destructor
void gum::learning::DBTranslatorSet< ALLOC >::clear | ( | ) |
remove all the translators
|
virtual |
virtual copy constructor
|
virtual |
virtual copy constructor with a given allocator
std::size_t gum::learning::DBTranslatorSet< ALLOC >::domainSize | ( | const std::size_t | k | ) | const |
returns the domain size of the variable stored into the kth translator
std::size_t gum::learning::DBTranslatorSet< ALLOC >::domainSizeSafe | ( | const std::size_t | k | ) | const |
returns the domain size of the variable stored into the kth translator
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
void gum::learning::DBTranslatorSet< ALLOC >::eraseTranslator | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) |
erases either the kth translator or those parsing the kth column of the input database
DBTranslatorSets do not necessarily read all the columns of their input database. For instance, a CSV may contain 10 columns, but the DBTranslatorSet may only contain two translators reading columns 3 and 5 respectively. When k_is_input_col is set to false, the parameter k is the index of one of the translators stored into the DBTranslatorSet (in this example, either 0 or 1). When k_is_input_col is set to true, the translators to be erased are the ones that parse the kth column of the input database (when several translators parse the column k, all of them are removed).
allocator_type gum::learning::DBTranslatorSet< ALLOC >::getAllocator | ( | ) | const |
returns the allocator used by the translator set
std::size_t gum::learning::DBTranslatorSet< ALLOC >::highestInputColumn | ( | ) | const |
returns the largest input database column index read by the translators
std::size_t gum::learning::DBTranslatorSet< ALLOC >::inputColumn | ( | const std::size_t | k | ) | const |
returns the column of the input database that will be read by the kth translator
std::size_t gum::learning::DBTranslatorSet< ALLOC >::inputColumnSafe | ( | const std::size_t | k | ) | const |
returns the column of the input database that will be read by the kth translator
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
std::size_t gum::learning::DBTranslatorSet< ALLOC >::insertTranslator | ( | const Translator< ALLOC > & | translator, |
const std::size_t | column, | ||
const bool | unique_column = true |
||
) |
inserts a new translator at the end of the translator set
translator | a translator that will be copied into the translator set |
column | the index of the column that this new translator should read in the database. |
unique_column | indicates whether the column must be read by at most one translator (true) or may be read by several translators (false). |
DuplicateElement | is raised if there already exists a translator reading the column passed in argument and the unique_column argument is set to true. |
std::size_t gum::learning::DBTranslatorSet< ALLOC >::insertTranslator | ( | const Variable & | var, |
const std::size_t | column, | ||
const std::vector< std::string, XALLOC< std::string > > & | missing_symbols, | ||
const bool | unique_column = true |
||
) |
inserts a new translator for a given variable at the end of the translator set
var | the variable that will be contained into the translator |
column | the index of the column that this new translator should read in the database. |
missing_symbols | the set of symbols in the database representing missing values |
unique_column | indicates whether the column must be read by at most one translator (true) or may be read by several translators (false). |
DuplicateElement | is raised if there already exists a translator reading the column passed in argument and the unique_column argument is set to true. |
std::size_t gum::learning::DBTranslatorSet< ALLOC >::insertTranslator | ( | const Variable & | var, |
const std::size_t | column, | ||
const bool | unique_column = true |
||
) |
inserts a new translator for a given variable at the end of the translator set
var | the variable that will be contained into the translator |
column | the index of the column that this new translator should read in the database. |
unique_column | indicates whether the column must be read by at most one translator (true) or may be read by several translators (false). |
DuplicateElement | is raised if there already exists a translator reading the column passed in argument and the unique_column argument is set to true. |
bool gum::learning::DBTranslatorSet< ALLOC >::isMissingValue | ( | const DBTranslatedValue | translated_val, |
const std::size_t | k | ||
) | const |
indicates whether the kth translator considers a translated_val as a missing value
translated_val | the value that we compare to the translation of a missing value |
k | the index of the translator that performed the translation |
bool gum::learning::DBTranslatorSet< ALLOC >::isMissingValueSafe | ( | const DBTranslatedValue | translated_val, |
const std::size_t | k | ||
) | const |
similar to method isMissingValue, except that it checks that the kth translator exists
translated_val | the value that we compare to the translation of a missing value |
k | the index of the translator that performed the translation |
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
std::size_t gum::learning::DBTranslatorSet< ALLOC >::nbTranslators | ( | ) | const |
returns the number of translators stored into the set
bool gum::learning::DBTranslatorSet< ALLOC >::needsReordering | ( | const std::size_t | k | ) | const |
indicates whether a reordering is needed to make the kth translator sorted
For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.
When dynamically constructing its dictionary, the translator may assign wrong DBTranslatedValue values to strings. For instance, a translator reading sequentially integer strings 4, 1, 3, may map 4 into DBTranslatedValue{std::size_t(0)}, 1 into DBTranslatedValue{std::size_t(1)} and 3 into DBTranslatedValue{std::size_t(2)}, resulting in random variables having domain {4,1,3}. The user may prefer having domain {1,3,4}, i.e., a domain specified with increasing values. This requires a reordering. Method needsReordering() returns a Boolean indicating whether such a reordering should be performed or whether the current order is OK.
bool gum::learning::DBTranslatorSet< ALLOC >::needsReorderingSafe | ( | const std::size_t | k | ) | const |
same as method needsReordering but checks that the kth translator exists
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
DBTranslatorSet< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::operator= | ( | const DBTranslatorSet< ALLOC > & | from | ) |
copy operator
DBTranslatorSet< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::operator= | ( | DBTranslatorSet< ALLOC > && | from | ) |
move operator
DBTranslator< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::operator[] | ( | const std::size_t | k | ) |
returns the kth translator
const DBTranslator< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::operator[] | ( | const std::size_t | k | ) | const |
returns the kth translator
HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > gum::learning::DBTranslatorSet< ALLOC >::reorder | ( | const std::size_t | k | ) |
performs a reordering of the dictionary and returns a mapping from the old translated values to the new ones.
When a reordering is needed, i.e., string values must be translated differently, Method reorder() computes how the translations should be changed. It updates the dictionary accordingly and returns the mapping that enables changing the old dictionary values into the new ones. Note that the hash table returned is expressed in terms of std::size_t because only the translations for discrete random variables need to be reordered; those for continuous random variables are identity mappings.
HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > gum::learning::DBTranslatorSet< ALLOC >::reorderSafe | ( | const std::size_t | k | ) |
same as method reorder but checks that the kth translator exists
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
std::size_t gum::learning::DBTranslatorSet< ALLOC >::size | ( | ) | const |
returns the number of translators stored into the set
DBTranslatedValue gum::learning::DBTranslatorSet< ALLOC >::translate | ( | const std::vector< std::string, OTHER_ALLOC< std::string > > & | row, |
const std::size_t | k | ||
) | const |
ask the kth translator to translate a string in a row of the database
row | a row of the original database |
k | the index of the translator that will perform the translation |
std::string gum::learning::DBTranslatorSet< ALLOC >::translateBack | ( | const DBTranslatedValue | translated_val, |
const std::size_t | k | ||
) | const |
returns the original string that was translated into translated_val
translated_val | the value from which we look for the original string |
k | the index of the translator that performed the translation |
std::string gum::learning::DBTranslatorSet< ALLOC >::translateBackSafe | ( | const DBTranslatedValue | translated_val, |
const std::size_t | k | ||
) | const |
similar to method translateBack, except that it checks that the kth translator exists
translated_val | the value from which we look for the original string |
k | the index of the translator that performed the translation |
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
DBTranslatedValue gum::learning::DBTranslatorSet< ALLOC >::translateSafe | ( | const std::vector< std::string, OTHER_ALLOC< std::string > > & | row, |
const std::size_t | k | ||
) | const |
similar to method translate, except that it checks that the kth translator exists
row | a row of the original database |
k | the index of the translator that will perform the translation |
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
DBTranslator< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::translator | ( | const std::size_t | k | ) |
returns the kth translator
const DBTranslator< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::translator | ( | const std::size_t | k | ) | const |
returns the kth translator
const std::vector< DBTranslator< ALLOC >*, ALLOC< DBTranslator< ALLOC >* > >& gum::learning::DBTranslatorSet< ALLOC >::translators | ( | ) | const |
returns the set of translators
DBTranslator< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::translatorSafe | ( | const std::size_t | k | ) |
returns the kth translator
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
const DBTranslator< ALLOC >& gum::learning::DBTranslatorSet< ALLOC >::translatorSafe | ( | const std::size_t | k | ) | const |
returns the kth translator
UndefinedElement | is raised if there are fewer than k translators in the translator set. |
const Variable& gum::learning::DBTranslatorSet< ALLOC >::variable | ( | const std::size_t | k | ) | const |
returns the variable stored into the kth translator
const Variable& gum::learning::DBTranslatorSet< ALLOC >::variableSafe | ( | const std::size_t | k | ) | const |
returns the variable stored into the kth translator
UndefinedElement | is raised if there are fewer than k translators in the translator set. |