![]() |
aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
|
The class representing a tabular database as used by learning tasks. More...
#include <agrum/tools/database/databaseTable.h>
Public Member Functions | |
Constructors / Destructors | |
template<template< typename > class XALLOC> | |
DatabaseTable (const MissingValType< XALLOC > &missing_symbols, const DBTranslatorSet< ALLOC > &translators=DBTranslatorSet< ALLOC >(), const allocator_type &alloc=allocator_type()) | |
default constructor More... | |
DatabaseTable (const DBTranslatorSet< ALLOC > &translators=DBTranslatorSet< ALLOC >(), const allocator_type &alloc=allocator_type()) | |
default constructor More... | |
DatabaseTable (const DatabaseTable< ALLOC > &from) | |
copy constructor More... | |
DatabaseTable (const DatabaseTable< ALLOC > &from, const allocator_type &alloc) | |
copy constructor with a given allocator More... | |
DatabaseTable (DatabaseTable< ALLOC > &&from) | |
move constructor More... | |
DatabaseTable (DatabaseTable< ALLOC > &&from, const allocator_type &alloc) | |
move constructor with a given allocator More... | |
virtual DatabaseTable< ALLOC > * | clone () const final |
virtual copy constructor More... | |
virtual DatabaseTable< ALLOC > * | clone (const allocator_type &alloc) const final |
virtual copy constructor with a given allocator More... | |
virtual | ~DatabaseTable () |
destructor More... | |
Operators | |
DatabaseTable< ALLOC > & | operator= (const DatabaseTable< ALLOC > &from) |
copy operator More... | |
DatabaseTable< ALLOC > & | operator= (DatabaseTable< ALLOC > &&from) |
move operator More... | |
Accessors / Modifiers | |
std::size_t | insertTranslator (const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true) |
insert a new translator into the database table More... | |
std::size_t | insertTranslator (const Variable &var, const std::size_t input_column, const bool unique_column=true) |
insert a new translator into the database table More... | |
template<template< typename > class XALLOC> | |
std::size_t | insertTranslator (const Variable &var, const std::size_t input_column, std::vector< std::string, XALLOC< std::string > > missing_symbols, const bool unique_column=true) |
insert a new translator into the database table More... | |
void | eraseTranslators (const std::size_t k, const bool k_is_input_col=false) |
erases either the kth translator or all those parsing the kth column of the input dataset More... | |
const DBTranslatorSet< ALLOC > & | translatorSet () const |
returns the set of translators More... | |
const DBTranslator< ALLOC > & | translator (const std::size_t k, const bool k_is_input_col=false) const |
returns either the kth translator of the database table or the first one reading the kth column of the input database More... | |
const Variable & | variable (const std::size_t k, const bool k_is_input_col=false) const |
returns either the kth variable of the database table or the first one corresponding to the kth column of the input database More... | |
virtual void | setVariableNames (const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final |
sets the names of the variables More... | |
virtual void | ignoreColumn (const std::size_t k, const bool from_external_object=true) final |
makes the database table ignore from now on the kth column of the input dataset or the column parsed by the kth translator More... | |
virtual const DBVector< std::size_t > | ignoredColumns () const final |
returns the set of columns of the original dataset that are ignored More... | |
virtual const DBVector< std::size_t > | inputColumns () const final |
returns the set of columns of the original dataset that are present in the DatabaseTable More... | |
std::size_t | domainSize (const std::size_t k, const bool k_is_input_col=false) const |
returns the domain size of the kth variable of the database table or of that of the first one corresponding to the kth column of the input database More... | |
DBVector< std::size_t > | domainSizes () const |
returns the domain sizes of all the variables in the database table More... | |
bool | needsReordering (const std::size_t k, const bool k_is_input_col=false) const |
indicates whether a reordering is needed to sort the translations of the kth translator or those of the first translator parsing the kth column More... | |
void | reorder (const std::size_t k, const bool k_is_input_col=false) |
performs a reordering of the kth translator or of the first translator parsing the kth column of the input database More... | |
void | reorder () |
performs a reordering of all the columns More... | |
virtual void | insertRow (const std::vector< std::string, ALLOC< std::string > > &new_row) final |
insert a new row at the end of the database More... | |
virtual void | insertRow (Row< DBTranslatedValue > &&new_row, const IsMissing contains_missing_data) final |
insert a new DBRow at the end of the database More... | |
virtual void | insertRow (const Row< DBTranslatedValue > &new_row, const IsMissing contains_missing_data) final |
insert a new row at the end of the database More... | |
virtual void | insertRow (const Row< DBCell > &new_row) final |
insert a new DBRow of DBCells at the end of the database More... | |
virtual void | insertRow (Row< DBCell > &&new_row) final |
insert a new DBRow of DBCells at the end of the database More... | |
virtual void | insertRows (Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) final |
insert a set of new DBRows at the end of the database More... | |
virtual void | insertRows (const Matrix< DBTranslatedValue > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals) final |
insert a set of new DBRows at the end of the database More... | |
virtual void | insertRows (Matrix< DBCell > &&new_rows) final |
insert a set of new DBRows at the end of the database More... | |
virtual void | insertRows (const Matrix< DBCell > &new_rows) final |
insert a set of new DBRows at the end of the database More... | |
virtual void | clear () final |
erase the content of the database, including the names of the variables More... | |
Iterators | |
iterator | begin () const |
returns a new unsafe handler pointing to the beginning of the database More... | |
iterator_safe | beginSafe () const |
returns a new safe handler pointing to the beginning of the database More... | |
const iterator & | end () const noexcept |
returns a new unsafe handler pointing to the end of the database More... | |
const iterator_safe & | endSafe () const noexcept |
returns a new safe handler pointing to the end of the database More... | |
Accessors / Modifiers | |
const Matrix< DBTranslatedValue > & | content () const noexcept |
returns the content (the records) of the database More... | |
iterator | handler () const |
returns a new unsafe handler pointing to the 1st record of the database More... | |
iterator_safe | handlerSafe () const |
returns a new safe handler pointing to the 1st record of the database More... | |
const DBVector< std::string > & | variableNames () const noexcept |
returns the variable names for all the columns of the database More... | |
void | setVariableNames (const std::vector< std::string, OTHER_ALLOC< std::string > > &names, const bool from_external_object=true) |
sets the names of the variables More... | |
const std::string & | variableName (const std::size_t k) const |
returns the name of the kth column of the IDatabaseTable More... | |
std::size_t | columnFromVariableName (const std::string &name) const |
returns the index of the column whose name is passed in argument More... | |
DBVector< std::size_t > | columnsFromVariableName (const std::string &name) const |
returns the indices of all the columns whose name is passed in argument More... | |
std::size_t | nbVariables () const noexcept |
returns the number of variables (columns) of the database More... | |
std::size_t | nbRows () const noexcept |
returns the number of records (rows) in the database More... | |
std::size_t | size () const noexcept |
returns the number of records (rows) in the database More... | |
bool | empty () const noexcept |
indicates whether the database contains some records or not More... | |
void | insertRow (const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row) |
insert a new row at the end of the database More... | |
virtual void | insertRow (Row< DBTranslatedValue > &&new_row, const IsMissing contains_missing_data) |
insert a new DBRow at the end of the database More... | |
virtual void | insertRow (const Row< DBTranslatedValue > &new_row, const IsMissing contains_missing_data) |
insert a new row at the end of the database More... | |
virtual void | insertRows (Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) |
insert a set of new DBRows at the end of the database More... | |
virtual void | insertRows (const Matrix< DBTranslatedValue > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals) |
insert a set of new DBRows at the end of the database More... | |
void | eraseRow (std::size_t index) |
erase a given row specified by its index in the table More... | |
void | eraseFirstRow () |
erase the first row More... | |
void | eraseLastRow () |
erase the last row More... | |
void | eraseFirstRows (const std::size_t k) |
erase the k first rows More... | |
void | eraseLastRows (const std::size_t k) |
erase the k last rows More... | |
void | eraseRows (std::size_t deb, std::size_t end) |
erase the rows from index deb up to, but not including, index end More... | |
void | eraseAllRows () |
erase all the rows More... | |
ALLOC< DBTranslatedValue > | getAllocator () const |
returns the allocator of the database More... | |
const DBVector< std::string > & | missingSymbols () const |
returns the set of missing symbols More... | |
bool | hasMissingValues () const |
indicates whether the database contains some missing values More... | |
bool | hasMissingValues (const std::size_t k) const |
indicates whether the kth row contains some missing values More... | |
void | setMaxNbThreads (const std::size_t nb) const |
changes the max number of threads that a database can use More... | |
std::size_t | nbThreads () const |
returns the number of threads used to parse the database More... | |
void | setMinNbRowsPerThread (const std::size_t nb) const |
changes the minimum number of rows a thread should process in a multithreading context More... | |
std::size_t | minNbRowsPerThread () const |
returns the minimum of rows that each thread should process More... | |
void | setAllRowsWeight (const double new_weight) |
assign a given weight to all the rows of the database More... | |
void | setWeight (const std::size_t i, const double weight) |
assigns a given weight to the ith row of the database More... | |
double | weight (const std::size_t i) const |
returns the weight of the ith record More... | |
double | weight () const |
returns the weight of the whole database More... | |
Public Types | |
template<typename TX_DATA > | |
using | DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > > |
the type for the vectors used in the DatabaseTable More... | |
template<typename TX_DATA > | |
using | Row = DBRow< TX_DATA, ALLOC > |
a row of the database More... | |
template<typename TX_DATA > | |
using | Matrix = std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > |
the type for the matrices stored into the database More... | |
template<template< typename > class XALLOC> | |
using | MissingValType = std::vector< std::string, XALLOC< std::string > > |
using | Handler = typename IDatabaseTable< DBTranslatedValue, ALLOC >::Handler |
the unsafe handler type More... | |
using | HandlerSafe = typename IDatabaseTable< DBTranslatedValue, ALLOC >::HandlerSafe |
the safe handler type More... | |
using | IsMissing = typename IDatabaseTable< DBTranslatedValue, ALLOC >::IsMissing |
using | value_type = Row< DBTranslatedValue > |
Types for STL compliance. More... | |
using | reference = value_type & |
Types for STL compliance. More... | |
using | const_reference = const value_type & |
Types for STL compliance. More... | |
using | pointer = value_type * |
Types for STL compliance. More... | |
using | const_pointer = const value_type * |
Types for STL compliance. More... | |
using | size_type = std::size_t |
Types for STL compliance. More... | |
using | difference_type = std::ptrdiff_t |
Types for STL compliance. More... | |
using | iterator = Handler |
Types for STL compliance. More... | |
using | iterator_safe = HandlerSafe |
Types for STL compliance. More... | |
using | allocator_type = ALLOC< DBTranslatedValue > |
Types for STL compliance. More... | |
using | const_iterator = const Handler |
Types for STL compliance. More... | |
using | const_iterator_safe = const HandlerSafe |
Types for STL compliance. More... | |
Protected Attributes | |
DBVector< std::string > | variable_names_ |
the names of the variables for each column More... | |
Matrix< DBTranslatedValue > | rows_ |
DBVector< std::string > | missing_symbols_ |
DBVector< IsMissing > | has_row_missing_val_ |
std::size_t | max_nb_threads_ |
std::size_t | min_nb_rows_per_thread_ |
Protected Member Functions | |
bool | isRowSizeOK_ (const std::size_t size) const |
checks whether a size corresponds to the number of columns of the database More... | |
The class representing a tabular database as used by learning tasks.
Class DatabaseTable represents a tabular database that stores in the computer's random access memory (RAM) its content as a vector of DBRows of DBTranslatedValue instances. This class is very well suited for fast learning algorithms.
Definition at line 186 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::allocator_type = ALLOC< DBTranslatedValue > |
Types for STL compliance.
Definition at line 222 of file databaseTable.h.
|
inherited |
Types for STL compliance.
Definition at line 768 of file IDatabaseTable.h.
|
inherited |
Types for STL compliance.
Definition at line 769 of file IDatabaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::const_pointer = const value_type* |
Types for STL compliance.
Definition at line 217 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::const_reference = const value_type& |
Types for STL compliance.
Definition at line 215 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > > |
the type for the vectors used in the DatabaseTable
Definition at line 190 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::difference_type = std::ptrdiff_t |
Types for STL compliance.
Definition at line 219 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::Handler = typename IDatabaseTable< DBTranslatedValue, ALLOC >::Handler |
the unsafe handler type
Definition at line 204 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::HandlerSafe = typename IDatabaseTable< DBTranslatedValue, ALLOC >::HandlerSafe |
the safe handler type
Definition at line 207 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::IsMissing = typename IDatabaseTable< DBTranslatedValue, ALLOC >::IsMissing |
Definition at line 209 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::iterator = Handler |
Types for STL compliance.
Definition at line 220 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::iterator_safe = HandlerSafe |
Types for STL compliance.
Definition at line 221 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::Matrix = std::vector< DBRow< TX_DATA, ALLOC >, ALLOC< DBRow< TX_DATA, ALLOC > > > |
the type for the matrices stored into the database
Definition at line 198 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::MissingValType = std::vector< std::string, XALLOC< std::string > > |
Definition at line 201 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::pointer = value_type* |
Types for STL compliance.
Definition at line 216 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::reference = value_type& |
Types for STL compliance.
Definition at line 214 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::Row = DBRow< TX_DATA, ALLOC > |
a row of the database
Definition at line 194 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::size_type = std::size_t |
Types for STL compliance.
Definition at line 218 of file databaseTable.h.
using gum::learning::DatabaseTable< ALLOC >::value_type = Row< DBTranslatedValue > |
Types for STL compliance.
Definition at line 213 of file databaseTable.h.
gum::learning::DatabaseTable< ALLOC >::DatabaseTable | ( | const MissingValType< XALLOC > & | missing_symbols, |
const DBTranslatorSet< ALLOC > & | translators = DBTranslatorSet< ALLOC >() , |
||
const allocator_type & | alloc = allocator_type() |
||
) |
default constructor
gum::learning::DatabaseTable< ALLOC >::DatabaseTable | ( | const DBTranslatorSet< ALLOC > & | translators = DBTranslatorSet< ALLOC >() , |
const allocator_type & | alloc = allocator_type() |
||
) |
default constructor
gum::learning::DatabaseTable< ALLOC >::DatabaseTable | ( | const DatabaseTable< ALLOC > & | from | ) |
copy constructor
gum::learning::DatabaseTable< ALLOC >::DatabaseTable | ( | const DatabaseTable< ALLOC > & | from, |
const allocator_type & | alloc | ||
) |
copy constructor with a given allocator
gum::learning::DatabaseTable< ALLOC >::DatabaseTable | ( | DatabaseTable< ALLOC > && | from | ) |
move constructor
gum::learning::DatabaseTable< ALLOC >::DatabaseTable | ( | DatabaseTable< ALLOC > && | from, |
const allocator_type & | alloc | ||
) |
move constructor with a given allocator
|
virtual |
destructor
|
inherited |
returns a new unsafe handler pointing to the beginning of the database
|
inherited |
returns a new safe handler pointing to the beginning of the database
|
finalvirtual |
erase the content of the database, including the names of the variables
Reimplemented from gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
finalvirtual |
virtual copy constructor
Implements gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
finalvirtual |
virtual copy constructor with a given allocator
Implements gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
inherited |
returns the index of the column whose name is passed in argument
UndefinedElement | is raised if there exists no column with the given name |
|
inherited |
returns the indices of all the columns whose name is passed in argument
It may happen that several columns correspond to a given variable name. In this case, the function returns the indices of all the columns of the IDatabase that match the name.
|
noexceptinherited |
returns the content (the records) of the database
std::size_t gum::learning::DatabaseTable< ALLOC >::domainSize | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) | const |
returns the domain size of the kth variable of the database table or of that of the first one corresponding to the kth column of the input database
Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the variable is that of the translator that parses the kth column of the input database.
UndefinedElement | is raised if there is no translator corresponding to k. |
DBVector< std::size_t > gum::learning::DatabaseTable< ALLOC >::domainSizes | ( | ) | const |
returns the domain sizes of all the variables in the database table
|
noexceptinherited |
indicates whether the database contains some records or not
|
noexceptinherited |
returns a new unsafe handler pointing to the end of the database
|
noexceptinherited |
returns a new safe handler pointing to the end of the database
|
inherited |
erase all the rows
|
inherited |
erase the first row
|
inherited |
erase the k first rows
|
inherited |
erase the last row
|
inherited |
erase the k last rows
|
inherited |
erase a given row specified by its index in the table
In the database, rows are indexed, starting from 0.
|
inherited |
erase the rows from index deb up to, but not including, index end
In the database, rows are indexed, starting from 0.
void gum::learning::DatabaseTable< ALLOC >::eraseTranslators | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) |
erases either the kth translator or all those parsing the kth column of the input dataset
Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., to the index of one of these two output columns. When k_is_input_col is set to true, the translators to be erased are all those that parse the kth column of the input database.
|
inherited |
returns the allocator of the database
|
inherited |
returns a new unsafe handler pointing to the 1st record of the database
|
inherited |
returns a new safe handler pointing to the 1st record of the database
|
inherited |
indicates whether the database contains some missing values
|
inherited |
indicates whether the kth row contains some missing values
|
finalvirtual |
makes the database table ignore from now on the kth column of the input dataset or the column parsed by the kth translator
This method can be called in two different ways: either k refers to the current kth column of the database table (in this case parameter from_external_object is set to false), or k corresponds to the kth column of an original database used to fill the database table (in this case from_external_object is set to true). Depending on from_external_object's value, the ignored columns may differ. As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to ignoreColumn ( 1, true ) will exclude column X1 from the database table. As a result, the database table columns are X0, X2, X3 and X4. Therefore, subsequently calling ignoreColumn ( 1, false ) will result in excluding X2 since X2 is the 2nd column (columns are indexed starting from 0). So, now the database table's columns are X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will remove column X3 because, in the original database, X3 was the 4th column.
The method also erases all the translators corresponding to column k, if any. If the DatabaseTable contains some rows, then their column corresponding to k is removed. If the resulting DatabaseTable contains only empty rows, then those are removed.
k | the column to remove. See Method setVariableNames for a detailed description on how k is computed. |
from_external_object | indicates whether k refers to the kth column of an original external database (true) or to the current kth column of the DatabaseTable (false). |
UndefinedElement | is raised if k refers to the position of a translator that does not exist (k >= number of translators). |
Implements gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
finalvirtual |
returns the set of columns of the original dataset that are ignored
In this vector, all the column indices greater than or equal to its last element are also ignored.
Implements gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
finalvirtual |
returns the set of columns of the original dataset that are present in the DatabaseTable
Implements gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
finalvirtual |
insert a new row at the end of the database
The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.
SizeError | is raised if the vector of string cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns) |
UnknownLabelInDatabase | is raised if the translation of an element in the new row cannot be found and the corresponding translator is not in an editable dictionary mode. |
SizeError | is raised if the number of entries in the dictionary of a translator has already reached its maximum. |
OperationNotAllowed | exception is raised if the translation of an element in new_row cannot be found and the insertion of the string into the corresponding translator's dictionary fails because it would induce incoherent behavior (e.g., a DBTranslator4ContinuousVariable that contains a variable whose domain is [x,y] as well as a missing value symbol z \(\in\) [x,y]). |
TypeError | is raised if the translation of an element in new_row cannot be found and the insertion of the string into the translator's dictionary fails due to str being impossible to be converted into an appropriate type. |
|
finalvirtual |
insert a new DBRow at the end of the database
Unlike methods insertRow for data whose type is different from DBTranslatedValue, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.
SizeError | is raised if the size of the new_row is not equal to the number of translators of the DatabaseTable. An exception is also raised if at least one element of new_row does not belong to the domain of its corresponding translator. |
|
finalvirtual |
insert a new row at the end of the database
Unlike methods insertRow for data whose type is different from DBTranslatedValue, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.
SizeError | is raised if the size of the new_row is not equal to the number of translators of the DatabaseTable. An exception is also raised if at least one element of new_row does not belong to the domain of its corresponding translator. |
|
finalvirtual |
insert a new DBRow of DBCells at the end of the database
The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.
SizeError | is raised if the vector of string cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns) |
|
finalvirtual |
insert a new DBRow of DBCells at the end of the database
The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.
SizeError | is raised if the vector of string cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns) |
|
inherited |
insert a new row at the end of the database
The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.
SizeError | is raised if the vector of string cannot be inserted in the IDatabaseTable because its size does not allow a matching with the columns of the IDatabaseTable (taking into account the ignored columns) |
|
virtualinherited |
insert a new DBRow at the end of the database
Unlike methods insertRow for data whose type is different from T_DATA, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.
SizeError | is raised if the size of the new_row is not equal to the number of columns retained in the IDatabaseTable |
|
virtualinherited |
insert a new row at the end of the database
Unlike methods insertRow for data whose type is different from T_DATA, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.
SizeError | is raised if the size of the new_row is not equal to the number of columns retained in the IDatabaseTable |
|
finalvirtual |
insert a set of new DBRows at the end of the database
Unlike methods insertRows for data whose type is different from DBTranslatedValue, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.
SizeError | is raised if the size of at least one row in new_rows is not equal to the number of translators in the DatabaseTable. An exception is also raised if at least one element of a row in new_rows does not belong to the domain of its corresponding translator. |
|
finalvirtual |
insert a set of new DBRows at the end of the database
Unlike methods insertRows for data whose type is different from DBTranslatedValue, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.
SizeError | is raised if the size of at least one row in new_rows is not equal to the number of translators in the DatabaseTable. An exception is also raised if at least one element of a row in new_rows does not belong to the domain of its corresponding translator. |
|
finalvirtual |
insert a set of new DBRows at the end of the database
The new rows passed in argument are supposed to come from an external database. So they must contain data for the ignored columns.
SizeError | is raised if the vector of string cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns) |
|
finalvirtual |
insert a set of new DBRows at the end of the database
The new rows passed in argument are supposed to come from an external database. So they must contain data for the ignored columns.
SizeError | is raised if the vector of string cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns) |
|
virtualinherited |
insert a set of new DBRows at the end of the database
Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.
new_rows | the new set of rows to be copied as is |
rows_have_missing_vals | a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not |
SizeError | is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals. |
|
virtualinherited |
insert a set of new DBRows at the end of the database
Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.
new_rows | the new set of rows to be copied as is |
rows_have_missing_vals | a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not |
SizeError | is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals. |
std::size_t gum::learning::DatabaseTable< ALLOC >::insertTranslator | ( | const DBTranslator< ALLOC > & | translator, |
const std::size_t | input_column, | ||
const bool | unique_column = true |
||
) |
insert a new translator into the database table
translator | This translator is copied into the DatabaseTable |
input_column | indicates which column in the original dataset (usually a CSV file) the translator will read |
unique_column | indicates whether the input column can be read by several translators. |
OperationNotAllowed | if the input column is marked as ignored |
DuplicateElement | if there already exists a translator reading the input column passed in argument, and if the unique_column is set to true |
std::size_t gum::learning::DatabaseTable< ALLOC >::insertTranslator | ( | const Variable & | var, |
const std::size_t | input_column, | ||
const bool | unique_column = true |
||
) |
insert a new translator into the database table
var | the variable that will be contained into the translator |
input_column | indicates which column in the original dataset (usually a CSV file) the translator will read |
unique_column | indicates whether the input column can be read by several translators |
missing_symbols | the set of symbols in the database representing missing values |
OperationNotAllowed | if the input column is marked as ignored |
DuplicateElement | if there already exists a translator reading the input column passed in argument, and if the unique_column is set to true |
MissingValueInDatabase | is raised if the database is not empty, i.e., it contains some records. In that case, all the columns of the database corresponding to the new translator should be filled with missing values, which is impossible since we do not know which symbols correspond to missing values. If you do not want such a behavior, use method insertTranslator in which you specify the set of missing symbols. |
std::size_t gum::learning::DatabaseTable< ALLOC >::insertTranslator | ( | const Variable & | var, |
const std::size_t | input_column, | ||
std::vector< std::string, XALLOC< std::string > > | missing_symbols, | ||
const bool | unique_column = true |
||
) |
insert a new translator into the database table
var | the variable that will be contained into the translator |
input_column | indicates which column in the original dataset (usually a CSV file) the translator will read |
unique_column | indicates whether the input column can be read by several translators |
missing_symbols | the set of symbols in the database representing missing values |
OperationNotAllowed | if the input column is marked as ignored |
DuplicateElement | if there already exists a translator reading the input column passed in argument, and if the unique_column is set to true |
|
protectedinherited |
checks whether a size corresponds to the number of columns of the database
|
inherited |
returns the minimum number of rows that each thread should process
|
inherited |
returns the set of missing symbols
|
noexceptinherited |
returns the number of records (rows) in the database
|
inherited |
returns the number of threads used to parse the database
|
noexceptinherited |
returns the number of variables (columns) of the database
bool gum::learning::DatabaseTable< ALLOC >::needsReordering | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) | const |
indicates whether a reordering is needed to sort the translations of the kth translator or those of the first translator parsing the kth column
For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.
When constructing dynamically its dictionary, the translator may assign wrong DBTranslatedValue values to strings. For instance, a translator reading sequentially integer strings 4, 1, 3, may map 4 into DBTranslatedValue{std::size_t(0)}, 1 into DBTranslatedValue{std::size_t(1)} and 3 into DBTranslatedValue{std::size_t(2)}, resulting in random variables having domain {4,1,3}. The user may prefer having domain {1,3,4}, i.e., a domain specified with increasing values. This requires a reordering. Method needsReordering() returns a Boolean indicating whether such a reordering should be performed or whether the current order is OK.
Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the translator to be reordered is that which parses the kth column of the input database.
UndefinedElement | is raised if there is no translator corresponding to k. |
DatabaseTable< ALLOC >& gum::learning::DatabaseTable< ALLOC >::operator= | ( | const DatabaseTable< ALLOC > & | from | ) |
copy operator
DatabaseTable< ALLOC >& gum::learning::DatabaseTable< ALLOC >::operator= | ( | DatabaseTable< ALLOC > && | from | ) |
move operator
void gum::learning::DatabaseTable< ALLOC >::reorder | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) |
performs a reordering of the kth translator or of the first translator parsing the kth column of the input database
For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.
Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the translator to be reordered is that which parses the kth column of the input database.
UndefinedElement | is raised if there is no translator corresponding to k. |
void gum::learning::DatabaseTable< ALLOC >::reorder | ( | ) |
performs a reordering of all the columns
For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.
|
inherited |
assign a given weight to all the rows of the database
|
inherited |
changes the max number of threads that a database can use
Within databases, some methods can be processed in a parallel fashion. This method indicates the maximum number of threads that can be run in parallel at the same time.
|
inherited |
changes the minimum number of rows a thread should process in a multithreading context
When a method executes several threads to perform actions on the rows of the database, the MinNbRowsPerThread indicates how many rows each thread should at least process. This is used to compute the number of threads actually run. This number is equal to the min between the max number of threads allowed and the number of records in the database divided by nb.
|
finalvirtual |
sets the names of the variables
This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, the latter should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.
names | the names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table). |
from_external_object | a Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false). |
SizeError | is raised if the names passed in arguments cannot be assigned to the columns of the DatabaseTable because the size of their vector is inadequate. |
Implements gum::learning::IDatabaseTable< DBTranslatedValue, ALLOC >.
|
inherited |
sets the names of the variables
This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, the latter should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.
names | the names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table). |
from_external_object | a Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false). |
SizeError | is raised if the names passed in arguments cannot be assigned to the columns of the IDatabaseTable because the size of their vector is inadequate. |
|
inherited |
assigns a given weight to the ith row of the database
OutOfBounds | if i is outside the set of indices of the records or if the weight is negative |
|
noexceptinherited |
returns the number of records (rows) in the database
const DBTranslator< ALLOC >& gum::learning::DatabaseTable< ALLOC >::translator | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) | const |
returns either the kth translator of the database table or the first one reading the kth column of the input database
Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the translator returned is the first one that parses the kth column of the input database.
UndefinedElement | is raised if there is no translator corresponding to k. |
const DBTranslatorSet< ALLOC >& gum::learning::DatabaseTable< ALLOC >::translatorSet | ( | ) | const |
returns the set of translators
const Variable& gum::learning::DatabaseTable< ALLOC >::variable | ( | const std::size_t | k, |
const bool | k_is_input_col = false |
||
) | const |
returns either the kth variable of the database table or the first one corresponding to the kth column of the input database
Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the variable is that of the translator that parses the kth column of the input database.
UndefinedElement | is raised if there is no translator corresponding to k. |
|
inherited |
returns the name of the kth column of the IDatabaseTable
OutOfBounds | is raised if the IDatabaseTable contains fewer than k columns. |
|
noexceptinherited |
returns the variable names for all the columns of the database
The names do not include the ignored columns.
|
inherited |
returns the weight of the ith record
OutOfBounds | if i is outside the set of indices of the records |
|
inherited |
returns the weight of the whole database
|
protectedinherited |
Definition at line 1142 of file IDatabaseTable.h.
|
mutableprotectedinherited |
Definition at line 1145 of file IDatabaseTable.h.
|
mutableprotectedinherited |
Definition at line 1149 of file IDatabaseTable.h.
|
protectedinherited |
Definition at line 1139 of file IDatabaseTable.h.
|
protectedinherited |
Definition at line 1136 of file IDatabaseTable.h.
|
protectedinherited |
the names of the variables for each column
Definition at line 1133 of file IDatabaseTable.h.