aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
gum::learning::RawDatabaseTable< ALLOC > Class Template Reference

The table containing the raw/original data of a database. Class RawDatabaseTable is intended to store in RAM the raw/original data of a database. More...

#include <agrum/tools/databaseTable.h>

+ Inheritance diagram for gum::learning::RawDatabaseTable< ALLOC >:
+ Collaboration diagram for gum::learning::RawDatabaseTable< ALLOC >:

Public Member Functions

Constructors / Destructors
template<template< typename > class VARALLOC, template< typename > class MISSALLOC>
 RawDatabaseTable (const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const allocator_type &alloc=allocator_type())
 default constructor More...
 
template<template< typename > class MISSALLOC>
 RawDatabaseTable (const MissingValType< MISSALLOC > &missing_symbols, const allocator_type &alloc=allocator_type())
 default constructor More...
 
 RawDatabaseTable (const allocator_type &alloc=allocator_type())
 default constructor More...
 
 RawDatabaseTable (const RawDatabaseTable< ALLOC > &from)
 copy constructor More...
 
 RawDatabaseTable (const RawDatabaseTable< ALLOC > &from, const allocator_type &alloc)
 copy constructor with a given allocator More...
 
 RawDatabaseTable (RawDatabaseTable< ALLOC > &&from)
 move constructor More...
 
 RawDatabaseTable (RawDatabaseTable< ALLOC > &&from, const allocator_type &alloc)
 move constructor with a given allocator More...
 
virtual RawDatabaseTable< ALLOC > * clone () const final
 virtual copy constructor More...
 
virtual RawDatabaseTable< ALLOC > * clone (const allocator_type &alloc) const final
 virtual copy constructor with a given allocator More...
 
virtual ~RawDatabaseTable ()
 destructor More...
 
Operators
RawDatabaseTable< ALLOC > & operator= (const RawDatabaseTable< ALLOC > &from)
 copy operator More...
 
RawDatabaseTable< ALLOC > & operator= (RawDatabaseTable< ALLOC > &&from)
 move operator More...
 
Accessors / Modifiers
virtual void setVariableNames (const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
 sets the names of the variables More...
 
virtual void ignoreColumn (const std::size_t k, const bool from_external_object=true) final
 makes the database table ignore from now on the kth column More...
 
virtual const DBVector< std::size_t > ignoredColumns () const final
 returns the set of columns of the original dataset that are ignored More...
 
virtual const DBVector< std::size_t > inputColumns () const final
 returns the set of columns of the original dataset that are present in the RawDatabaseTable More...
 
virtual void insertRow (const std::vector< std::string, ALLOC< std::string > > &new_row) final
 insert a new row at the end of the database More...
 
virtual void clear () final
 erase the content of the database, including the names of the variables More...
 
Iterators
iterator begin () const
 returns a new unsafe handler pointing to the beginning of the database More...
 
iterator_safe beginSafe () const
 returns a new safe handler pointing to the beginning of the database More...
 
const iterator & end () const noexcept
 returns a new unsafe handler pointing to the end of the database More...
 
const iterator_safe & endSafe () const noexcept
 returns a new safe handler pointing to the end of the database More...
 
Accessors / Modifiers
const Matrix< DBCell > & content () const noexcept
 returns the content (the records) of the database More...
 
iterator handler () const
 returns a new unsafe handler pointing to the 1st record of the database More...
 
iterator_safe handlerSafe () const
 returns a new safe handler pointing to the 1st record of the database More...
 
const DBVector< std::string > & variableNames () const noexcept
 returns the variable names for all the columns of the database More...
 
void setVariableNames (const std::vector< std::string, OTHER_ALLOC< std::string > > &names, const bool from_external_object=true)
 sets the names of the variables More...
 
const std::string & variableName (const std::size_t k) const
 returns the name of the kth column of the IDatabaseTable More...
 
std::size_t columnFromVariableName (const std::string &name) const
 returns the index of the column whose name is passed in argument More...
 
DBVector< std::size_t > columnsFromVariableName (const std::string &name) const
 returns the indices of all the columns whose name is passed in argument More...
 
std::size_t nbVariables () const noexcept
 returns the number of variables (columns) of the database More...
 
std::size_t nbRows () const noexcept
 returns the number of records (rows) in the database More...
 
std::size_t size () const noexcept
 returns the number of records (rows) in the database More...
 
bool empty () const noexcept
 indicates whether the database contains some records or not More...
 
void insertRow (const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
 insert a new row at the end of the database More...
 
virtual void insertRow (Row< DBCell > &&new_row, const IsMissing contains_missing_data)
 insert a new DBRow at the end of the database More...
 
virtual void insertRow (const Row< DBCell > &new_row, const IsMissing contains_missing_data)
 insert a new row at the end of the database More...
 
virtual void insertRows (Matrix< DBCell > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
 insert a set of new DBRows at the end of the database More...
 
virtual void insertRows (const Matrix< DBCell > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
 insert a set of new DBRows at the end of the database More...
 
void eraseRow (std::size_t index)
 erase a given row specified by its index in the table More...
 
void eraseFirstRow ()
 erase the first row More...
 
void eraseLastRow ()
 erase the last row More...
 
void eraseFirstRows (const std::size_t k)
 erase the k first rows More...
 
void eraseLastRows (const std::size_t k)
 erase the k last rows More...
 
void eraseRows (std::size_t deb, std::size_t end)
 erase the rows from index deb (included) to index end (excluded) More...
 
void eraseAllRows ()
 erase all the rows More...
 
ALLOC< DBCell > getAllocator () const
 returns the allocator of the database More...
 
const DBVector< std::string > & missingSymbols () const
 returns the set of missing symbols More...
 
bool hasMissingValues () const
 indicates whether the database contains some missing values More...
 
bool hasMissingValues (const std::size_t k) const
 indicates whether the kth row contains some missing values More...
 
void setMaxNbThreads (const std::size_t nb) const
 changes the max number of threads that a database can use More...
 
std::size_t nbThreads () const
 returns the number of threads used to parse the database More...
 
void setMinNbRowsPerThread (const std::size_t nb) const
 changes the minimum number of rows a thread should process in a multithreading context More...
 
std::size_t minNbRowsPerThread () const
 returns the minimum number of rows that each thread should process More...
 
void setAllRowsWeight (const double new_weight)
 assign a given weight to all the rows of the database More...
 
void setWeight (const std::size_t i, const double weight)
 assigns a given weight to the ith row of the database More...
 
double weight (const std::size_t i) const
 returns the weight of the ith record More...
 
double weight () const
 returns the weight of the whole database More...
 

Public Types

template<typename TX_DATA >
using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >
 the type for the vectors used in the RawDatabaseTable More...
 
template<typename TX_DATA >
using Row = DBRow< TX_DATA, ALLOC >
 a row of the database More...
 
template<typename TX_DATA >
using Matrix = DBVector< Row< TX_DATA > >
 the type for the matrices stored into the database More...
 
template<template< typename > class XALLOC>
using MissingValType = std::vector< std::string, XALLOC< std::string > >
 
using Handler = typename IDatabaseTable< DBCell, ALLOC >::Handler
 the unsafe handler type More...
 
using HandlerSafe = typename IDatabaseTable< DBCell, ALLOC >::HandlerSafe
 the safe handler type More...
 
using IsMissing = typename IDatabaseTable< DBCell, ALLOC >::IsMissing
 
using value_type = Row< DBCell >
 Types for STL compliance. More...
 
using reference = value_type &
 Types for STL compliance. More...
 
using const_reference = const value_type &
 Types for STL compliance. More...
 
using pointer = value_type *
 Types for STL compliance. More...
 
using const_pointer = const value_type *
 Types for STL compliance. More...
 
using size_type = std::size_t
 Types for STL compliance. More...
 
using difference_type = std::ptrdiff_t
 Types for STL compliance. More...
 
using iterator = Handler
 Types for STL compliance. More...
 
using iterator_safe = HandlerSafe
 Types for STL compliance. More...
 
using allocator_type = ALLOC< DBCell >
 Types for STL compliance. More...
 
using const_iterator = const Handler
 Types for STL compliance. More...
 
using const_iterator_safe = const HandlerSafe
 Types for STL compliance. More...
 

Protected Attributes

DBVector< std::string > variable_names_
 the names of the variables for each column More...
 
Matrix< DBCell > rows_
 
DBVector< std::string > missing_symbols_
 
DBVector< IsMissing > has_row_missing_val_
 
std::size_t max_nb_threads_
 
std::size_t min_nb_rows_per_thread_
 

Protected Member Functions

bool isRowSizeOK_ (const std::size_t size) const
 checks whether a size corresponds to the number of columns of the database More...
 

Detailed Description

template<template< typename > class ALLOC = std::allocator>
class gum::learning::RawDatabaseTable< ALLOC >

The table containing the raw/original data of a database

Class RawDatabaseTable is intended to store in RAM the raw/original data of a database.

Such raw data are not well suited for learning tasks because they need to be interpreted by the learning algorithm, which would incur a strong overhead. However, reading a CSV file and interpreting its data in order to reshape them in a way that will allow fast parsing by learning algorithms is also very time consuming. So, if you are unsure about the correct interpretation and need to change it several times either before processing the learning or during several learning phases, it is efficient to first read the CSV file and store its useful data (removing comments, for instance) into a first database table and, then, use this preprocessed table to quickly produce the interpreted database table that will subsequently be used by the learning. The purpose of the RawDatabaseTable class is precisely to implement this preprocessed table.

Usage example:
// create an empty database
gum::learning::RawDatabaseTable<> database;
// create a new row with 3 DBCells containing integer 2
Row<gum::learning::DBCell> new_row ( 3, gum::learning::DBCell ( 2 ) );
// add it into the database
database.insertRow ( new_row );
database.insertRow ( std::move ( new_row ) );
// erase the first Row
database.eraseFirstRow ();
// returns the content of the database
const auto& content = database.content ();
// sets the names of the variables (the columns) of the database
std::vector<std::string> new_names { "col1", "col2", "col3" };
database.setVariableNames ( new_names );
// print the names of the columns
std::cout << database.variableNames () << std::endl;
// print all the records of the database
for ( auto row : database )
std::cout << row << std::endl;
// make the handler parse the 3rd record to the 5th record (included)
auto handler = database.handler ();
handler.setRange ( 2, 5 ); // 2 = 3rd record; 5 = 6th record (excluded)
for ( const auto& row : handler ) {
std::cout << row << std::endl;
}
// clears the content of the database and updates the safe database's
// handlers
database.clear ();

Definition at line 115 of file rawDatabaseTable.h.

Member Typedef Documentation

◆ allocator_type

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::allocator_type = ALLOC< DBCell >

Types for STL compliance.

Definition at line 151 of file rawDatabaseTable.h.

◆ const_iterator

using gum::learning::IDatabaseTable< DBCell , ALLOC >::const_iterator = const Handler
inherited

Types for STL compliance.

Definition at line 768 of file IDatabaseTable.h.

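◆ const_iterator_safe

using gum::learning::IDatabaseTable< DBCell , ALLOC >::const_iterator_safe = const HandlerSafe
inherited

Types for STL compliance.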

Definition at line 769 of file IDatabaseTable.h.

◆ const_pointer

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::const_pointer = const value_type*

Types for STL compliance.

Definition at line 146 of file rawDatabaseTable.h.

◆ const_reference

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::const_reference = const value_type&

Types for STL compliance.

Definition at line 144 of file rawDatabaseTable.h.

◆ DBVector

template<template< typename > class ALLOC = std::allocator>
template<typename TX_DATA >
using gum::learning::RawDatabaseTable< ALLOC >::DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >

the type for the vectors used in the RawDatabaseTable

Definition at line 119 of file rawDatabaseTable.h.

◆ difference_type

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::difference_type = std::ptrdiff_t

Types for STL compliance.

Definition at line 148 of file rawDatabaseTable.h.

◆ Handler

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::Handler = typename IDatabaseTable< DBCell, ALLOC >::Handler

the unsafe handler type

Definition at line 133 of file rawDatabaseTable.h.

◆ HandlerSafe

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::HandlerSafe = typename IDatabaseTable< DBCell, ALLOC >::HandlerSafe

the safe handler type

Definition at line 136 of file rawDatabaseTable.h.

◆ IsMissing

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::IsMissing = typename IDatabaseTable< DBCell, ALLOC >::IsMissing

Definition at line 138 of file rawDatabaseTable.h.

◆ iterator

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::iterator = Handler

Types for STL compliance.

Definition at line 149 of file rawDatabaseTable.h.

◆ iterator_safe

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::iterator_safe = HandlerSafe

Types for STL compliance.

Definition at line 150 of file rawDatabaseTable.h.

◆ Matrix

template<template< typename > class ALLOC = std::allocator>
template<typename TX_DATA >
using gum::learning::RawDatabaseTable< ALLOC >::Matrix = DBVector< Row< TX_DATA > >

the type for the matrices stored into the database

Definition at line 127 of file rawDatabaseTable.h.

◆ MissingValType

template<template< typename > class ALLOC = std::allocator>
template<template< typename > class XALLOC>
using gum::learning::RawDatabaseTable< ALLOC >::MissingValType = std::vector< std::string, XALLOC< std::string > >

Definition at line 130 of file rawDatabaseTable.h.

◆ pointer

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::pointer = value_type*

Types for STL compliance.

Definition at line 145 of file rawDatabaseTable.h.

◆ reference

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::reference = value_type&

Types for STL compliance.

Definition at line 143 of file rawDatabaseTable.h.

◆ Row

template<template< typename > class ALLOC = std::allocator>
template<typename TX_DATA >
using gum::learning::RawDatabaseTable< ALLOC >::Row = DBRow< TX_DATA, ALLOC >

a row of the database

Definition at line 123 of file rawDatabaseTable.h.

◆ size_type

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::size_type = std::size_t

Types for STL compliance.

Definition at line 147 of file rawDatabaseTable.h.

◆ value_type

template<template< typename > class ALLOC = std::allocator>
using gum::learning::RawDatabaseTable< ALLOC >::value_type = Row< DBCell >

Types for STL compliance.

Definition at line 142 of file rawDatabaseTable.h.

Constructor & Destructor Documentation

◆ RawDatabaseTable() [1/7]

template<template< typename > class ALLOC = std::allocator>
template<template< typename > class VARALLOC, template< typename > class MISSALLOC>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( const MissingValType< MISSALLOC > &  missing_symbols,
const std::vector< std::string, VARALLOC< std::string > > &  var_names,
const allocator_type &  alloc = allocator_type() 
)

default constructor

◆ RawDatabaseTable() [2/7]

template<template< typename > class ALLOC = std::allocator>
template<template< typename > class MISSALLOC>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( const MissingValType< MISSALLOC > &  missing_symbols,
const allocator_type &  alloc = allocator_type() 
)

default constructor

◆ RawDatabaseTable() [3/7]

template<template< typename > class ALLOC = std::allocator>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( const allocator_type &  alloc = allocator_type())

default constructor

◆ RawDatabaseTable() [4/7]

template<template< typename > class ALLOC = std::allocator>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( const RawDatabaseTable< ALLOC > &  from)

copy constructor

◆ RawDatabaseTable() [5/7]

template<template< typename > class ALLOC = std::allocator>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( const RawDatabaseTable< ALLOC > &  from,
const allocator_type &  alloc 
)

copy constructor with a given allocator

◆ RawDatabaseTable() [6/7]

template<template< typename > class ALLOC = std::allocator>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( RawDatabaseTable< ALLOC > &&  from)

move constructor

◆ RawDatabaseTable() [7/7]

template<template< typename > class ALLOC = std::allocator>
gum::learning::RawDatabaseTable< ALLOC >::RawDatabaseTable ( RawDatabaseTable< ALLOC > &&  from,
const allocator_type &  alloc 
)

move constructor with a given allocator

◆ ~RawDatabaseTable()

template<template< typename > class ALLOC = std::allocator>
virtual gum::learning::RawDatabaseTable< ALLOC >::~RawDatabaseTable ( )
virtual

destructor

Member Function Documentation

◆ begin()

iterator gum::learning::IDatabaseTable< DBCell , ALLOC >::begin ( ) const
inherited

returns a new unsafe handler pointing to the beginning of the database

◆ beginSafe()

iterator_safe gum::learning::IDatabaseTable< DBCell , ALLOC >::beginSafe ( ) const
inherited

returns a new safe handler pointing to the beginning of the database

◆ clear()

template<template< typename > class ALLOC = std::allocator>
virtual void gum::learning::RawDatabaseTable< ALLOC >::clear ( )
final virtual

erase the content of the database, including the names of the variables

Reimplemented from gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ clone() [1/2]

template<template< typename > class ALLOC = std::allocator>
virtual RawDatabaseTable< ALLOC >* gum::learning::RawDatabaseTable< ALLOC >::clone ( ) const
final virtual

virtual copy constructor

Implements gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ clone() [2/2]

template<template< typename > class ALLOC = std::allocator>
virtual RawDatabaseTable< ALLOC >* gum::learning::RawDatabaseTable< ALLOC >::clone ( const allocator_type &  alloc) const
final virtual

virtual copy constructor with a given allocator

Implements gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ columnFromVariableName()

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::columnFromVariableName ( const std::string &  name) const
inherited

returns the index of the column whose name is passed in argument

Warning
If several columns correspond to the name, only the column with the lowest index is returned. If you wish to retrieve all the columns, use method columnsFromVariableName
Exceptions
UndefinedElement is raised if there exists no column with the given name

◆ columnsFromVariableName()

DBVector< std::size_t > gum::learning::IDatabaseTable< DBCell , ALLOC >::columnsFromVariableName ( const std::string &  name) const
inherited

returns the indices of all the columns whose name is passed in argument

It may happen that several columns correspond to a given variable name. In this case, the function returns the indices of all the columns of the IDatabase that match the name.

◆ content()

const Matrix< DBCell >& gum::learning::IDatabaseTable< DBCell , ALLOC >::content ( ) const
noexcept inherited

returns the content (the records) of the database

◆ empty()

bool gum::learning::IDatabaseTable< DBCell , ALLOC >::empty ( ) const
noexcept inherited

indicates whether the database contains some records or not

◆ end()

const iterator& gum::learning::IDatabaseTable< DBCell , ALLOC >::end ( ) const
noexcept inherited

returns a new unsafe handler pointing to the end of the database

◆ endSafe()

const iterator_safe& gum::learning::IDatabaseTable< DBCell , ALLOC >::endSafe ( ) const
noexcept inherited

returns a new safe handler pointing to the end of the database

◆ eraseAllRows()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseAllRows ( )
inherited

erase all the rows

◆ eraseFirstRow()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseFirstRow ( )
inherited

erase the first row

Warning
if the row does not exist, nothing is done. In particular, no exception is raised.

◆ eraseFirstRows()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseFirstRows ( const std::size_t  k)
inherited

erase the k first rows

Warning
if there are fewer than k rows in the database, the database is completely emptied

◆ eraseLastRow()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseLastRow ( )
inherited

erase the last row

Warning
if the row does not exist, nothing is done. In particular, no exception is raised.

◆ eraseLastRows()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseLastRows ( const std::size_t  k)
inherited

erase the k last rows

Warning
if there are fewer than k rows in the database, the database is completely emptied

◆ eraseRow()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseRow ( std::size_t  index)
inherited

erase a given row specified by its index in the table

In the database, rows are indexed, starting from 0.

Warning
If the row does not exist, nothing is done. In particular, no exception is raised.

◆ eraseRows()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::eraseRows ( std::size_t  deb,
std::size_t  end 
)
inherited

erase the rows from index deb (included) to index end (excluded)

In the database, rows are indexed, starting from 0.

◆ getAllocator()

ALLOC< DBCell > gum::learning::IDatabaseTable< DBCell , ALLOC >::getAllocator ( ) const
inherited

returns the allocator of the database

◆ handler()

iterator gum::learning::IDatabaseTable< DBCell , ALLOC >::handler ( ) const
inherited

returns a new unsafe handler pointing to the 1st record of the database

◆ handlerSafe()

iterator_safe gum::learning::IDatabaseTable< DBCell , ALLOC >::handlerSafe ( ) const
inherited

returns a new safe handler pointing to the 1st record of the database

◆ hasMissingValues() [1/2]

bool gum::learning::IDatabaseTable< DBCell , ALLOC >::hasMissingValues ( ) const
inherited

indicates whether the database contains some missing values

◆ hasMissingValues() [2/2]

bool gum::learning::IDatabaseTable< DBCell , ALLOC >::hasMissingValues ( const std::size_t  k) const
inherited

indicates whether the kth row contains some missing values

◆ ignoreColumn()

template<template< typename > class ALLOC = std::allocator>
virtual void gum::learning::RawDatabaseTable< ALLOC >::ignoreColumn ( const std::size_t  k,
const bool  from_external_object = true 
)
final virtual

makes the database table ignore from now on the kth column

This method can be called in two different ways: either k refers to the current kth column of the database table (in this case parameter from_external_object is set to false), or k corresponds to the kth column of an original database used to fill the database table (in this case from_external_object is set to true). Depending on from_external_object's value, the ignored columns may differ. As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to ignoreColumn ( 1, true ) will exclude column X1 from the database table. As a result, the database table columns are X0, X2, X3 and X4. Therefore, subsequently calling ignoreColumn ( 1, false ) will result in excluding X2 since X2 is the 2nd column (columns are indexed starting from 0). So, now the database table's columns are X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will remove column X3 because, in the original database, X3 was the 4th column.

Warning
If the database table was not empty, then the kth column is removed from all the rows currently stored.
If the kth column does not exist (i.e., the original dataset does not contain the kth column when from_external_object is set to true, or the RawDatabaseTable has no kth column when from_external_object is set to false), column k is marked as to be ignored and nothing is done on the content of the RawDatabaseTable. No exception is raised.
Parameters
k: the column to remove. See Method setVariableNames for a detailed description on how k is computed.
from_external_object: indicates whether k refers to the kth column of an original external database (true) or to the current kth column of the RawDatabaseTable (false).

Implements gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ ignoredColumns()

template<template< typename > class ALLOC = std::allocator>
virtual const DBVector< std::size_t > gum::learning::RawDatabaseTable< ALLOC >::ignoredColumns ( ) const
final virtual

returns the set of columns of the original dataset that are ignored

Implements gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ inputColumns()

template<template< typename > class ALLOC = std::allocator>
virtual const DBVector< std::size_t > gum::learning::RawDatabaseTable< ALLOC >::inputColumns ( ) const
final virtual

returns the set of columns of the original dataset that are present in the RawDatabaseTable

Implements gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ insertRow() [1/4]

template<template< typename > class ALLOC = std::allocator>
virtual void gum::learning::RawDatabaseTable< ALLOC >::insertRow ( const std::vector< std::string, ALLOC< std::string > > &  new_row)
final virtual

insert a new row at the end of the database

The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.

Exceptions
SizeError is raised if the vector of string cannot be inserted in the RawDatabaseTable because its size does not allow a matching with the columns of the RawDatabaseTable (taking into account the ignored columns)

◆ insertRow() [2/4]

void gum::learning::IDatabaseTable< DBCell , ALLOC >::insertRow ( const std::vector< std::string, OTHER_ALLOC< std::string > > &  new_row)
inherited

insert a new row at the end of the database

The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.

Exceptions
SizeError is raised if the vector of string cannot be inserted in the IDatabaseTable because its size does not allow a matching with the columns of the IDatabaseTable (taking into account the ignored columns)

◆ insertRow() [3/4]

virtual void gum::learning::IDatabaseTable< DBCell , ALLOC >::insertRow ( Row< DBCell > &&  new_row,
const IsMissing  contains_missing_data 
)
virtual inherited

insert a new DBRow at the end of the database

Unlike methods insertRow for data whose type is different from T_DATA, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.

Exceptions
SizeError is raised if the size of the new_row is not equal to the number of columns retained in the IDatabaseTable

◆ insertRow() [4/4]

virtual void gum::learning::IDatabaseTable< DBCell , ALLOC >::insertRow ( const Row< DBCell > &  new_row,
const IsMissing  contains_missing_data 
)
virtual inherited

insert a new row at the end of the database

Unlike methods insertRow for data whose type is different from T_DATA, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.

Exceptions
SizeError is raised if the size of the new_row is not equal to the number of columns retained in the IDatabaseTable

◆ insertRows() [1/2]

virtual void gum::learning::IDatabaseTable< DBCell , ALLOC >::insertRows ( Matrix< DBCell > &&  new_rows,
const DBVector< IsMissing > &  rows_have_missing_vals 
)
virtual inherited

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Parameters
new_rows: the new set of rows to be copied as is
rows_have_missing_vals: a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not
Exceptions
SizeError is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals.

◆ insertRows() [2/2]

virtual void gum::learning::IDatabaseTable< DBCell , ALLOC >::insertRows ( const Matrix< DBCell > &  new_rows,
const DBVector< IsMissing > &  rows_have_missing_vals 
)
virtual inherited

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Parameters
new_rows: the new set of rows to be copied as is
rows_have_missing_vals: a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not
Exceptions
SizeError is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals.

◆ isRowSizeOK_()

bool gum::learning::IDatabaseTable< DBCell , ALLOC >::isRowSizeOK_ ( const std::size_t  size) const
protected inherited

checks whether a size corresponds to the number of columns of the database

◆ minNbRowsPerThread()

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::minNbRowsPerThread ( ) const
inherited

returns the minimum number of rows that each thread should process

◆ missingSymbols()

const DBVector< std::string >& gum::learning::IDatabaseTable< DBCell , ALLOC >::missingSymbols ( ) const
inherited

returns the set of missing symbols

◆ nbRows()

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::nbRows ( ) const
noexcept inherited

returns the number of records (rows) in the database

◆ nbThreads()

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::nbThreads ( ) const
inherited

returns the number of threads used to parse the database

◆ nbVariables()

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::nbVariables ( ) const
noexcept inherited

returns the number of variables (columns) of the database

◆ operator=() [1/2]

template<template< typename > class ALLOC = std::allocator>
RawDatabaseTable< ALLOC >& gum::learning::RawDatabaseTable< ALLOC >::operator= ( const RawDatabaseTable< ALLOC > &  from)

copy operator

◆ operator=() [2/2]

template<template< typename > class ALLOC = std::allocator>
RawDatabaseTable< ALLOC >& gum::learning::RawDatabaseTable< ALLOC >::operator= ( RawDatabaseTable< ALLOC > &&  from)

move operator

◆ setAllRowsWeight()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::setAllRowsWeight ( const double  new_weight)
inherited

assign a given weight to all the rows of the database

◆ setMaxNbThreads()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::setMaxNbThreads ( const std::size_t  nb) const
inherited

changes the max number of threads that a database can use

Within databases, some methods can be processed in a parallel fashion. This method indicates the maximum number of threads that can be run in parallel at the same time.

◆ setMinNbRowsPerThread()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::setMinNbRowsPerThread ( const std::size_t  nb) const
inherited

changes the minimum number of rows a thread should process in a multithreading context

When a method executes several threads to perform actions on the rows of the database, the MinNbRowsPerThread indicates how many rows each thread should at least process. This is used to compute the number of threads actually run. This number is equal to the min between the max number of threads allowed and the number of records in the database divided by nb.

◆ setVariableNames() [1/2]

template<template< typename > class ALLOC = std::allocator>
virtual void gum::learning::RawDatabaseTable< ALLOC >::setVariableNames ( const std::vector< std::string, ALLOC< std::string > > &  names,
const bool  from_external_object = true 
)
final, virtual

sets the names of the variables

This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, these columns should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.

Parameters
names: the names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table).
from_external_object: a Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false).
Exceptions
SizeError: is raised if the names passed in arguments cannot be assigned to the columns of the RawDatabaseTable because the size of their vector is inadequate.

Implements gum::learning::IDatabaseTable< DBCell, ALLOC >.

◆ setVariableNames() [2/2]

void gum::learning::IDatabaseTable< DBCell , ALLOC >::setVariableNames ( const std::vector< std::string, OTHER_ALLOC< std::string > > &  names,
const bool  from_external_object = true 
)
inherited

sets the names of the variables

This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, the latter should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.

Parameters
names: the names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table).
from_external_object: a Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false).
Exceptions
SizeError: is raised if the names passed in arguments cannot be assigned to the columns of the IDatabaseTable because the size of their vector is inadequate.

◆ setWeight()

void gum::learning::IDatabaseTable< DBCell , ALLOC >::setWeight ( const std::size_t  i,
const double  weight 
)
inherited

assigns a given weight to the ith row of the database

Exceptions
OutOfBounds: if i is outside the set of indices of the records or if the weight is negative

◆ size()

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::size ( ) const
noexcept, inherited

returns the number of records (rows) in the database

◆ variableName()

const std::string& gum::learning::IDatabaseTable< DBCell , ALLOC >::variableName ( const std::size_t  k) const
inherited

returns the name of the kth column of the IDatabaseTable

Exceptions
OutOfBounds: is raised if the IDatabaseTable contains fewer than k columns.

◆ variableNames()

const DBVector< std::string >& gum::learning::IDatabaseTable< DBCell , ALLOC >::variableNames ( ) const
noexcept, inherited

returns the variable names for all the columns of the database

The names do not include the ignored columns.

◆ weight() [1/2]

double gum::learning::IDatabaseTable< DBCell , ALLOC >::weight ( const std::size_t  i) const
inherited

returns the weight of the ith record

Exceptions
OutOfBounds: if i is outside the set of indices of the records

◆ weight() [2/2]

double gum::learning::IDatabaseTable< DBCell , ALLOC >::weight ( ) const
inherited

returns the weight of the whole database

Member Data Documentation

◆ has_row_missing_val_

DBVector< IsMissing > gum::learning::IDatabaseTable< DBCell , ALLOC >::has_row_missing_val_
protected, inherited

Definition at line 1142 of file IDatabaseTable.h.

◆ max_nb_threads_

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::max_nb_threads_
mutable, protected, inherited

Definition at line 1145 of file IDatabaseTable.h.

◆ min_nb_rows_per_thread_

std::size_t gum::learning::IDatabaseTable< DBCell , ALLOC >::min_nb_rows_per_thread_
mutable, protected, inherited

Definition at line 1149 of file IDatabaseTable.h.

◆ missing_symbols_

DBVector< std::string > gum::learning::IDatabaseTable< DBCell , ALLOC >::missing_symbols_
protected, inherited

Definition at line 1139 of file IDatabaseTable.h.

◆ rows_

Matrix< DBCell > gum::learning::IDatabaseTable< DBCell , ALLOC >::rows_
protected, inherited

Definition at line 1136 of file IDatabaseTable.h.

◆ variable_names_

DBVector< std::string > gum::learning::IDatabaseTable< DBCell , ALLOC >::variable_names_
protected, inherited

the names of the variables for each column

Definition at line 1133 of file IDatabaseTable.h.


The documentation for this class was generated from the following file: