30 #ifndef GUM_IDATABASE_TABLE_H 31 #define GUM_IDATABASE_TABLE_H 41 #include <agrum/agrum.h> 42 #include <agrum/tools/core/thread.h> 43 #include <agrum/tools/core/OMPThreads.h> 44 #include <agrum/tools/database/DBCell.h> 45 #include <agrum/tools/database/DBRow.h> 46 #include <agrum/tools/database/DBHandler.h> 47 #include <agrum/tools/database/DBTranslator.h> 54 template <
template <
typename >
class ALLOC,
bool ENABLE_INSERT >
55 struct IDatabaseTableInsert4DBCell;
57 template <
template <
typename >
class ALLOC >
101 template <
template <
typename >
class ALLOC >
250 template <
typename T_DATA,
template <
typename >
class ALLOC = std::allocator >
253 private ALLOC< T_DATA > {
256 template <
typename TX_DATA >
257 using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
267 template <
template <
typename >
class XALLOC >
268 using MissingValType = std::vector< std::string, XALLOC< std::string > >;
373 using iterator_category = std::random_access_iterator_tag;
379 using difference_type = std::ptrdiff_t;
380 using allocator_type = ALLOC< T_DATA >;
384 template <
typename TX_DATA >
385 using DBVector = std::vector< TX_DATA, ALLOC< TX_DATA > >;
402 Handler(
const IDatabaseTable< T_DATA, ALLOC >& db);
487 virtual std::size_t
size()
const final;
490 virtual std::size_t
DBSize()
const final;
518 virtual std::size_t
numRow()
const final;
521 virtual bool hasRows()
const final;
524 virtual void reset()
final;
551 virtual void setRange(std::size_t first, std::size_t last)
final;
554 virtual std::pair< std::size_t, std::size_t >
range()
const final;
570 #ifndef DOXYGEN_SHOULD_SKIP_THIS 574 const IDatabaseTable< T_DATA, ALLOC >* _db_;
579 const Matrix< T_DATA >* _row_;
582 std::size_t _index_{std::size_t(0)};
585 std::size_t _begin_index_{std::size_t(0)};
588 std::size_t _end_index_{std::size_t(0)};
691 using iterator_category = std::random_access_iterator_tag;
692 using value_type =
typename Handler::value_type;
693 using reference = value_type&;
694 using const_reference =
const value_type&;
695 using pointer = value_type*;
696 using const_pointer =
const value_type*;
697 using difference_type = std::ptrdiff_t;
698 using allocator_type = ALLOC< T_DATA >;
709 HandlerSafe(
const IDatabaseTable< T_DATA, ALLOC >& db);
742 #ifndef DOXYGEN_SHOULD_SKIP_THIS 746 void _attachHandler_();
749 void _detachHandler_();
764 using size_type = std::size_t;
765 using difference_type = std::ptrdiff_t;
770 using allocator_type = ALLOC< T_DATA >;
780 template <
template <
typename >
class VARALLOC,
template <
typename >
class MISSALLOC >
781 IDatabaseTable(
const MissingValType< MISSALLOC >& missing_symbols,
782 const std::vector< std::string, VARALLOC< std::string > >& var_names,
783 const ALLOC< T_DATA >& alloc);
789 IDatabaseTable(
const IDatabaseTable< T_DATA, ALLOC >& from,
const allocator_type& alloc);
795 IDatabaseTable(IDatabaseTable< T_DATA, ALLOC >&& from,
const allocator_type& alloc);
815 iterator
begin()
const;
821 const iterator&
end()
const noexcept;
824 const iterator_safe&
endSafe()
const noexcept;
873 virtual void setVariableNames(
const std::vector< std::string, ALLOC< std::string > >& names,
874 const bool from_external_object =
true)
903 template <
template <
typename >
class OTHER_ALLOC >
904 void setVariableNames(
const std::vector< std::string, OTHER_ALLOC< std::string > >& names,
905 const bool from_external_object =
true);
910 const std::string&
variableName(
const std::size_t k)
const;
930 std::size_t
nbRows()
const noexcept;
933 std::size_t
size()
const noexcept;
936 bool empty()
const noexcept;
968 virtual void ignoreColumn(
const std::size_t k,
const bool from_external_object =
true) = 0;
985 template <
template <
typename >
class OTHER_ALLOC >
986 void insertRow(
const std::vector< std::string, OTHER_ALLOC< std::string > >& new_row);
995 virtual void insertRow(Row< T_DATA >&& new_row,
const IsMissing contains_missing_data);
1004 virtual void insertRow(
const Row< T_DATA >& new_row,
const IsMissing contains_missing_data);
1022 virtual void insertRows(Matrix< T_DATA >&& new_rows,
1023 const DBVector< IsMissing >& rows_have_missing_vals);
1038 virtual void insertRows(
const Matrix< T_DATA >& new_rows,
1039 const DBVector< IsMissing >& rows_have_missing_vals);
1069 void eraseRows(std::size_t deb, std::size_t end);
1075 virtual void clear();
1118 void setWeight(
const std::size_t i,
const double weight);
1123 double weight(
const std::size_t i)
const;
1163 #ifndef DOXYGEN_SHOULD_SKIP_THIS 1168 mutable DBVector< HandlerSafe* > _list_of_safe_handlers_;
1171 mutable std::mutex _safe_handlers_mutex_;
1177 iterator_safe* _end_safe_{
nullptr};
1186 void _updateHandlers_(std::size_t new_size)
const;
1189 void _createEndIterators_();
1204 #include <agrum/tools/database/IDatabaseTable_tpl.h> virtual void insertRow(const Row< T_DATA > &new_row, const IsMissing contains_missing_data)
insert a new row at the end of the database
virtual HandlerSafe & operator=(const Handler &)
copy operator
virtual Handler & operator--() final
makes the handler point to the previous row in the database
HandlerSafe(const IDatabaseTable< T_DATA, ALLOC > &db)
default constructor
void eraseFirstRows(const std::size_t k)
erase the first k rows
void insertRow(const std::vector< std::string, OTHER_ALLOC< std::string > > &new_row)
insert a new row at the end of the database
virtual const_reference row() const final
returns the current row pointed to by the handler (unsafe version)
virtual bool hasRows() const final
indicates whether the handler has reached its end or not
IDatabaseTable(IDatabaseTable< T_DATA, ALLOC > &&from)
move constructor
virtual reference rowSafe() final
returns the current row pointed to by the handler (safe version)
void setMaxNbThreads(const std::size_t nb) const
changes the max number of threads that a database can use
virtual IDatabaseTable< T_DATA, ALLOC > * clone(const allocator_type &alloc) const =0
virtual copy constructor with a given allocator
std::size_t columnFromVariableName(const std::string &name) const
returns the index of the column whose name is passed in argument
virtual std::size_t DBSize() const final
returns the number of rows of the whole database
virtual ~Handler()
destructor
ALLOC< T_DATA > getAllocator() const
returns the allocator of the database
DBVector< std::string > variable_names_
the names of the variables for each column
DBVector< std::size_t > columnsFromVariableName(const std::string &name) const
returns the indices of all the columns whose name is passed in argument
INLINE void emplace(Args &&... args)
void eraseRow(std::size_t index)
erase a given row specified by its index in the table
std::size_t nbThreads() const
returns the number of threads used to parse the database
virtual void insertRows(const Matrix< T_DATA > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
the (unsafe) handler for the tabular databases
std::size_t size() const noexcept
returns the number of records (rows) in the database
virtual const_pointer operator->() const final
Dereferences the value pointed to by the handler (unsafe version)
virtual reference row() final
returns the current row pointed to by the handler (unsafe version)
IDatabaseTable< T_DATA, ALLOC > & operator=(const IDatabaseTable< T_DATA, ALLOC > &from)
copy operator
virtual void setRange(std::size_t first, std::size_t last) final
sets the area in the database the handler will handle
virtual const_reference operator*() const final
returns the current row pointed to by the handler (unsafe version)
void setMinNbRowsPerThread(const std::size_t nb) const
changes the minimum number of rows a thread should process in a multithreading context ...
const DBVector< std::string > & missingSymbols() const
returns the set of missing symbols
void eraseRows(std::size_t deb, std::size_t end)
erase the rows from index deb up to index end (the row at index end is not erased)
virtual Handler begin() const
returns a new handler that points to the beginning of the database's area of the current handler ...
virtual std::size_t nbVariables() const final
returns the number of variables (columns) of the database
void setVariableNames(const std::vector< std::string, OTHER_ALLOC< std::string > > &names, const bool from_external_object=true)
sets the names of the variables
virtual Handler & operator-=(const std::size_t i) final
moves back the handler by i rows in the database
void eraseLastRow()
erase the last row
virtual const DBVector< std::size_t > ignoredColumns() const =0
returns the set of columns of the original dataset that are ignored
void eraseAllRows()
erase all the rows
Handler(Handler &&h)
move constructor
Handler(const Handler &h)
copy constructor
IDatabaseTable< T_DATA, ALLOC > & operator=(IDatabaseTable< T_DATA, ALLOC > &&from)
move operator
iterator_safe beginSafe() const
returns a new safe handler pointing to the beginning of the database
bool isRowSizeOK_(const std::size_t size) const
checks whether a size corresponds to the number of columns of the database
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true)=0
sets the names of the variables
IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC > &from)
copy constructor
virtual HandlerSafe & operator=(HandlerSafe &&)
move operator
void eraseLastRows(const std::size_t k)
erase the last k rows
const Matrix< T_DATA > & content() const noexcept
returns the content (the records) of the database
DBVector< IsMissing > has_row_missing_val_
virtual Handler end() const
returns a new handler that points to the end of the database's area of the current handler ...
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
virtual bool operator==(const Handler &handler) const final
checks whether two handlers point to the same row in the database
virtual HandlerSafe & operator=(Handler &&)
move operator
DBVector< std::string > missing_symbols_
friend class Handler
allow the handlers to access the database directly
IDatabaseTable(const MissingValType< MISSALLOC > &missing_symbols, const std::vector< std::string, VARALLOC< std::string > > &var_names, const ALLOC< T_DATA > &alloc)
default constructor
virtual void nextRow() final
makes the handler point to the next row, equivalent to operator++
virtual std::size_t numRow() const final
the number of the current row (0 = the 1st row managed by the handler)
const iterator_safe & endSafe() const noexcept
returns a reference to a safe handler pointing to the end of the database
virtual bool operator!=(const Handler &handler) const final
checks whether two handlers point to different rows in the database
bool hasMissingValues() const
indicates whether the database contains some missing values
bool empty() const noexcept
indicates whether the database contains some records or not
double weight() const
returns the weight of the whole database
const iterator & end() const noexcept
returns a reference to an unsafe handler pointing to the end of the database
virtual const_reference rowSafe() const final
returns the current row pointed to by the handler (safe version)
virtual IDatabaseTable< T_DATA, ALLOC > * clone() const =0
virtual copy constructor
std::size_t nbRows() const noexcept
returns the number of records (rows) in the database
virtual const IDatabaseTable< T_DATA, ALLOC > & database() const
returns a reference to the database
virtual Handler & operator=(Handler &&)
move operator
virtual HandlerSafe & operator=(const HandlerSafe &)
copy operator
the safe handler of the tabular databases
virtual Handler & operator+=(const std::size_t i) final
advances the handler by i rows in the database
double weight(const std::size_t i) const
returns the weight of the ith record
virtual void insertRows(Matrix< T_DATA > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
insert a set of new DBRows at the end of the database
virtual void insertRow(Row< T_DATA > &&new_row, const IsMissing contains_missing_data)
insert a new DBRow at the end of the database
virtual ~HandlerSafe()
destructor
Handler(const IDatabaseTable< T_DATA, ALLOC > &db)
default constructor
IDatabaseTable(IDatabaseTable< T_DATA, ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual Handler & operator=(const Handler &)
copy operator
const std::string & variableName(const std::size_t k) const
returns the name of the kth column of the IDatabaseTable
iterator_safe handlerSafe() const
returns a new safe handler pointing to the 1st record of the database
HandlerSafe(HandlerSafe &&h)
move constructor
virtual const DBVector< std::string > & variableNames() const final
returns the names of the variables
iterator begin() const
returns a new unsafe handler pointing to the beginning of the database
void setAllRowsWeight(const double new_weight)
assign a given weight to all the rows of the database
virtual const DBVector< std::size_t > inputColumns() const =0
returns the set of columns of the original dataset that are present in the IDatabaseTable ...
bool hasMissingValues(const std::size_t k) const
indicates whether the kth row contains some missing values
virtual std::size_t size() const final
returns the number of rows managed by the handler
std::size_t max_nb_threads_
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
std::size_t minNbRowsPerThread() const
returns the minimum number of rows that each thread should process
HandlerSafe(const HandlerSafe &h)
copy constructor
virtual ~IDatabaseTable()
destructor
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
virtual std::pair< std::size_t, std::size_t > range() const final
returns the current range of the handler [begin,end)
std::size_t min_nb_rows_per_thread_
virtual void ignoreColumn(const std::size_t k, const bool from_external_object=true)=0
makes the database table ignore from now on the kth column
IDatabaseTable(const IDatabaseTable< T_DATA, ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual Handler & operator++() final
makes the handler point to the next row in the database
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
void eraseFirstRow()
erase the first row
virtual void clear()
erase the content of the database, including the names of the variables
virtual void reset() final
puts the handler to the beginning of the database's area it handles
void setWeight(const std::size_t i, const double weight)
assigns a given weight to the ith row of the database