aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
gum::learning::DBTranslator4DiscretizedVariable< ALLOC > Class Template Reference

The databases' cell translators for discretized variables. More...

#include <agrum/tools/database/DBTranslator4DiscretizedVariable.h>

+ Inheritance diagram for gum::learning::DBTranslator4DiscretizedVariable< ALLOC >:
+ Collaboration diagram for gum::learning::DBTranslator4DiscretizedVariable< ALLOC >:

Public Member Functions

Constructors / Destructors
template<typename GUM_SCALAR , template< typename > class XALLOC>
 DBTranslator4DiscretizedVariable (const DiscretizedVariable< GUM_SCALAR > &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
 default constructor with a discretized variable as translator More...
 
template<typename GUM_SCALAR >
 DBTranslator4DiscretizedVariable (const DiscretizedVariable< GUM_SCALAR > &var, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
 default constructor with a discretized variable as translator but without missing symbols More...
 
template<template< typename > class XALLOC>
 DBTranslator4DiscretizedVariable (const IDiscretizedVariable &var, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
 default constructor with a IDiscretized variable as translator More...
 
 DBTranslator4DiscretizedVariable (const IDiscretizedVariable &var, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
 default constructor with a IDiscretized variable as translator but without missing symbols More...
 
 DBTranslator4DiscretizedVariable (const DBTranslator4DiscretizedVariable< ALLOC > &from)
 copy constructor More...
 
 DBTranslator4DiscretizedVariable (const DBTranslator4DiscretizedVariable< ALLOC > &from, const allocator_type &alloc)
 copy constructor with a given allocator More...
 
 DBTranslator4DiscretizedVariable (DBTranslator4DiscretizedVariable< ALLOC > &&from)
 move constructor More...
 
 DBTranslator4DiscretizedVariable (DBTranslator4DiscretizedVariable< ALLOC > &&from, const allocator_type &alloc)
 move constructor with a given allocator More...
 
virtual DBTranslator4DiscretizedVariable< ALLOC > * clone () const
 virtual copy constructor More...
 
virtual DBTranslator4DiscretizedVariable< ALLOC > * clone (const allocator_type &alloc) const
 virtual copy constructor with a given allocator More...
 
virtual ~DBTranslator4DiscretizedVariable ()
 destructor More...
 
Operators
DBTranslator4DiscretizedVariable< ALLOC > & operator= (const DBTranslator4DiscretizedVariable< ALLOC > &from)
 copy operator More...
 
DBTranslator4DiscretizedVariable< ALLOC > & operator= (DBTranslator4DiscretizedVariable< ALLOC > &&from)
 move operator More...
 
Accessors / Modifiers
virtual DBTranslatedValue translate (const std::string &str) final
 returns the translation of a string More...
 
virtual std::string translateBack (const DBTranslatedValue translated_val) const final
 returns the original value for a given translation More...
 
virtual std::size_t domainSize () const final
 returns the number of discretization intervals used for translations More...
 
virtual bool hasEditableDictionary () const final
 indicates that the translator is never in editable dictionary mode More...
 
virtual void setEditableDictionaryMode (bool new_mode) final
 sets/unset the editable dictionary mode More...
 
virtual bool needsReordering () const final
 indicates that the translations should never be reordered More...
 
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder () final
 returns an empty HashTable to indicate that no reordering is needed. More...
 
virtual const IDiscretizedVariable * variable () const final
 returns the variable stored into the translator More...
 
virtual DBTranslatedValue missingValue () const final
 returns the translation of a missing value More...
 
Operators
DBTranslatedValue operator<< (const std::string &str)
 alias for method translate More...
 
std::string operator>> (const DBTranslatedValue translated_val)
 alias for method translateBack More...
 
Accessors / Modifiers
const Set< std::string, ALLOC< std::string > > & missingSymbols () const
 returns the set of missing symbols taken into account by the translator More...
 
bool isMissingSymbol (const std::string &str) const
 indicates whether a string corresponds to a missing symbol More...
 
void setVariableName (const std::string &str) const
 sets the name of the variable stored into the translator More...
 
void setVariableDescription (const std::string &str) const
 sets the description of the variable stored into the translator More...
 
DBTranslatedValueType getValType () const
 returns the type of values handled by the translator More...
 
allocator_type getAllocator () const
 returns the allocator used by the translator More...
 
bool isMissingValue (const DBTranslatedValue &val) const
 indicates whether a translated value corresponds to a missing value More...
 

Public Types

using allocator_type = typename DBTranslator< ALLOC >::allocator_type
 type for the allocators passed in arguments of methods More...
 

Protected Attributes

bool is_dictionary_dynamic_
 indicates whether the dictionary can be updated or not More...
 
std::size_t max_dico_entries_
 the maximum number of entries that the dictionary is allowed to contain More...
 
Set< std::string, ALLOC< std::string > > missing_symbols_
 the set of missing symbols More...
 
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > back_dico_
 the bijection relating back translated values and their original strings. More...
 
DBTranslatedValueType val_type_
 the type of the values translated by the translator More...
 

Detailed Description

template<template< typename > class ALLOC = std::allocator>
class gum::learning::DBTranslator4DiscretizedVariable< ALLOC >

The databases' cell translators for discretized variables.

Translators are used by DatabaseTable instances to transform datasets' strings into DBTranslatedValue instances. The point is that strings are not adequate for fast learning, they need to be preprocessed into a type that can be analyzed quickly (the so-called DBTranslatedValue type).

A DBTranslator4DiscretizedVariable is a translator that contains and exploits a DiscretizedVariable for translations. Each time a string needs to be translated, we ask the DiscretizedVariable which discretization interval contains the number represented by the string. The DBTranslatedValue corresponding to the translation of the string contains in its discr_val field the index of this discretization interval.

Warning
Translators for discretized variables are not editable, that is, you must provide the const variable that will be used for translations. Enabling the editable mode would not make much sense.
Here is an example of how to use this class:
// create the translator, with possible missing symbols: "N/A" and "???"
// i.e., each time the translator reads a "N/A" or a "???" string, it
// won't translate it into a number but into a missing value.
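std::vector<std::string> missing { "N/A", "???" };
gum::DiscretizedVariable<float> var ( "X", "" );
var.addTick ( 1 );
var.addTick ( 3 );
var.addTick ( 10 );
gum::learning::DBTranslator4DiscretizedVariable<> translator ( var, missing );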
// gets the DBTranslatedValue corresponding to some strings
auto val1 = translator.translate("5.2");
auto val2 = translator << "2";
// at this point, val1 and val2 are equal to
// gum::learning::DBTranslatedValue { std::size_t(1) } and
// gum::learning::DBTranslatedValue { std::size_t(0) } respectively
// because the first discretization interval corresponds to [1;3[ and
// the second one to [3;10[.
// if the string contains a number outside the domain of the
// DiscretizedVariable, then a gum::NotFound exception is raised:
auto val3 = translator << "17"; // NotFound raised
// add the numbers assigned to val1, val2
std::size_t sum = val1.discr_val + val2.discr_val;
// translate missing values: val4 and val5 will be equal to:
// DBTranslatedValue { std::numeric_limits<std::size_t>::max () }
auto val4 = translator << "N/A";
auto val5 = translator.translate ( "???" );
// the following instructions raise TypeError exceptions because the
// strings are not numbers
auto val6 = translator << "422x";
auto val7 = translator.translate ( "xxx" );
// given a DBTranslatedValue that is supposed to contain the index of
// a discretization interval, get the string representing the interval.
std::string str;
str = translator.translateBack ( val1 ); // str = "[3;10["
str = translator >> val2; // str = "[1;3["
str = translator >> gum::learning::DBTranslatedValue {std::size_t(1)};
// str = "[3;10["
// translate back missing values: the string will correspond to one of
// the missing symbols known to the translator
str = translator >> val4; // str = "N/A" or "???"
str = translator >> val5; // str = "N/A" or "???"
// get the variable stored within the translator
dynamic_cast<const gum::DiscretizedVariable<float>*>
( translator.variable () );

Definition at line 120 of file DBTranslator4DiscretizedVariable.h.

Member Typedef Documentation

◆ allocator_type

template<template< typename > class ALLOC = std::allocator>
using gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::allocator_type = typename DBTranslator< ALLOC >::allocator_type

type for the allocators passed in arguments of methods

Definition at line 123 of file DBTranslator4DiscretizedVariable.h.

Constructor & Destructor Documentation

◆ DBTranslator4DiscretizedVariable() [1/8]

template<template< typename > class ALLOC = std::allocator>
template<typename GUM_SCALAR , template< typename > class XALLOC>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( const DiscretizedVariable< GUM_SCALAR > &  var,
const std::vector< std::string, XALLOC< std::string > > &  missing_symbols,
std::size_t  max_dico_entries = std::numeric_limits< std::size_t >::max(),
const allocator_type &  alloc = allocator_type() 
)

default constructor with a discretized variable as translator

Parameters
var: a discretized variable which will be used for translations. The translator keeps a copy of this variable
missing_symbols: the set of symbols in the dataset representing missing values
max_dico_entries: the max number of entries that the dictionary can contain. During the construction, we check that the discretized variable passed in argument has fewer discretization intervals than the admissible dictionary size
alloc: The allocator used to allocate memory for all the fields of the DBTranslator4DiscretizedVariable
Warning
If the variable contained into the translator has a label equal to a missing value symbol, the label will be taken into account in the translation, not the missing value.

◆ DBTranslator4DiscretizedVariable() [2/8]

template<template< typename > class ALLOC = std::allocator>
template<typename GUM_SCALAR >
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( const DiscretizedVariable< GUM_SCALAR > &  var,
std::size_t  max_dico_entries = std::numeric_limits< std::size_t >::max(),
const allocator_type &  alloc = allocator_type() 
)

default constructor with a discretized variable as translator but without missing symbols

Parameters
var: a discretized variable which will be used for translations. The translator keeps a copy of this variable
max_dico_entries: the max number of entries that the dictionary can contain. During the construction, we check that the discretized variable passed in argument has fewer discretization intervals than the admissible dictionary size
alloc: The allocator used to allocate memory for all the fields of the DBTranslator4DiscretizedVariable
Warning
If the variable contained into the translator has a label equal to a missing value symbol, the label will be taken into account in the translation, not the missing value.

◆ DBTranslator4DiscretizedVariable() [3/8]

template<template< typename > class ALLOC = std::allocator>
template<template< typename > class XALLOC>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( const IDiscretizedVariable &  var,
const std::vector< std::string, XALLOC< std::string > > &  missing_symbols,
std::size_t  max_dico_entries = std::numeric_limits< std::size_t >::max(),
const allocator_type &  alloc = allocator_type() 
)

default constructor with a IDiscretized variable as translator

Parameters
var: an IDiscretizedVariable which will be used for translations. The translator keeps a copy of this variable
missing_symbols: the set of symbols in the dataset representing missing values
max_dico_entries: the max number of entries that the dictionary can contain. During the construction, we check that the discretized variable passed in argument has fewer discretization intervals than the admissible dictionary size
alloc: The allocator used to allocate memory for all the fields of the DBTranslator4DiscretizedVariable
Warning
If the variable contained into the translator has a label equal to a missing value symbol, the label will be taken into account in the translation, not the missing value.

◆ DBTranslator4DiscretizedVariable() [4/8]

template<template< typename > class ALLOC = std::allocator>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( const IDiscretizedVariable &  var,
std::size_t  max_dico_entries = std::numeric_limits< std::size_t >::max(),
const allocator_type &  alloc = allocator_type() 
)

default constructor with a IDiscretized variable as translator but without missing symbols

Parameters
var: a discretized variable which will be used for translations. The translator keeps a copy of this variable
max_dico_entries: the max number of entries that the dictionary can contain. During the construction, we check that the discretized variable passed in argument has fewer discretization intervals than the admissible dictionary size
alloc: The allocator used to allocate memory for all the fields of the DBTranslator4DiscretizedVariable
Warning
If the variable contained into the translator has a label equal to a missing value symbol, the label will be taken into account in the translation, not the missing value.

◆ DBTranslator4DiscretizedVariable() [5/8]

template<template< typename > class ALLOC = std::allocator>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( const DBTranslator4DiscretizedVariable< ALLOC > &  from)

copy constructor

◆ DBTranslator4DiscretizedVariable() [6/8]

template<template< typename > class ALLOC = std::allocator>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( const DBTranslator4DiscretizedVariable< ALLOC > &  from,
const allocator_type &  alloc 
)

copy constructor with a given allocator

◆ DBTranslator4DiscretizedVariable() [7/8]

template<template< typename > class ALLOC = std::allocator>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( DBTranslator4DiscretizedVariable< ALLOC > &&  from)

move constructor

◆ DBTranslator4DiscretizedVariable() [8/8]

template<template< typename > class ALLOC = std::allocator>
gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::DBTranslator4DiscretizedVariable ( DBTranslator4DiscretizedVariable< ALLOC > &&  from,
const allocator_type &  alloc 
)

move constructor with a given allocator

◆ ~DBTranslator4DiscretizedVariable()

template<template< typename > class ALLOC = std::allocator>
virtual gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::~DBTranslator4DiscretizedVariable ( )
virtual

destructor

Member Function Documentation

◆ clone() [1/2]

template<template< typename > class ALLOC = std::allocator>
virtual DBTranslator4DiscretizedVariable< ALLOC >* gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::clone ( ) const
virtual

virtual copy constructor

Implements gum::learning::DBTranslator< ALLOC >.

◆ clone() [2/2]

template<template< typename > class ALLOC = std::allocator>
virtual DBTranslator4DiscretizedVariable< ALLOC >* gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::clone ( const allocator_type &  alloc) const
virtual

virtual copy constructor with a given allocator

Implements gum::learning::DBTranslator< ALLOC >.

◆ domainSize()

template<template< typename > class ALLOC = std::allocator>
virtual std::size_t gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::domainSize ( ) const
final virtual

returns the number of discretization intervals used for translations

Warning
Note that missing values are encoded as std::numeric_limits<>::max () and are not taken into account in the domain sizes.

Implements gum::learning::DBTranslator< ALLOC >.

◆ getAllocator()

template<template< typename > class ALLOC = std::allocator>
allocator_type gum::learning::DBTranslator< ALLOC >::getAllocator ( ) const
inherited

returns the allocator used by the translator

◆ getValType()

template<template< typename > class ALLOC = std::allocator>
DBTranslatedValueType gum::learning::DBTranslator< ALLOC >::getValType ( ) const
inherited

returns the type of values handled by the translator

Returns
either DBTranslatedValueType::DISCRETE if the translator includes a discrete variable or DBTranslatedValueType::CONTINUOUS if it contains a continuous variable. This is convenient to know how to interpret the DBTranslatedValue instances produced by the DBTranslator: either using their discr_val field or their cont_val field.

◆ hasEditableDictionary()

template<template< typename > class ALLOC = std::allocator>
virtual bool gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::hasEditableDictionary ( ) const
final virtual

indicates that the translator is never in editable dictionary mode

Reimplemented from gum::learning::DBTranslator< ALLOC >.

◆ isMissingSymbol()

template<template< typename > class ALLOC = std::allocator>
bool gum::learning::DBTranslator< ALLOC >::isMissingSymbol ( const std::string &  str) const
inherited

indicates whether a string corresponds to a missing symbol

◆ isMissingValue()

template<template< typename > class ALLOC = std::allocator>
bool gum::learning::DBTranslator< ALLOC >::isMissingValue ( const DBTranslatedValue &  val) const
inherited

indicates whether a translated value corresponds to a missing value

◆ missingSymbols()

template<template< typename > class ALLOC = std::allocator>
const Set< std::string, ALLOC< std::string > >& gum::learning::DBTranslator< ALLOC >::missingSymbols ( ) const
inherited

returns the set of missing symbols taken into account by the translator

◆ missingValue()

template<template< typename > class ALLOC = std::allocator>
virtual DBTranslatedValue gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::missingValue ( ) const
final virtual

returns the translation of a missing value

Implements gum::learning::DBTranslator< ALLOC >.

◆ needsReordering()

template<template< typename > class ALLOC = std::allocator>
virtual bool gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::needsReordering ( ) const
final virtual

indicates that the translations should never be reordered

Implements gum::learning::DBTranslator< ALLOC >.

◆ operator<<()

template<template< typename > class ALLOC = std::allocator>
DBTranslatedValue gum::learning::DBTranslator< ALLOC >::operator<< ( const std::string &  str)
inherited

alias for method translate

◆ operator=() [1/2]

template<template< typename > class ALLOC = std::allocator>
DBTranslator4DiscretizedVariable< ALLOC >& gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::operator= ( const DBTranslator4DiscretizedVariable< ALLOC > &  from)

copy operator

◆ operator=() [2/2]

template<template< typename > class ALLOC = std::allocator>
DBTranslator4DiscretizedVariable< ALLOC >& gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::operator= ( DBTranslator4DiscretizedVariable< ALLOC > &&  from)

move operator

◆ operator>>()

template<template< typename > class ALLOC = std::allocator>
std::string gum::learning::DBTranslator< ALLOC >::operator>> ( const DBTranslatedValue  translated_val)
inherited

alias for method translateBack

◆ reorder()

template<template< typename > class ALLOC = std::allocator>
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::reorder ( )
final virtual

returns an empty HashTable to indicate that no reordering is needed.

Implements gum::learning::DBTranslator< ALLOC >.

◆ setEditableDictionaryMode()

template<template< typename > class ALLOC = std::allocator>
virtual void gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::setEditableDictionaryMode ( bool  new_mode)
final virtual

sets/unset the editable dictionary mode

Reimplemented from gum::learning::DBTranslator< ALLOC >.

◆ setVariableDescription()

template<template< typename > class ALLOC = std::allocator>
void gum::learning::DBTranslator< ALLOC >::setVariableDescription ( const std::string &  str) const
inherited

sets the description of the variable stored into the translator

◆ setVariableName()

template<template< typename > class ALLOC = std::allocator>
void gum::learning::DBTranslator< ALLOC >::setVariableName ( const std::string &  str) const
inherited

sets the name of the variable stored into the translator

◆ translate()

template<template< typename > class ALLOC = std::allocator>
virtual DBTranslatedValue gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::translate ( const std::string &  str)
final virtual

returns the translation of a string

This method tries to translate a given string into the DBTranslatedValue that should be stored into a databaseTable. If the translator cannot find the translation in its current dictionary, then the translator raises either a TypeError if the string is not a number or a NotFound exception.

Warning
Note that missing values (i.e., string encoded as missing symbols) are translated as std::numeric_limits<std::size_t>::max ().
If the variable contained into the translator has a discretization interval that contains a missing value symbol, the interval will be taken into account in the translation, not the missing value.
Returns
the translated value of the string to be stored into a DatabaseTable
Exceptions
UnknownLabelInDatabase: is raised if the translation cannot be found.
TypeError: is raised if the translation cannot be found and the string does not correspond to a number.

Implements gum::learning::DBTranslator< ALLOC >.

◆ translateBack()

template<template< typename > class ALLOC = std::allocator>
virtual std::string gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::translateBack ( const DBTranslatedValue  translated_val) const
final virtual

returns the original value for a given translation

Returns
the string that was translated into a given DBTranslatedValue.
Exceptions
UnknownLabelInDatabase: is raised if this original value cannot be found

Implements gum::learning::DBTranslator< ALLOC >.

◆ variable()

template<template< typename > class ALLOC = std::allocator>
virtual const IDiscretizedVariable* gum::learning::DBTranslator4DiscretizedVariable< ALLOC >::variable ( ) const
final virtual

returns the variable stored into the translator

Implements gum::learning::DBTranslator< ALLOC >.

Member Data Documentation

◆ back_dico_

template<template< typename > class ALLOC = std::allocator>
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > gum::learning::DBTranslator< ALLOC >::back_dico_
mutable protected inherited

the bijection relating back translated values and their original strings.

Note that the translated values considered here are of type std::size_t because only the values for discrete variables need be stored, those for continuous variables are actually identity mappings.

Warning
only the values of the random variable are stored into this bijection. Missing values are not considered here.

Definition at line 388 of file DBTranslator.h.

◆ is_dictionary_dynamic_

template<template< typename > class ALLOC = std::allocator>
bool gum::learning::DBTranslator< ALLOC >::is_dictionary_dynamic_
protected inherited

indicates whether the dictionary can be updated or not

Definition at line 373 of file DBTranslator.h.

◆ max_dico_entries_

template<template< typename > class ALLOC = std::allocator>
std::size_t gum::learning::DBTranslator< ALLOC >::max_dico_entries_
protected inherited

the maximum number of entries that the dictionary is allowed to contain

Definition at line 376 of file DBTranslator.h.

◆ missing_symbols_

template<template< typename > class ALLOC = std::allocator>
Set< std::string, ALLOC< std::string > > gum::learning::DBTranslator< ALLOC >::missing_symbols_
protected inherited

the set of missing symbols

Definition at line 379 of file DBTranslator.h.

◆ val_type_

template<template< typename > class ALLOC = std::allocator>
DBTranslatedValueType gum::learning::DBTranslator< ALLOC >::val_type_
protected inherited

the type of the values translated by the translator

Definition at line 391 of file DBTranslator.h.


The documentation for this class was generated from the following file: