aGrUM  0.14.2
DBTranslator4LabelizedVariable_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 #include <utility>
27 #include <vector>
28 
31 
32 #ifndef DOXYGEN_SHOULD_SKIP_THIS
33 
34 namespace gum {
35 
36  namespace learning {
37 
38 
// Default constructor without any initial variable (see the member summary
// at the end of this listing).
// Forwards DBTranslatedValueType::DISCRETE, the missing symbols, `true`
// (the editable_dictionary argument of the DBTranslator constructor shown in
// the member summary), max_dico_entries and the allocator to the
// DBTranslator< ALLOC > base, then builds __variable from ("var", "", 0).
// NOTE(review): listing lines 42 and 45 (the constructor name and the type of
// `alloc`) are absent from this extract; the trailing 0 passed to __variable
// is presumably an initial label count -- confirm against LabelizedVariable.
40  template < template < typename > class ALLOC >
41  template < template < typename > class XALLOC >
43  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
44  std::size_t max_dico_entries,
46  alloc) :
47  DBTranslator< ALLOC >(DBTranslatedValueType::DISCRETE,
48  missing_symbols,
49  true,
50  max_dico_entries,
51  alloc),
52  __variable("var", "", 0) {
// GUM_CONSTRUCTOR is a project macro defined outside this file.
53  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
54  }
55 
56 
// Constructor taking neither an initial variable nor missing symbols.
// Calls the DBTranslator< ALLOC > base with (DISCRETE, true, max_dico_entries,
// alloc) and builds __variable from ("var", "", 0).
// NOTE(review): the declarator lines (listing lines 59 and 61) are absent
// from this extract; `true` is presumably the editable_dictionary flag of a
// base overload without missing symbols -- confirm against DBTranslator.h.
58  template < template < typename > class ALLOC >
60  std::size_t max_dico_entries,
62  alloc) :
63  DBTranslator< ALLOC >(
64  DBTranslatedValueType::DISCRETE, true, max_dico_entries, alloc),
65  __variable("var", "", 0) {
66  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
67  }
68 
69 
// Constructor initialized from an existing LabelizedVariable together with a
// set of missing symbols.
//   var                 : variable copied into __variable
//   missing_symbols     : forwarded to the DBTranslator base
//   editable_dictionary : forwarded to the DBTranslator base
//   max_dico_entries    : forwarded to the base and used as upper bound on
//                         var.domainSize()
//   alloc               : forwarded to the DBTranslator base
// Throws SizeError when var.domainSize() > max_dico_entries.
// NOTE(review): listing lines 73, 78 and 80 (constructor name, type of
// `alloc`, start of the base-class initializer) are absent from this extract.
71  template < template < typename > class ALLOC >
72  template < template < typename > class XALLOC >
74  const LabelizedVariable& var,
75  const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
76  const bool editable_dictionary,
77  std::size_t max_dico_entries,
79  alloc) :
81  missing_symbols,
82  editable_dictionary,
83  max_dico_entries,
84  alloc),
85  __variable(var) {
86  // check that the variable has not too many entries
87  if (var.domainSize() > max_dico_entries) {
88  GUM_ERROR(SizeError,
89  "the dictionary induced by the variable is too large");
90  }
91 
92  // add the content of the variable into the back dictionary
// `size` doubles as the running index associated with each label.
93  std::size_t size = 0;
94  for (const auto& label : var.labels()) {
95  // if the label corresponds to a missing value, then remove it from
96  // the set of missing symbols.
97  if (this->_missing_symbols.exists(label)) {
98  this->_missing_symbols.erase(label);
99  }
100 
101  // insert the label into the back_dictionary
102  this->_back_dico.insert(size, label);
103  ++size;
104  }
105 
106  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
107  }
108 
109 
// Constructor initialized from an existing LabelizedVariable, without any
// missing symbol.
// Copies `var` into __variable, throws SizeError when var.domainSize() exceeds
// max_dico_entries, then registers every label of `var` in _back_dico under
// its position in var.labels().
// NOTE(review): listing lines 112, 116 and 118 (constructor name, type of
// `alloc`, start of the base-class initializer) are absent from this extract.
111  template < template < typename > class ALLOC >
113  const LabelizedVariable& var,
114  const bool editable_dictionary,
115  std::size_t max_dico_entries,
117  alloc) :
119  editable_dictionary,
120  max_dico_entries,
121  alloc),
122  __variable(var) {
123  // check that the variable has not too many entries
124  if (var.domainSize() > max_dico_entries) {
125  GUM_ERROR(SizeError,
126  "the dictionary induced by the variable is too large");
127  }
128 
129  // add the content of the variable into the back dictionary
// `size` doubles as the running index associated with each label.
130  std::size_t size = 0;
131  for (const auto& label : var.labels()) {
132  // insert the label into the back_dictionary
133  this->_back_dico.insert(size, label);
134  ++size;
135  }
136 
137  GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
138  }
139 
140 
// Copy constructor with a given allocator: copy-constructs the DBTranslator
// base from (from, alloc) and copies from.__variable.
// NOTE(review): listing lines 143 and 145 (constructor name and type of
// `alloc`) are absent from this extract.
142  template < template < typename > class ALLOC >
144  const DBTranslator4LabelizedVariable< ALLOC >& from,
146  alloc) :
147  DBTranslator< ALLOC >(from, alloc),
148  __variable(from.__variable) {
// GUM_CONS_CPY is a project macro defined outside this file.
149  GUM_CONS_CPY(DBTranslator4LabelizedVariable);
150  }
151 
152 
// Copy constructor: delegates to the (from, allocator) constructor, reusing
// the allocator returned by from.getAllocator().
// NOTE(review): listing line 155 (the constructor name) is absent from this
// extract.
154  template < template < typename > class ALLOC >
156  const DBTranslator4LabelizedVariable< ALLOC >& from) :
157  DBTranslator4LabelizedVariable< ALLOC >(from, from.getAllocator()) {}
158 
159 
// Move constructor with a given allocator: move-constructs the DBTranslator
// base from (std::move(from), alloc), then moves from.__variable.
// NOTE(review): __variable is read from `from` after `from` was passed to the
// base as an rvalue; this presumably relies on the base constructor touching
// only its own members -- confirm against DBTranslator.
// NOTE(review): listing lines 162 and 164 (constructor name and type of
// `alloc`) are absent from this extract.
161  template < template < typename > class ALLOC >
163  DBTranslator4LabelizedVariable< ALLOC >&& from,
165  alloc) :
166  DBTranslator< ALLOC >(std::move(from), alloc),
167  __variable(std::move(from.__variable)) {
// GUM_CONS_MOV is a project macro defined outside this file.
168  GUM_CONS_MOV(DBTranslator4LabelizedVariable);
169  }
170 
171 
// Move constructor: delegates to the (from&&, allocator) constructor with the
// allocator returned by from.getAllocator(). std::move is only a cast, so
// `from` is still intact when getAllocator() is evaluated.
// NOTE(review): listing line 174 (the constructor name) is absent from this
// extract.
173  template < template < typename > class ALLOC >
175  DBTranslator4LabelizedVariable< ALLOC >&& from) :
176  DBTranslator4LabelizedVariable< ALLOC >(std::move(from),
177  from.getAllocator()) {}
178 
179 
// Virtual copy constructor with a given allocator.
// Allocates storage for one translator through an
// ALLOC< DBTranslator4LabelizedVariable< ALLOC > > built from `alloc`, and
// copy-constructs *this into it. If construction throws, the storage is
// released and the exception is rethrown. Returns the new translator.
// NOTE(review): the caller presumably has to destroy and deallocate the
// returned object with a matching allocator -- confirm with the class header.
// NOTE(review): listing lines 183-184 (method name and type of `alloc`) are
// absent from this extract.
181  template < template < typename > class ALLOC >
182  DBTranslator4LabelizedVariable< ALLOC >*
185  alloc) const {
186  ALLOC< DBTranslator4LabelizedVariable< ALLOC > > allocator(alloc);
187  DBTranslator4LabelizedVariable< ALLOC >* translator = allocator.allocate(1);
188  try {
189  allocator.construct(translator, *this, alloc);
190  } catch (...) {
// construction failed: give the raw storage back before propagating
191  allocator.deallocate(translator, 1);
192  throw;
193  }
194  return translator;
195  }
196 
197 
// Virtual copy constructor: clones this translator using its own allocator
// (forwards to clone(this->getAllocator())).
// NOTE(review): listing line 201 (the method name) is absent from this
// extract.
199  template < template < typename > class ALLOC >
200  INLINE DBTranslator4LabelizedVariable< ALLOC >*
202  return clone(this->getAllocator());
203  }
204 
205 
// Destructor. The body only invokes the GUM_DESTRUCTOR project macro (defined
// outside this file); members are released by their own destructors.
// NOTE(review): listing line 209 (the destructor name) is absent from this
// extract.
207  template < template < typename > class ALLOC >
208  INLINE
210  GUM_DESTRUCTOR(DBTranslator4LabelizedVariable);
211  }
212 
213 
// Copy operator. Self-assignment is a no-op; otherwise __variable is copied
// from `from`. Returns *this.
// NOTE(review): listing line 220 is absent from this extract; since the member
// summary lists DBTranslator< ALLOC >::operator=(const DBTranslator&), that
// line presumably assigns the base-class part -- confirm against the real
// source before concluding the base is left unassigned.
215  template < template < typename > class ALLOC >
216  DBTranslator4LabelizedVariable< ALLOC >&
218  operator=(const DBTranslator4LabelizedVariable< ALLOC >& from) {
219  if (this != &from) {
221  __variable = from.__variable;
222  }
223 
224  return *this;
225  }
226 
227 
// Move operator. Self-assignment is a no-op; otherwise the DBTranslator base
// part is move-assigned first and __variable is then moved from `from`.
// Returns *this.
// NOTE(review): listing line 231 (the class qualifier) is absent from this
// extract.
229  template < template < typename > class ALLOC >
230  DBTranslator4LabelizedVariable< ALLOC >&
232  operator=(DBTranslator4LabelizedVariable< ALLOC >&& from) {
233  if (this != &from) {
234  DBTranslator< ALLOC >::operator=(std::move(from));
235  __variable = std::move(from.__variable);
236  }
237 
238  return *this;
239  }
240 
241 
// Returns the translation of a string.
// Resolution order:
//   1. str is a label of __variable  -> its index, as a std::size_t;
//   2. str is a missing symbol       -> std::numeric_limits<std::size_t>::max();
//   3. the dictionary is editable    -> str is appended as a new label (and to
//      _back_dico) and its new index is returned; throws SizeError when the
//      variable already holds _max_dico_entries labels;
//   4. otherwise                     -> throws UnknownLabelInDatabase.
// NOTE(review): listing line 245 (the method name and its parameter) is absent
// from this extract; the member summary gives
// `translate(const std::string& str)`.
243  template < template < typename > class ALLOC >
244  DBTranslatedValue
246  // try to get the index of str within the labelized variable. If this
247  // cannot be found, try to find if this corresponds to a missing value.
248  // Finally, if this is still not a missing value and, if enabled, try
249  // to add str as a new label
250  try {
251  return DBTranslatedValue{std::size_t(__variable[str])};
252  } catch (gum::Exception&) {
// reaching this handler means the lookup of str in __variable threw
253  // check that this is not a missing value
254  if (this->isMissingSymbol(str)) {
255  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
256  }
257 
258  // try to add str as a new value if possible
259  if (this->hasEditableDictionary()) {
// the current domain size is also the index the new label will receive
260  const std::size_t size = __variable.domainSize();
261  if (size >= this->_max_dico_entries)
262  GUM_ERROR(SizeError,
263  "String \"" << str << "\" cannot be translated "
264  << "because the dictionary is already full");
265  __variable.addLabel(str);
266  this->_back_dico.insert(size, str);
267  return DBTranslatedValue{size};
268  } else
269  GUM_ERROR(UnknownLabelInDatabase,
270  "The translation of \"" << str << "\" could not be found");
271  }
272  }
273 
274 
// Returns the original value for a given translation.
// Looks translated_val.discr_val up in _back_dico. If that lookup throws and
// the value equals std::numeric_limits<std::size_t>::max() while the set of
// missing symbols is not empty, the first symbol yielded by
// _missing_symbols.begin() is returned; in every other failure case
// UnknownLabelInDatabase is thrown.
// NOTE(review): listing line 277 (return type and method name) is absent from
// this extract; the member summary gives a std::string return type.
276  template < template < typename > class ALLOC >
278  const DBTranslatedValue translated_val) const {
279  try {
280  return this->_back_dico.second(translated_val.discr_val);
281  } catch (Exception&) {
282  // check if this is a missing value
283  if ((translated_val.discr_val == std::numeric_limits< std::size_t >::max())
284  && !this->_missing_symbols.empty())
285  return *(this->_missing_symbols.begin());
286  else
287  GUM_ERROR(UnknownLabelInDatabase,
288  "The back translation of \"" << translated_val.discr_val
289  << "\" could not be found");
290  }
291  }
292 
293 
295  template < template < typename > class ALLOC >
297  // if the variable contains only numbers, they should be increasing
298  const auto& labels = __variable.labels();
299  float last_number = std::numeric_limits< float >::lowest();
300  float number;
301  bool only_numbers = true;
302  for (const auto& label : labels) {
303  if (!DBCell::isReal(label)) {
304  only_numbers = false;
305  break;
306  }
307  number = std::stof(label);
308  if (number < last_number) return true;
309  last_number = number;
310  }
311 
312  if (!only_numbers) {
313  // here we shall examine whether the strings are sorted by
314  // lexicographical order
315  const std::size_t size = labels.size();
316  for (std::size_t i = 1; i < size; ++i) {
317  if (labels[i] < labels[i - 1]) return true;
318  }
319  }
320 
321  return false;
322  }
323 
324 
326  template < template < typename > class ALLOC >
327  HashTable< std::size_t,
328  std::size_t,
329  ALLOC< std::pair< std::size_t, std::size_t > > >
331  // check whether the variable contains only numeric values. In this
332  // case, we have to sort the values by increasing number
333  const auto& labels = __variable.labels();
334  const std::size_t size = labels.size();
335 
336  bool only_numbers = true;
337  for (const auto& label : labels) {
338  if (!DBCell::isReal(label)) {
339  only_numbers = false;
340  break;
341  }
342  }
343 
344  // assign to each label its current index
345  std::vector< std::pair< std::size_t, std::string >,
346  ALLOC< std::pair< std::size_t, std::string > > >
347  xlabels;
348  xlabels.reserve(size);
349  for (std::size_t i = std::size_t(0); i < size; ++i)
350  xlabels.push_back(std::make_pair(i, labels[i]));
351 
352  // reorder by increasing order
353  if (only_numbers)
354  std::sort(xlabels.begin(),
355  xlabels.end(),
356  [](const std::pair< std::size_t, std::string >& a,
357  const std::pair< std::size_t, std::string >& b) -> bool {
358  return std::stof(a.second) < std::stof(b.second);
359  });
360  else
361  std::sort(xlabels.begin(),
362  xlabels.end(),
363  [](const std::pair< std::size_t, std::string >& a,
364  const std::pair< std::size_t, std::string >& b) -> bool {
365  return a.second < b.second;
366  });
367 
368  // check whether there were any modification
369  bool modifications = false;
370  for (std::size_t i = std::size_t(0); i < size; ++i) {
371  if (xlabels[i].first != i) {
372  modifications = true;
373  break;
374  }
375  }
376 
377  // if there were no modification, return an empty update hashtable
378  if (!modifications) {
379  return HashTable< std::size_t,
380  std::size_t,
381  ALLOC< std::pair< std::size_t, std::size_t > > >();
382  }
383 
384  // recreate the variable so that the labels correspond to the
385  // new ordering
386  __variable.eraseLabels();
387  for (auto& label : xlabels)
388  __variable.addLabel(label.second);
389 
390  // create the hashTable corresponding to the mapping from the old
391  // indices to the new one
392  this->_back_dico.clear();
393  HashTable< std::size_t,
394  std::size_t,
395  ALLOC< std::pair< std::size_t, std::size_t > > >
396  mapping((Size)size);
397  for (std::size_t i = std::size_t(0); i < size; ++i) {
398  mapping.insert(xlabels[i].first, i);
399  this->_back_dico.insert(i, xlabels[i].second);
400  }
401 
402  return mapping;
403  }
404 
405 
// Returns the domain size of the variable corresponding to the translations,
// i.e. __variable.domainSize().
// NOTE(review): listing line 409 (the method name) is absent from this
// extract.
407  template < template < typename > class ALLOC >
408  INLINE std::size_t
410  return __variable.domainSize();
411  }
412 
413 
// Returns the variable stored into the translator, as a pointer to the
// __variable member (not a copy).
// NOTE(review): listing line 417 (the method name) is absent from this
// extract.
415  template < template < typename > class ALLOC >
416  INLINE const LabelizedVariable*
418  return &__variable;
419  }
420 
421 
// Returns the translation of a missing value: a DBTranslatedValue built from
// std::numeric_limits<std::size_t>::max(), the same sentinel that translate()
// returns for missing symbols.
// NOTE(review): listing line 425 (the method name) is absent from this
// extract.
423  template < template < typename > class ALLOC >
424  INLINE DBTranslatedValue
426  return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
427  }
428 
429 
430  } /* namespace learning */
431 
432 } /* namespace gum */
433 
434 
435 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
Bijection< std::size_t, std::string, ALLOC< std::pair< float, std::string > > > _back_dico
the bijection relating back translated values and their original strings.
Definition: DBTranslator.h:393
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
virtual const LabelizedVariable * variable() const final
returns the variable stored into the translator
virtual DBTranslatedValue missingValue() const final
returns the translation of a missing value
void clear()
Removes all the associations from the gum::Bijection.
virtual ~DBTranslator4LabelizedVariable()
destructor
std::size_t _max_dico_entries
the maximum number of entries that the dictionary is allowed to contain
Definition: DBTranslator.h:379
DBTranslator4LabelizedVariable(const std::vector< std::string, XALLOC< std::string > > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor without any initial variable
virtual std::size_t domainSize() const final
returns the domain size of a variable corresponding to the translations
STL namespace.
void erase(const Key &k)
Erases an element from the set.
Definition: set_tpl.h:653
virtual std::string translateBack(const DBTranslatedValue translated_val) const final
returns the original value for a given translation
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
DBTranslator(DBTranslatedValueType val_type, const std::vector< std::string, XALLOC< std::string > > &missing_symbols, const bool editable_dictionary=true, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max(), const allocator_type &alloc=allocator_type())
default constructor
iterator begin() const
The usual unsafe begin iterator to parse the set.
Definition: set_tpl.h:514
bool exists(const Key &k) const
Indicates whether a given element belongs to the set.
Definition: set_tpl.h:604
DBTranslator4LabelizedVariable< ALLOC > & operator=(const DBTranslator4LabelizedVariable< ALLOC > &from)
copy operator
allocator_type getAllocator() const
returns the allocator used by the translator
The class representing the original values of the cells of databases.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
static bool isReal(const std::string &str)
determine whether a string corresponds precisely to a real number
Set< std::string, ALLOC< std::string > > _missing_symbols
the set of missing symbols
Definition: DBTranslator.h:382
DBTranslator< ALLOC > & operator=(const DBTranslator< ALLOC > &from)
copy operator
virtual bool needsReordering() const final
indicates whether a reordering is needed to make the translations sorted
Base class for all aGrUM's exceptions.
Definition: exceptions.h:103
virtual DBTranslatedValue translate(const std::string &str) final
returns the translation of a string
virtual DBTranslator4LabelizedVariable< ALLOC > * clone() const
virtual copy constructor
virtual bool hasEditableDictionary() const
indicates whether the translator has an editable dictionary or not
bool isMissingSymbol(const std::string &str) const
indicates whether a string corresponds to a missing symbol
virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > reorder() final
performs a reordering of the dictionary and returns a mapping from the old translated values to the n...
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:45
The databases' cell translators for labelized variables.
typename DBTranslator< ALLOC >::allocator_type allocator_type
type for the allocators passed in arguments of methods
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52