aGrUM  0.14.2
DBRowGeneratorEM_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
// NOTE(review): only the template header and the closing brace of this
// definition are present here; the listing jumps from line 34 to line 38, so
// the signature and body are not visible and cannot be documented.
34  template < typename GUM_SCALAR, template < typename > class ALLOC >
38  }
39 
40 
// Constructor taking the column types, the Bayesian network, the
// NodeId <-> column bijection and an allocator.
// NOTE(review): listing lines 43, 49 and 54 (function name, allocator
// parameter type and one base-class argument) are absent from this extract.
42  template < typename GUM_SCALAR, template < typename > class ALLOC >
44  const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > >
45  column_types,
46  const BayesNet< GUM_SCALAR >& bn,
47  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
48  nodeId2columns,
50  alloc) :
// the visible arguments are forwarded unchanged to the base class
51  DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(
52  column_types,
53  bn,
55  nodeId2columns,
56  alloc),
// both output rows start with bn.size() elements; presumably the 1.0 is the
// initial row weight -- TODO confirm against DBRow's constructor
57  __filled_row1(bn.size(), 1.0, alloc),
58  __filled_row2(bn.size(), 1.0, alloc) {
// setBayesNet checks the column mapping against bn and resizes both filled
// rows
59  setBayesNet(bn);
60 
61  GUM_CONSTRUCTOR(DBRowGeneratorEM);
62  }
63 
64 
// Copy constructor with a given allocator: the base class is copy-constructed
// with (from, alloc) and every data member is copied from `from`.
// NOTE(review): listing lines 67 and 69 (function name and allocator
// parameter type) are absent from this extract.
66  template < typename GUM_SCALAR, template < typename > class ALLOC >
68  const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from,
70  alloc) :
71  DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(from, alloc),
72  __input_row(from.__input_row), __missing_cols(from.__missing_cols),
73  __nb_miss(from.__nb_miss), __joint_proba(from.__joint_proba),
74  __filled_row1(from.__filled_row1), __filled_row2(from.__filled_row2),
75  __use_filled_row1(from.__use_filled_row1),
76  __original_weight(from.__original_weight) {
// __joint_inst is not in the initializer list: when from has no
// instantiation it keeps whatever default it is declared with (presumably
// nullptr -- TODO confirm in the class declaration)
77  if (from.__joint_inst != nullptr) {
// the new instantiation is built on our own copy of the joint potential,
// then each variable is set to the value it has in from's instantiation
78  __joint_inst = new Instantiation(__joint_proba);
79  const auto& var_seq = __joint_inst->variablesSequence();
80  const std::size_t size = var_seq.size();
81  for (std::size_t i = std::size_t(0); i < size; ++i) {
82  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
83  }
84  }
85 
86  GUM_CONS_CPY(DBRowGeneratorEM);
87  }
88 
89 
// Copy constructor: delegates to the allocator-aware copy constructor, using
// the allocator returned by from.getAllocator().
// NOTE(review): listing line 92 (function name) is absent from this extract.
91  template < typename GUM_SCALAR, template < typename > class ALLOC >
93  const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) :
94  DBRowGeneratorEM< GUM_SCALAR, ALLOC >(from, from.getAllocator()) {}
95 
96 
// Move constructor with a given allocator: the base class and the container
// members are moved from `from`, the pointer and scalar members are copied.
// NOTE(review): listing lines 99 and 101 (function name and allocator
// parameter type) are absent from this extract.
98  template < typename GUM_SCALAR, template < typename > class ALLOC >
100  DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from,
102  alloc) :
103  DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(std::move(from), alloc),
104  __input_row(from.__input_row),
105  __missing_cols(std::move(from.__missing_cols)), __nb_miss(from.__nb_miss),
106  __joint_proba(std::move(from.__joint_proba)),
107  __filled_row1(std::move(from.__filled_row1)),
108  __filled_row2(std::move(from.__filled_row2)),
109  __use_filled_row1(from.__use_filled_row1),
110  __original_weight(from.__original_weight) {
// from's instantiation is not taken over: a new one is created on our own
// joint potential and set to the same values.
// NOTE(review): from.__joint_inst is read after from.__joint_proba has been
// moved from, and it is neither released nor reset here -- confirm that this
// is safe.
111  if (from.__joint_inst != nullptr) {
112  __joint_inst = new Instantiation(__joint_proba);
113  const auto& var_seq = __joint_inst->variablesSequence();
114  const std::size_t size = var_seq.size();
115  for (std::size_t i = std::size_t(0); i < size; ++i) {
116  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
117  }
118  }
119 
120  GUM_CONS_MOV(DBRowGeneratorEM);
121  }
122 
123 
// Move constructor: delegates to the allocator-aware move constructor, using
// the allocator returned by from.getAllocator().
// NOTE(review): listing line 126 (function name) is absent from this extract.
125  template < typename GUM_SCALAR, template < typename > class ALLOC >
127  DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from) :
128  DBRowGeneratorEM< GUM_SCALAR, ALLOC >(std::move(from),
129  from.getAllocator()) {}
130 
131 
// Virtual copy constructor with a given allocator: returns a pointer to a
// copy of *this whose storage is obtained from an allocator built from
// `alloc`.
// NOTE(review): listing lines 135-136 (function name and allocator parameter
// type) are absent from this extract.
133  template < typename GUM_SCALAR, template < typename > class ALLOC >
134  DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
137  alloc) const {
// allocator for DBRowGeneratorEM objects, constructed from alloc
138  ALLOC< DBRowGeneratorEM< GUM_SCALAR, ALLOC > > allocator(alloc);
139  DBRowGeneratorEM< GUM_SCALAR, ALLOC >* generator = allocator.allocate(1);
140  try {
// copy-construct *this in place in the freshly allocated storage
141  allocator.construct(generator, *this, alloc);
142  } catch (...) {
// construction failed: give the raw storage back and propagate the exception
143  allocator.deallocate(generator, 1);
144  throw;
145  }
146  return generator;
147  }
148 
149 
// Virtual copy constructor: clones *this using the allocator returned by
// this->getAllocator().
// NOTE(review): listing line 153 (function name) is absent from this extract.
151  template < typename GUM_SCALAR, template < typename > class ALLOC >
152  DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
154  return clone(this->getAllocator());
155  }
156 
157 
// Destructor: releases the instantiation created with `new`, if any.
// NOTE(review): listing line 160 (function name) is absent from this extract.
159  template < typename GUM_SCALAR, template < typename > class ALLOC >
// the null test is redundant in C++ (deleting a null pointer does nothing)
161  if (__joint_inst != nullptr) delete __joint_inst;
162  GUM_DESTRUCTOR(DBRowGeneratorEM);
163  }
164 
165 
// Copy assignment: copies every data member of `from` and rebuilds the
// instantiation on this object's own joint potential. Self-assignment is a
// no-op. Returns *this.
167  template < typename GUM_SCALAR, template < typename > class ALLOC >
168  DBRowGeneratorEM< GUM_SCALAR, ALLOC >& DBRowGeneratorEM< GUM_SCALAR, ALLOC >::
169  operator=(const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) {
170  if (this != &from) {
// NOTE(review): listing line 171 is absent from this extract; presumably it
// assigns the base-class part -- TODO confirm.
172  __input_row = from.__input_row;
173  __missing_cols = from.__missing_cols;
174  __nb_miss = from.__nb_miss;
175  __joint_proba = from.__joint_proba;
176  __filled_row1 = from.__filled_row1;
177  __filled_row2 = from.__filled_row2;
178  __use_filled_row1 = from.__use_filled_row1;
179  __original_weight = from.__original_weight;
180 
// drop the instantiation we owned so far
181  if (__joint_inst != nullptr) {
182  delete __joint_inst;
183  __joint_inst = nullptr;
184  }
185 
// create a new instantiation on our copy of the joint potential and give
// each variable the value it has in from's instantiation
186  if (from.__joint_inst != nullptr) {
187  __joint_inst = new Instantiation(__joint_proba);
188  const auto& var_seq = __joint_inst->variablesSequence();
189  const std::size_t size = var_seq.size();
190  for (std::size_t i = std::size_t(0); i < size; ++i) {
191  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
192  }
193  }
194  }
195 
196  return *this;
197  }
198 
199 
// Move assignment: moves the container members of `from`, copies its pointer
// and scalar members, and rebuilds the instantiation on this object's own
// joint potential. Self-assignment is a no-op. Returns *this.
201  template < typename GUM_SCALAR, template < typename > class ALLOC >
202  DBRowGeneratorEM< GUM_SCALAR, ALLOC >& DBRowGeneratorEM< GUM_SCALAR, ALLOC >::
203  operator=(DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from) {
204  if (this != &from) {
// NOTE(review): listing line 205 is absent from this extract; presumably it
// assigns the base-class part -- TODO confirm.
206  __input_row = from.__input_row;
207  __missing_cols = std::move(from.__missing_cols);
208  __nb_miss = from.__nb_miss;
209  __joint_proba = std::move(from.__joint_proba);
210  __filled_row1 = std::move(from.__filled_row1);
211  __filled_row2 = std::move(from.__filled_row2);
212  __use_filled_row1 = from.__use_filled_row1;
213  __original_weight = from.__original_weight;
214 
// drop the instantiation we owned so far
215  if (__joint_inst != nullptr) {
216  delete __joint_inst;
217  __joint_inst = nullptr;
218  }
219 
// from's instantiation is not taken over: a new one is created on our joint
// potential and set to the same values.
// NOTE(review): from.__joint_inst is read after from.__joint_proba has been
// moved from -- confirm that this is safe.
220  if (from.__joint_inst != nullptr) {
221  __joint_inst = new Instantiation(__joint_proba);
222  const auto& var_seq = __joint_inst->variablesSequence();
223  const std::size_t size = var_seq.size();
224  for (std::size_t i = std::size_t(0); i < size; ++i) {
225  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
226  }
227  }
228  }
229 
230  return *this;
231  }
232 
233 
// Returns the next output row for the current input row.
// If the input row was fully observed, that row itself is returned. Otherwise
// one of the two internal rows is returned, completed with the values of the
// current joint instantiation and weighted by the joint probability of these
// values times the weight of the input row. Successive calls alternate
// between the two internal rows.
// NOTE(review): listing line 237 (function name) is absent from this extract.
235  template < typename GUM_SCALAR, template < typename > class ALLOC >
236  INLINE const DBRow< DBTranslatedValue, ALLOC >&
238  this->decreaseRemainingRows();
239 
240  // if everything is observed, return the input row
241  if (__input_row != nullptr) return *__input_row;
242 
243  if (__use_filled_row1) {
244  // get the weight of the row from the joint probability
245  __filled_row1.setWeight(__joint_proba.get(*__joint_inst)
246  * __original_weight);
247 
248  // fill the values of the row
// the i-th variable of the instantiation goes into column __missing_cols[i]
249  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i)
250  __filled_row1[__missing_cols[i]].discr_val = __joint_inst->val(i);
251 
// move on to the next combination of values and switch to the other row for
// the next call
252  __joint_inst->inc();
253  __use_filled_row1 = false;
254 
255  return __filled_row1;
256  } else {
257  // get the weight of the row from the joint probability
258  __filled_row2.setWeight(__joint_proba.get(*__joint_inst)
259  * __original_weight);
260 
261  // fill the values of the row
262  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i)
263  __filled_row2[__missing_cols[i]].discr_val = __joint_inst->val(i);
264 
265  __joint_inst->inc();
266  __use_filled_row1 = true;
267 
268  return __filled_row2;
269  }
270  }
271 
272 
274  template < typename GUM_SCALAR, template < typename > class ALLOC >
276  const DBRow< DBTranslatedValue, ALLOC >& row) {
277  // check if there are unobserved values among the columns of interest.
278  // If this is the case, set them as targets
279  bool found_unobserved = false;
280  const auto& xrow = row.row();
281  for (const auto col : this->_columns_of_interest) {
282  switch (this->_column_types[col]) {
284  if (xrow[col].discr_val == std::numeric_limits< std::size_t >::max()) {
285  if (!found_unobserved) {
286  __missing_cols.clear();
287  found_unobserved = true;
288  }
289  __missing_cols.push_back(col);
290  }
291  break;
292 
294  GUM_ERROR(NotImplementedYet,
295  "The BDRowGeneratorEM does not handle yet continuous "
296  << "variables. But the variable in column" << col
297  << " is continuous.");
298  break;
299 
300  default:
301  GUM_ERROR(NotImplementedYet,
302  "DBTranslatedValueType " << int(this->_column_types[col])
303  << " is not supported yet");
304  }
305  }
306 
307  // if there is no unobserved value, make the __input_row point to the row
308  if (!found_unobserved) {
309  __input_row = &row;
310  return std::size_t(1);
311  }
312 
313  __input_row = nullptr;
314  __nb_miss = __missing_cols.size();
315  __original_weight = row.weight();
316 
317  // here, there are missing symbols, so we should compute the distribution
318  // of the missing values. For this purpose, we use Variable Elimination
319  VariableElimination< GUM_SCALAR > ve(this->_bn);
320 
321  // add the targets and fill the output row with the observed values
322  NodeSet target_set(__nb_miss);
323  if (this->_nodeId2columns.empty()) {
324  std::size_t i = std::size_t(0);
325  bool end_miss = false;
326  for (const auto col : this->_columns_of_interest) {
327  if (!end_miss && (col == __missing_cols[i])) {
328  target_set.insert(NodeId(col));
329  ++i;
330  if (i == __nb_miss) end_miss = true;
331  } else {
332  __filled_row1[col].discr_val = xrow[col].discr_val;
333  __filled_row2[col].discr_val = xrow[col].discr_val;
334  }
335  }
336  } else {
337  std::size_t i = std::size_t(0);
338  bool end_miss = false;
339  for (const auto col : this->_columns_of_interest) {
340  if (!end_miss && (col == __missing_cols[i])) {
341  target_set.insert(this->_nodeId2columns.first(col));
342  ++i;
343  if (i == __nb_miss) end_miss = true;
344  } else {
345  __filled_row1[col].discr_val = xrow[col].discr_val;
346  __filled_row2[col].discr_val = xrow[col].discr_val;
347  }
348  }
349  }
350 
351  ve.addJointTarget(target_set);
352 
353  // add the evidence and the target
354  const std::size_t row_size = xrow.size();
355  if (this->_nodeId2columns.empty()) {
356  for (std::size_t col = std::size_t(0); col < row_size; ++col) {
357  switch (this->_column_types[col]) {
359  // only observed values are evidence
360  if (xrow[col].discr_val
361  != std::numeric_limits< std::size_t >::max()) {
362  ve.addEvidence(NodeId(col), xrow[col].discr_val);
363  }
364  break;
365 
367  GUM_ERROR(NotImplementedYet,
368  "The BDRowGeneratorEM does not handle yet continuous "
369  << "variables. But the variable in column" << col
370  << " is continuous.");
371  break;
372 
373  default:
374  GUM_ERROR(NotImplementedYet,
375  "DBTranslatedValueType " << int(this->_column_types[col])
376  << " is not supported yet");
377  }
378  }
379  } else {
380  for (std::size_t col = std::size_t(0); col < row_size; ++col) {
381  switch (this->_column_types[col]) {
383  // only observed values are evidence
384  if (xrow[col].discr_val
385  != std::numeric_limits< std::size_t >::max()) {
386  ve.addEvidence(this->_nodeId2columns.first(col),
387  xrow[col].discr_val);
388  }
389  break;
390 
392  GUM_ERROR(NotImplementedYet,
393  "The BDRowGeneratorEM does not handle yet continuous "
394  << "variables. But the variable in column" << col
395  << " is continuous.");
396  break;
397 
398  default:
399  GUM_ERROR(NotImplementedYet,
400  "DBTranslatedValueType " << int(this->_column_types[col])
401  << " is not supported yet");
402  }
403  }
404  }
405 
406  // get the potential of the target set
407  Potential< GUM_SCALAR >& pot =
408  const_cast< Potential< GUM_SCALAR >& >(ve.jointPosterior(target_set));
409  __joint_proba = std::move(pot);
410  if (__joint_inst != nullptr) delete __joint_inst;
411  __joint_inst = new Instantiation(__joint_proba);
412 
413  // get the mapping between variables of the joint proba and the
414  // columns in the database
415  const auto& var_sequence = __joint_proba.variablesSequence();
416  if (this->_nodeId2columns.empty()) {
417  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i) {
418  __missing_cols[i] = std::size_t(this->_bn->nodeId(*(var_sequence[i])));
419  }
420  } else {
421  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i) {
422  __missing_cols[i] =
423  this->_nodeId2columns.second(this->_bn->nodeId(*(var_sequence[i])));
424  }
425  }
426 
427  return std::size_t(__joint_proba.domainSize());
428  }
429 
430 
// Assigns a new Bayesian network to the generator.
// Throws IdError if _nodeId2columns maps a column to a node id that is not a
// node of new_bn's DAG. Both filled rows are then resized to hold the largest
// index in use.
// NOTE(review): listing line 433 (function name) is absent from this extract.
432  template < typename GUM_SCALAR, template < typename > class ALLOC >
434  const BayesNet< GUM_SCALAR >& new_bn) {
435  // check that if nodeId2columns is not empty, then all the columns
436  // correspond to nodes of the BN
437  if (!this->_nodeId2columns.empty()) {
438  const DAG& dag = new_bn.dag();
439  for (auto iter = this->_nodeId2columns.begin();
440  iter != this->_nodeId2columns.end();
441  ++iter) {
442  if (!dag.existsNode(iter.first())) {
443  GUM_ERROR(IdError,
444  "Column "
445  << iter.second()
446  << " of the database is associated to Node ID "
447  << iter.first()
448  << ", which does not belong to the Bayesian network");
449  }
450  }
451  }
452 
// NOTE(review): listing line 453 is absent from this extract; presumably it
// forwards new_bn to the base class -- TODO confirm.
454 
455  // we determine the size of the filled rows
// size ends up as the largest node id (empty mapping) or the largest column
// index (non-empty mapping); the rows need one more element than that
456  std::size_t size = std::size_t(0);
457  if (this->_nodeId2columns.empty()) {
458  for (auto node : new_bn.dag())
459  if (std::size_t(node) > size) size = std::size_t(node);
460  } else {
461  for (auto iter = this->_nodeId2columns.begin();
462  iter != this->_nodeId2columns.end();
463  ++iter) {
464  if (iter.second() > size) size = iter.second();
465  }
466  }
467  __filled_row1.resize(size + 1);
468  __filled_row2.resize(size + 1);
469  }
470 
471  } /* namespace learning */
472 
473 } /* namespace gum */
474 
475 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
std::vector< std::size_t, ALLOC< std::size_t > > _columns_of_interest
the set of columns of interest
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
DBRowGeneratorGoal
the type of things that a DBRowGenerator is designed for
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
void decreaseRemainingRows()
decrease the number of remaining output rows
Set< NodeId > NodeSet
Some typedefs and defines for shortcuts ...
DBRowGeneratorEM(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
const BayesNet< GUM_SCALAR > * _bn
the Bayesian network used to fill the unobserved values
iterator begin() const
Returns the unsafe iterator at the beginning of the gum::Bijection.
STL namespace.
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
const iterator & end() const noexcept
Returns the unsafe iterator at the end of the gum::Bijection.
virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC > * clone() const override final
virtual copy constructor
bool empty() const noexcept
Returns true if the gum::Bijection doesn't contain any association.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
allocator_type getAllocator() const
returns the allocator used
DBRowGeneratorWithBN< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorWithBN< GUM_SCALAR, ALLOC > &from)
copy operator
Bijection< NodeId, std::size_t, ALLOC< std::size_t > > _nodeId2columns
the mapping between the BN's node ids and the database's columns
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn) override final
assign a new Bayes net to the generator
virtual std::size_t _computeRows(const DBRow< DBTranslatedValue, ALLOC > &row) override final
computes the rows it will provide as output
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to the generator
DBRowGeneratorWithBN(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const DBRowGeneratorGoal goal, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
virtual const DBRow< DBTranslatedValue, ALLOC > & generate() override final
generates one output DBRow for each DBRow passed to method setInputRow
std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > _column_types
the types of the columns in the DatabaseTable
Size Idx
Type for indexes.
Definition: types.h:50
DBRowGeneratorEM< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorEM< GUM_SCALAR, ALLOC > &from)
copy operator
A DBRowGenerator class that returns exactly the rows it gets in input.
Size NodeId
Type for node ids.
Definition: graphElements.h:97
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
allocator_type getAllocator() const
returns the allocator used
ALLOC< DBTranslatedValue > allocator_type
type for the allocators passed in arguments of methods