aGrUM  0.16.0
DBRowGeneratorEM_tpl.h
Go to the documentation of this file.
1 
29 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 
32 namespace gum {
33 
34  namespace learning {
35 
// NOTE(review): only the template header and the closing brace of this
// definition appear in this extract (listing lines 38-40 are absent), so
// neither its name nor its body can be documented from here.
37  template < typename GUM_SCALAR, template < typename > class ALLOC >
41  }
42 
43 
// Constructor building a generator from the column types of the database,
// a Bayesian network, a node-id/column mapping and an allocator.
//   column_types   : forwarded to the DBRowGeneratorWithBN base
//   bn             : forwarded to the base and then passed to setBayesNet()
//   nodeId2columns : forwarded to the base
//   alloc          : forwarded to the base and to both filled rows
// NOTE(review): listing lines 46, 52 and 57 (the function header, the type
// of `alloc` and one base-constructor argument) are absent from this extract.
45  template < typename GUM_SCALAR, template < typename > class ALLOC >
47  const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > >
48  column_types,
49  const BayesNet< GUM_SCALAR >& bn,
50  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
51  nodeId2columns,
53  alloc) :
54  DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(
55  column_types,
56  bn,
58  nodeId2columns,
59  alloc),
    // both output rows start with bn.size() elements; the second argument
    // (1.0) is presumably the initial row weight -- confirm against DBRow
60  __filled_row1(bn.size(), 1.0, alloc),
61  __filled_row2(bn.size(), 1.0, alloc) {
    // setBayesNet() validates nodeId2columns against bn and resizes the two
    // filled rows (see the end of setBayesNet in this file)
62  setBayesNet(bn);
63 
64  GUM_CONSTRUCTOR(DBRowGeneratorEM);
65  }
66 
67 
// Copy constructor with an explicit allocator: forwards (from, alloc) to the
// DBRowGeneratorWithBN base and copies every data member of `from`.
// NOTE(review): listing lines 70 and 72 (parts of the function header) are
// absent from this extract.
69  template < typename GUM_SCALAR, template < typename > class ALLOC >
71  const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from,
73  alloc) :
74  DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(from, alloc),
75  __input_row(from.__input_row), __missing_cols(from.__missing_cols),
76  __nb_miss(from.__nb_miss), __joint_proba(from.__joint_proba),
77  __filled_row1(from.__filled_row1), __filled_row2(from.__filled_row2),
78  __use_filled_row1(from.__use_filled_row1),
79  __original_weight(from.__original_weight) {
    // __joint_inst is a raw pointer that is deleted elsewhere in this file,
    // so it is not shared with `from`: a new Instantiation is created on this
    // object's own __joint_proba and set, variable by variable, to the values
    // currently held by from.__joint_inst.
    // NOTE(review): when from.__joint_inst is null, __joint_inst is not
    // assigned here; presumably the class declaration gives it a default
    // initializer -- confirm.
80  if (from.__joint_inst != nullptr) {
81  __joint_inst = new Instantiation(__joint_proba);
82  const auto& var_seq = __joint_inst->variablesSequence();
83  const std::size_t size = var_seq.size();
84  for (std::size_t i = std::size_t(0); i < size; ++i) {
85  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
86  }
87  }
88 
89  GUM_CONS_CPY(DBRowGeneratorEM);
90  }
91 
92 
// Copy constructor: delegates to the (from, allocator) constructor, using
// the allocator returned by from.getAllocator().
// NOTE(review): listing line 95 (the function header) is absent from this
// extract.
94  template < typename GUM_SCALAR, template < typename > class ALLOC >
96  const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) :
97  DBRowGeneratorEM< GUM_SCALAR, ALLOC >(from, from.getAllocator()) {}
98 
99 
// Move constructor with an explicit allocator: forwards (std::move(from),
// alloc) to the DBRowGeneratorWithBN base, moves the container-like members
// of `from` and copies the pointer and scalar members.
// NOTE(review): listing lines 102 and 104 (parts of the function header) are
// absent from this extract.
101  template < typename GUM_SCALAR, template < typename > class ALLOC >
103  DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from,
105  alloc) :
106  DBRowGeneratorWithBN< GUM_SCALAR, ALLOC >(std::move(from), alloc),
107  __input_row(from.__input_row),
108  __missing_cols(std::move(from.__missing_cols)), __nb_miss(from.__nb_miss),
109  __joint_proba(std::move(from.__joint_proba)),
110  __filled_row1(std::move(from.__filled_row1)),
111  __filled_row2(std::move(from.__filled_row2)),
112  __use_filled_row1(from.__use_filled_row1),
113  __original_weight(from.__original_weight) {
    // from.__joint_inst is neither taken over nor reset: a new Instantiation
    // is allocated on this object's __joint_proba and positioned on the same
    // values as from.__joint_inst, which `from` keeps.
    // NOTE(review): from.__joint_inst was built on from.__joint_proba, which
    // has just been moved from; it is only read through val(i) here --
    // confirm that this is safe for Instantiation.
    // NOTE(review): when from.__joint_inst is null, __joint_inst is not
    // assigned here; presumably the class declaration gives it a default
    // initializer -- confirm.
114  if (from.__joint_inst != nullptr) {
115  __joint_inst = new Instantiation(__joint_proba);
116  const auto& var_seq = __joint_inst->variablesSequence();
117  const std::size_t size = var_seq.size();
118  for (std::size_t i = std::size_t(0); i < size; ++i) {
119  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
120  }
121  }
122 
123  GUM_CONS_MOV(DBRowGeneratorEM);
124  }
125 
126 
// Move constructor: delegates to the (from&&, allocator) constructor, using
// the allocator returned by from.getAllocator(). std::move(from) is only a
// cast, so calling from.getAllocator() in the same argument list still reads
// an intact object.
// NOTE(review): listing line 129 (the function header) is absent from this
// extract.
128  template < typename GUM_SCALAR, template < typename > class ALLOC >
130  DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from) :
131  DBRowGeneratorEM< GUM_SCALAR, ALLOC >(std::move(from),
132  from.getAllocator()) {}
133 
134 
// Creates a copy of this generator in storage obtained from an allocator
// built from `alloc`, and returns a pointer to it.
// If the copy construction throws, the storage is released and the exception
// is rethrown.
// NOTE(review): the returned object presumably has to be destroyed and
// deallocated by the caller with a matching allocator -- confirm.
// NOTE(review): listing lines 138 and 139 (function name and the type of
// `alloc`) are absent from this extract.
136  template < typename GUM_SCALAR, template < typename > class ALLOC >
137  DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
140  alloc) const {
    // allocator for DBRowGeneratorEM objects, constructed from `alloc`
141  ALLOC< DBRowGeneratorEM< GUM_SCALAR, ALLOC > > allocator(alloc);
142  DBRowGeneratorEM< GUM_SCALAR, ALLOC >* generator = allocator.allocate(1);
143  try {
    // NOTE(review): if ALLOC is std::allocator, the member construct() is
    // deprecated in C++17 and removed in C++20
144  allocator.construct(generator, *this, alloc);
145  } catch (...) {
    // construction failed: give the raw storage back before propagating
146  allocator.deallocate(generator, 1);
147  throw;
148  }
149  return generator;
150  }
151 
152 
// Creates a copy of this generator using its own allocator: forwards to
// clone(alloc) with the allocator returned by getAllocator().
// NOTE(review): listing line 156 (the function header) is absent from this
// extract.
154  template < typename GUM_SCALAR, template < typename > class ALLOC >
155  DBRowGeneratorEM< GUM_SCALAR, ALLOC >*
157  return clone(this->getAllocator());
158  }
159 
160 
// Releases the Instantiation pointed to by __joint_inst, if any.
// NOTE(review): listing line 163 (the function header) is absent from this
// extract; the GUM_DESTRUCTOR call suggests this is the destructor.
162  template < typename GUM_SCALAR, template < typename > class ALLOC >
    // the null test is not required: deleting a null pointer has no effect
164  if (__joint_inst != nullptr) delete __joint_inst;
165  GUM_DESTRUCTOR(DBRowGeneratorEM);
166  }
167 
168 
// Copy assignment: makes this generator a copy of `from` and returns *this.
// Self-assignment is detected and leaves the object untouched.
// NOTE(review): listing line 174 is absent from this extract; presumably it
// assigns the base-class part (DBRowGeneratorWithBN::operator= is listed in
// the class reference) -- confirm.
170  template < typename GUM_SCALAR, template < typename > class ALLOC >
171  DBRowGeneratorEM< GUM_SCALAR, ALLOC >& DBRowGeneratorEM< GUM_SCALAR, ALLOC >::
172  operator=(const DBRowGeneratorEM< GUM_SCALAR, ALLOC >& from) {
173  if (this != &from) {
175  __input_row = from.__input_row;
176  __missing_cols = from.__missing_cols;
177  __nb_miss = from.__nb_miss;
178  __joint_proba = from.__joint_proba;
179  __filled_row1 = from.__filled_row1;
180  __filled_row2 = from.__filled_row2;
181  __use_filled_row1 = from.__use_filled_row1;
182  __original_weight = from.__original_weight;
183 
    // drop the Instantiation currently held: it refers to the previous
    // content of __joint_proba
184  if (__joint_inst != nullptr) {
185  delete __joint_inst;
186  __joint_inst = nullptr;
187  }
188 
    // rebuild an Instantiation on the freshly copied __joint_proba and put
    // it on the same values as from.__joint_inst
189  if (from.__joint_inst != nullptr) {
190  __joint_inst = new Instantiation(__joint_proba);
191  const auto& var_seq = __joint_inst->variablesSequence();
192  const std::size_t size = var_seq.size();
193  for (std::size_t i = std::size_t(0); i < size; ++i) {
194  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
195  }
196  }
197  }
198 
199  return *this;
200  }
201 
202 
// Move assignment: moves the container-like members of `from` into this
// generator, copies its pointer and scalar members, and returns *this.
// Self-assignment is detected and leaves the object untouched.
// NOTE(review): listing line 208 is absent from this extract; presumably it
// move-assigns the base-class part -- confirm.
204  template < typename GUM_SCALAR, template < typename > class ALLOC >
205  DBRowGeneratorEM< GUM_SCALAR, ALLOC >& DBRowGeneratorEM< GUM_SCALAR, ALLOC >::
206  operator=(DBRowGeneratorEM< GUM_SCALAR, ALLOC >&& from) {
207  if (this != &from) {
209  __input_row = from.__input_row;
210  __missing_cols = std::move(from.__missing_cols);
211  __nb_miss = from.__nb_miss;
212  __joint_proba = std::move(from.__joint_proba);
213  __filled_row1 = std::move(from.__filled_row1);
214  __filled_row2 = std::move(from.__filled_row2);
215  __use_filled_row1 = from.__use_filled_row1;
216  __original_weight = from.__original_weight;
217 
    // drop the Instantiation currently held: it refers to the previous
    // content of __joint_proba
218  if (__joint_inst != nullptr) {
219  delete __joint_inst;
220  __joint_inst = nullptr;
221  }
222 
    // from.__joint_inst is not taken over: a new Instantiation is allocated
    // on this object's __joint_proba and positioned on the same values, and
    // `from` keeps its own pointer.
    // NOTE(review): from.__joint_inst was built on from.__joint_proba, which
    // has just been moved from; it is only read through val(i) here --
    // confirm that this is safe for Instantiation.
223  if (from.__joint_inst != nullptr) {
224  __joint_inst = new Instantiation(__joint_proba);
225  const auto& var_seq = __joint_inst->variablesSequence();
226  const std::size_t size = var_seq.size();
227  for (std::size_t i = std::size_t(0); i < size; ++i) {
228  __joint_inst->chgVal(Idx(i), from.__joint_inst->val(i));
229  }
230  }
231  }
232 
233  return *this;
234  }
235 
236 
// Returns the next output row for the input row last analysed by
// _computeRows().
//  - If that row had no missing value (__input_row is set), the input row
//    itself is returned.
//  - Otherwise one completed row is returned per call: the missing columns
//    receive the values of the current position of __joint_inst, the row
//    weight is the joint probability of these values multiplied by the
//    weight of the input row, and __joint_inst is then advanced.
// Every call first decrements the count of remaining rows.
// NOTE(review): listing line 240 (the function name) is absent from this
// extract.
238  template < typename GUM_SCALAR, template < typename > class ALLOC >
239  INLINE const DBRow< DBTranslatedValue, ALLOC >&
241  this->decreaseRemainingRows();
242 
243  // if everything is observed, return the input row
244  if (__input_row != nullptr) return *__input_row;
245 
    // the two filled rows are used alternately on successive calls;
    // presumably so that the row returned by the previous call is not
    // overwritten by this one -- confirm with the callers
246  if (__use_filled_row1) {
247  // get the weight of the row from the joint probability
248  __filled_row1.setWeight(__joint_proba.get(*__joint_inst)
249  * __original_weight);
250 
251  // fill the values of the row
    // __missing_cols[i] is the column of the i-th variable of __joint_inst
    // (this ordering is set up at the end of _computeRows)
252  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i)
253  __filled_row1[__missing_cols[i]].discr_val = __joint_inst->val(i);
254 
    // move to the next combination of values for the following call
255  __joint_inst->inc();
256  __use_filled_row1 = false;
257 
258  return __filled_row1;
259  } else {
260  // get the weight of the row from the joint probability
261  __filled_row2.setWeight(__joint_proba.get(*__joint_inst)
262  * __original_weight);
263 
264  // fill the values of the row
265  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i)
266  __filled_row2[__missing_cols[i]].discr_val = __joint_inst->val(i);
267 
    // move to the next combination of values for the following call
268  __joint_inst->inc();
269  __use_filled_row1 = true;
270 
271  return __filled_row2;
272  }
273  }
274 
275 
// Analyses an input row and prepares the rows that generate() will return
// for it; returns how many there are.
//  - A value is treated as missing when its discr_val equals
//    std::numeric_limits< std::size_t >::max().
//  - If no column of interest is missing, the address of `row` is stored in
//    __input_row and 1 is returned.
//  - Otherwise Variable Elimination is run on the Bayesian network with the
//    observed values as evidence and the missing columns as a joint target.
//    The joint posterior is stored in __joint_proba, a new Instantiation over
//    it is stored in __joint_inst, and the domain size of the posterior is
//    returned.
// GUM_ERROR(NotImplementedYet, ...) is raised for column types other than
// the one handled by the first switch branch.
// NOTE(review): listing line 278 (the function name) and the `case` labels
// of all three switch statements (listing lines 286, 296, 361, 369, 385,
// 394) are absent from this extract; the error texts suggest a discrete and
// a continuous case.
// NOTE(review): the error text reads "BDRowGeneratorEM" (probably meant
// DBRowGeneratorEM) and has no space between "column" and the column number.
277  template < typename GUM_SCALAR, template < typename > class ALLOC >
279  const DBRow< DBTranslatedValue, ALLOC >& row) {
280  // check if there are unobserved values among the columns of interest.
281  // If this is the case, set them as targets
282  bool found_unobserved = false;
283  const auto& xrow = row.row();
284  for (const auto col : this->_columns_of_interest) {
285  switch (this->_column_types[col]) {
287  if (xrow[col].discr_val == std::numeric_limits< std::size_t >::max()) {
    // __missing_cols is only reset on the first missing value found, so
    // a fully observed row leaves its previous content untouched
288  if (!found_unobserved) {
289  __missing_cols.clear();
290  found_unobserved = true;
291  }
292  __missing_cols.push_back(col);
293  }
294  break;
295 
297  GUM_ERROR(NotImplementedYet,
298  "The BDRowGeneratorEM does not handle yet continuous "
299  << "variables. But the variable in column" << col
300  << " is continuous.");
301  break;
302 
303  default:
304  GUM_ERROR(NotImplementedYet,
305  "DBTranslatedValueType " << int(this->_column_types[col])
306  << " is not supported yet");
307  }
308  }
309 
310  // if there is no unobserved value, make the __input_row point to the row
311  if (!found_unobserved) {
312  __input_row = &row;
313  return std::size_t(1);
314  }
315 
    // from here on the row has at least one missing value: generate() will
    // return filled rows, not the input row
316  __input_row = nullptr;
317  __nb_miss = __missing_cols.size();
318  __original_weight = row.weight();
319 
320  // here, there are missing symbols, so we should compute the distribution
321  // of the missing values. For this purpose, we use Variable Elimination
322  VariableElimination< GUM_SCALAR > ve(this->_bn);
323 
324  // add the targets and fill the output row with the observed values
    // __missing_cols was filled while iterating over _columns_of_interest,
    // so walking that container again meets the missing columns in the same
    // order; `i` is the index of the next missing column expected.
    // When _nodeId2columns is empty, the column index is used as the node id;
    // otherwise the node id is looked up in the bijection.
325  NodeSet target_set(__nb_miss);
326  if (this->_nodeId2columns.empty()) {
327  std::size_t i = std::size_t(0);
328  bool end_miss = false;
329  for (const auto col : this->_columns_of_interest) {
330  if (!end_miss && (col == __missing_cols[i])) {
331  target_set.insert(NodeId(col));
332  ++i;
    // end_miss stops further reads of __missing_cols[i] once i has
    // reached __nb_miss
333  if (i == __nb_miss) end_miss = true;
334  } else {
335  __filled_row1[col].discr_val = xrow[col].discr_val;
336  __filled_row2[col].discr_val = xrow[col].discr_val;
337  }
338  }
339  } else {
340  std::size_t i = std::size_t(0);
341  bool end_miss = false;
342  for (const auto col : this->_columns_of_interest) {
343  if (!end_miss && (col == __missing_cols[i])) {
344  target_set.insert(this->_nodeId2columns.first(col));
345  ++i;
346  if (i == __nb_miss) end_miss = true;
347  } else {
348  __filled_row1[col].discr_val = xrow[col].discr_val;
349  __filled_row2[col].discr_val = xrow[col].discr_val;
350  }
351  }
352  }
353 
354  ve.addJointTarget(target_set);
355 
356  // add the evidence and the target
    // note that this loop visits every column of the row (0 .. row_size-1),
    // not only the columns of interest
357  const std::size_t row_size = xrow.size();
358  if (this->_nodeId2columns.empty()) {
359  for (std::size_t col = std::size_t(0); col < row_size; ++col) {
360  switch (this->_column_types[col]) {
362  // only observed values are evidence
363  if (xrow[col].discr_val
364  != std::numeric_limits< std::size_t >::max()) {
365  ve.addEvidence(NodeId(col), xrow[col].discr_val);
366  }
367  break;
368 
370  GUM_ERROR(NotImplementedYet,
371  "The BDRowGeneratorEM does not handle yet continuous "
372  << "variables. But the variable in column" << col
373  << " is continuous.");
374  break;
375 
376  default:
377  GUM_ERROR(NotImplementedYet,
378  "DBTranslatedValueType " << int(this->_column_types[col])
379  << " is not supported yet");
380  }
381  }
382  } else {
383  for (std::size_t col = std::size_t(0); col < row_size; ++col) {
384  switch (this->_column_types[col]) {
386  // only observed values are evidence
387  if (xrow[col].discr_val
388  != std::numeric_limits< std::size_t >::max()) {
389  ve.addEvidence(this->_nodeId2columns.first(col),
390  xrow[col].discr_val);
391  }
392  break;
393 
395  GUM_ERROR(NotImplementedYet,
396  "The BDRowGeneratorEM does not handle yet continuous "
397  << "variables. But the variable in column" << col
398  << " is continuous.");
399  break;
400 
401  default:
402  GUM_ERROR(NotImplementedYet,
403  "DBTranslatedValueType " << int(this->_column_types[col])
404  << " is not supported yet");
405  }
406  }
407  }
408 
409  // get the potential of the target set
    // the posterior is obtained by reference from `ve` (a local object),
    // constness is cast away and its content is moved into __joint_proba.
    // NOTE(review): this modifies an object the inference engine exposes as
    // const -- confirm that jointPosterior() never refers to a truly const
    // object.
410  Potential< GUM_SCALAR >& pot =
411  const_cast< Potential< GUM_SCALAR >& >(ve.jointPosterior(target_set));
412  __joint_proba = std::move(pot);
    // replace any Instantiation left over from a previous row
413  if (__joint_inst != nullptr) delete __joint_inst;
414  __joint_inst = new Instantiation(__joint_proba);
415 
416  // get the mapping between variables of the joint proba and the
417  // columns in the database
    // __missing_cols is rewritten in the variable order of __joint_proba, so
    // that generate() can pair __missing_cols[i] with __joint_inst->val(i)
418  const auto& var_sequence = __joint_proba.variablesSequence();
419  if (this->_nodeId2columns.empty()) {
420  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i) {
421  __missing_cols[i] = std::size_t(this->_bn->nodeId(*(var_sequence[i])));
422  }
423  } else {
424  for (std::size_t i = std::size_t(0); i < __nb_miss; ++i) {
425  __missing_cols[i] =
426  this->_nodeId2columns.second(this->_bn->nodeId(*(var_sequence[i])));
427  }
428  }
429 
430  return std::size_t(__joint_proba.domainSize());
431  }
432 
433 
// Checks a new Bayesian network against the node-id/column mapping and
// resizes the two filled rows accordingly.
//   new_bn : the Bayesian network to use
// Raises IdError (through GUM_ERROR) if _nodeId2columns is not empty and one
// of its node ids does not exist in the DAG of new_bn.
// NOTE(review): listing lines 436 (the function name) and 456 are absent
// from this extract; line 456 is presumably the call to the base-class
// setBayesNet (DBRowGeneratorWithBN declares one) -- confirm.
435  template < typename GUM_SCALAR, template < typename > class ALLOC >
437  const BayesNet< GUM_SCALAR >& new_bn) {
438  // check that if nodeId2columns is not empty, then all the columns
439  // correspond to nodes of the BN
440  if (!this->_nodeId2columns.empty()) {
441  const DAG& dag = new_bn.dag();
442  for (auto iter = this->_nodeId2columns.begin();
443  iter != this->_nodeId2columns.end();
444  ++iter) {
445  if (!dag.existsNode(iter.first())) {
446  GUM_ERROR(IdError,
447  "Column "
448  << iter.second()
449  << " of the database is associated to Node ID "
450  << iter.first()
451  << ", which does not belong to the Bayesian network");
452  }
453  }
454  }
455 
457 
458  // we determine the size of the filled rows
    // `size` ends up as the largest index that must be addressable in a
    // filled row: the largest value iterated from new_bn.dag() when there is
    // no mapping, the largest mapped column index otherwise
459  std::size_t size = std::size_t(0);
460  if (this->_nodeId2columns.empty()) {
461  for (auto node : new_bn.dag())
462  if (std::size_t(node) > size) size = std::size_t(node);
463  } else {
464  for (auto iter = this->_nodeId2columns.begin();
465  iter != this->_nodeId2columns.end();
466  ++iter) {
467  if (iter.second() > size) size = iter.second();
468  }
469  }
    // +1 turns the largest index into an element count
470  __filled_row1.resize(size + 1);
471  __filled_row2.resize(size + 1);
472  }
473 
474  } /* namespace learning */
475 
476 } /* namespace gum */
477 
478 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
std::vector< std::size_t, ALLOC< std::size_t > > _columns_of_interest
the set of columns of interest
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
DBRowGeneratorGoal
the type of things that a DBRowGenerator is designed for
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
void decreaseRemainingRows()
decrease the number of remaining output rows
Set< NodeId > NodeSet
Some typedefs and defines for shortcuts ...
DBRowGeneratorEM(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
const BayesNet< GUM_SCALAR > * _bn
the Bayesian network used to fill the unobserved values
iterator begin() const
Returns the unsafe iterator at the beginning of the gum::Bijection.
STL namespace.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
const iterator & end() const noexcept
Returns the unsafe iterator at the end of the gum::Bijection.
virtual DBRowGeneratorEM< GUM_SCALAR, ALLOC > * clone() const override final
virtual copy constructor
bool empty() const noexcept
Returns true if the gum::Bijection doesn't contain any association.
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
allocator_type getAllocator() const
returns the allocator used
DBRowGeneratorWithBN< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorWithBN< GUM_SCALAR, ALLOC > &from)
copy operator
Bijection< NodeId, std::size_t, ALLOC< std::size_t > > _nodeId2columns
the mapping between the BN's node ids and the database's columns
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn) override final
assign a new Bayes net to the generator
virtual std::size_t _computeRows(const DBRow< DBTranslatedValue, ALLOC > &row) override final
computes the rows it will provide as output
virtual void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to the generator
DBRowGeneratorWithBN(const std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > column_types, const BayesNet< GUM_SCALAR > &bn, const DBRowGeneratorGoal goal, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
virtual const DBRow< DBTranslatedValue, ALLOC > & generate() override final
generates one output DBRow for each DBRow passed to method setInputRow
std::vector< DBTranslatedValueType, ALLOC< DBTranslatedValueType > > _column_types
the types of the columns in the DatabaseTable
Size Idx
Type for indexes.
Definition: types.h:53
DBRowGeneratorEM< GUM_SCALAR, ALLOC > & operator=(const DBRowGeneratorEM< GUM_SCALAR, ALLOC > &from)
copy operator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Size NodeId
Type for node ids.
Definition: graphElements.h:98
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
allocator_type getAllocator() const
returns the allocator used
ALLOC< DBTranslatedValue > allocator_type
type for the allocators passed in arguments of methods