aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
aprioriDirichletFromDatabase_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief A Dirichlet apriori: computes its N'_ijk from a database
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
33 
34  /// default constructor
35  template < template < typename > class ALLOC >
36  AprioriDirichletFromDatabase< ALLOC >::AprioriDirichletFromDatabase(
37  const DatabaseTable< ALLOC >& learning_db,
38  const DBRowGeneratorParser< ALLOC >& apriori_parser,
39  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
40  nodeId2columns,
41  const typename AprioriDirichletFromDatabase< ALLOC >::allocator_type&
42  alloc) :
43  Apriori< ALLOC >(apriori_parser.database(),
44  Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
45  alloc),
46  counter__(
47  apriori_parser,
48  std::vector< std::pair< std::size_t, std::size_t >,
49  ALLOC< std::pair< std::size_t, std::size_t > > >(alloc),
50  nodeId2columns,
51  alloc) {
52  // we check that the variables in the learning database also exist in the
53  // apriori database and that they are precisely equal.
54  const DatabaseTable< ALLOC >& apriori_db = apriori_parser.database();
55  const auto& apriori_names = apriori_db.variableNames();
56  const std::size_t apriori_size = apriori_names.size();
57  HashTable< std::string, std::size_t > names2col(apriori_size);
58  for (std::size_t i = std::size_t(0); i < apriori_size; ++i)
59  names2col.insert(apriori_names[i], i);
60 
61  const auto& learning_names = learning_db.variableNames();
62  const std::size_t learning_size = learning_names.size();
63  HashTable< std::size_t, std::size_t > learning2apriori_index(learning_size);
64  bool different_index = false;
65  for (std::size_t i = std::size_t(0); i < learning_size; ++i) {
66  // get the column of the variable in the apriori database
67  std::size_t apriori_col;
68  try {
69  apriori_col = names2col[learning_names[i]];
70  } catch (...) {
71  GUM_ERROR(DatabaseError,
72  "Variable " << learning_names[i]
73  << " could not be found in the apriori database");
74  }
75 
76  // check that both variables are the same
77  const Variable& learning_var = learning_db.variable(i);
78  const Variable& apriori_var = apriori_db.variable(apriori_col);
79  if (learning_var.varType() != apriori_var.varType()) {
80  GUM_ERROR(DatabaseError,
81  "Variable "
82  << learning_names[i]
83  << " has not the same type in the learning database "
84  " and the apriori database");
85  }
86  if (learning_var.domain() != apriori_var.domain()) {
87  GUM_ERROR(DatabaseError,
88  "Variable " << learning_names[i] << " has domain "
89  << learning_var.domain()
90  << " in the learning database and domain "
91  << apriori_var.domain()
92  << " in the aprioi database");
93  }
94 
95  // save the mapping from i to col
96  learning2apriori_index.insert(i, apriori_col);
97  if (i != apriori_col) different_index = true;
98  }
99 
100  // here we are guaranteed that the variables in the learning database
101  // have their equivalent in the a priori database. Now, we should
102  // fill the bijection from ids to columns
103  if (!different_index) {
104  this->nodeId2columns_ = nodeId2columns;
105  } else {
106  if (nodeId2columns.empty()) {
107  for (std::size_t i = std::size_t(0); i < learning_size; ++i) {
108  this->nodeId2columns_.insert(NodeId(i), learning2apriori_index[i]);
109  }
110  } else {
111  for (auto iter = nodeId2columns.begin(); iter != nodeId2columns.end();
112  ++iter) {
113  this->nodeId2columns_.insert(iter.first(),
114  learning2apriori_index[iter.second()]);
115  }
116  }
117  }
118 
119  // recreate the record counter with the appropriate node2col mapping
120  std::vector< std::pair< std::size_t, std::size_t >,
121  ALLOC< std::pair< std::size_t, std::size_t > > >
122  ranges(alloc);
123  RecordCounter< ALLOC > good_counter(apriori_parser,
124  ranges,
125  this->nodeId2columns_,
126  alloc);
127  counter__ = std::move(good_counter);
128 
129  if (apriori_db.nbRows() == std::size_t(0))
130  internal_weight__ = 0.0;
131  else
132  internal_weight__ = this->weight_ / apriori_db.nbRows();
133 
134  GUM_CONSTRUCTOR(AprioriDirichletFromDatabase);
135  }
136 
137 
138  /// copy constructor with a given allocator
139  template < template < typename > class ALLOC >
143  alloc) :
144  Apriori< ALLOC >(from, alloc),
148  }
149 
150 
151  /// copy constructor
152  template < template < typename > class ALLOC >
156 
157 
158  /// move constructor with a given allocator
159  template < template < typename > class ALLOC >
163  alloc) :
164  Apriori< ALLOC >(std::move(from), alloc),
168  }
169 
170 
171  /// move constructor
172  template < template < typename > class ALLOC >
176  from.getAllocator()) {}
177 
178 
179  /// virtual copy constructor with a given allocator
180  template < template < typename > class ALLOC >
184  alloc) const {
187  try {
188  allocator.construct(apriori, *this, alloc);
189  } catch (...) {
191  throw;
192  }
193 
194  return apriori;
195  }
196 
197 
198  /// virtual copy constructor
199  template < template < typename > class ALLOC >
202  return clone(this->getAllocator());
203  }
204 
205 
206  /// destructor
207  template < template < typename > class ALLOC >
210  }
211 
212 
213  /// copy operator
214  template < template < typename > class ALLOC >
218  if (this != &from) {
219  Apriori< ALLOC >::operator=(from);
222  }
223  return *this;
224  }
225 
226 
227  /// move operator
228  template < template < typename > class ALLOC >
232  if (this != &from) {
233  Apriori< ALLOC >::operator=(std::move(from));
236  }
237  return *this;
238  }
239 
240 
241  /// indicates whether an apriori is of a certain type
242  template < template < typename > class ALLOC >
243  INLINE bool
246  }
247 
248 
249  /// returns the type of the apriori
250  template < template < typename > class ALLOC >
251  INLINE const std::string&
253  return AprioriDirichletType::type;
254  }
255 
256 
257  /// indicates whether the apriori is potentially informative
258  template < template < typename > class ALLOC >
260  return (this->weight_ != 0.0);
261  }
262 
263 
264  /// sets the weight of the a priori (kind of effective sample size)
265  template < template < typename > class ALLOC >
266  INLINE void
269  if (counter__.database().nbRows() == 0.0)
270  internal_weight__ = 0.0;
271  else
273  }
274 
275 
276  /// returns the apriori vector over all the variables in the idset
277  template < template < typename > class ALLOC >
279  const IdCondSet< ALLOC >& idset,
280  std::vector< double, ALLOC< double > >& counts) {
281  if (this->weight_ == 0.0) return;
282 
283  const auto& apriori = counter__.counts(idset);
284  const std::size_t size = apriori.size();
285  if (internal_weight__ != 1.0) {
286  for (std::size_t i = std::size_t(0); i < size; ++i) {
288  }
289  } else {
290  for (std::size_t i = std::size_t(0); i < size; ++i) {
291  counts[i] += apriori[i];
292  }
293  }
294  }
295 
296 
297  /// returns the apriori vector over only the conditioning set of an idset
298  template < template < typename > class ALLOC >
300  const IdCondSet< ALLOC >& idset,
301  std::vector< double, ALLOC< double > >& counts) {
302  if (internal_weight__ == 0.0) return;
303 
305  const std::size_t size = apriori.size();
306  if (internal_weight__ != 1.0) {
307  for (std::size_t i = std::size_t(0); i < size; ++i) {
309  }
310  } else {
311  for (std::size_t i = std::size_t(0); i < size; ++i) {
312  counts[i] += apriori[i];
313  }
314  }
315  }
316 
317 
318  } /* namespace learning */
319 
320 } /* namespace gum */
321 
322 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)