aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
aprioriDirichletFromDatabase_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief A Dirichlet a priori: computes its N'_ijk from a database
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
33 
34  /// default constructor
35  template < template < typename > class ALLOC >
36  AprioriDirichletFromDatabase< ALLOC >::AprioriDirichletFromDatabase(
37  const DatabaseTable< ALLOC >& learning_db,
38  const DBRowGeneratorParser< ALLOC >& apriori_parser,
39  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >& nodeId2columns,
40  const typename AprioriDirichletFromDatabase< ALLOC >::allocator_type& alloc) :
41  Apriori< ALLOC >(apriori_parser.database(),
42  Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
43  alloc),
44  _counter_(apriori_parser,
45  std::vector< std::pair< std::size_t, std::size_t >,
46  ALLOC< std::pair< std::size_t, std::size_t > > >(alloc),
47  nodeId2columns,
48  alloc) {
49  // we check that the variables in the learning database also exist in the
50  // apriori database and that they are precisely equal.
51  const DatabaseTable< ALLOC >& apriori_db = apriori_parser.database();
52  const auto& apriori_names = apriori_db.variableNames();
53  const std::size_t apriori_size = apriori_names.size();
54  HashTable< std::string, std::size_t > names2col(apriori_size);
55  for (std::size_t i = std::size_t(0); i < apriori_size; ++i)
56  names2col.insert(apriori_names[i], i);
57 
58  const auto& learning_names = learning_db.variableNames();
59  const std::size_t learning_size = learning_names.size();
60  HashTable< std::size_t, std::size_t > learning2apriori_index(learning_size);
61  bool different_index = false;
62  for (std::size_t i = std::size_t(0); i < learning_size; ++i) {
63  // get the column of the variable in the apriori database
64  std::size_t apriori_col;
65  try {
66  apriori_col = names2col[learning_names[i]];
67  } catch (...) {
68  GUM_ERROR(DatabaseError,
69  "Variable " << learning_names[i]
70  << " could not be found in the apriori database");
71  }
72 
73  // check that both variables are the same
74  const Variable& learning_var = learning_db.variable(i);
75  const Variable& apriori_var = apriori_db.variable(apriori_col);
76  if (learning_var.varType() != apriori_var.varType()) {
77  GUM_ERROR(DatabaseError,
78  "Variable " << learning_names[i]
79  << " has not the same type in the learning database "
80  " and the apriori database");
81  }
82  if (learning_var.domain() != apriori_var.domain()) {
83  GUM_ERROR(DatabaseError,
84  "Variable " << learning_names[i] << " has domain " << learning_var.domain()
85  << " in the learning database and domain " << apriori_var.domain()
86  << " in the aprioi database");
87  }
88 
89  // save the mapping from i to col
90  learning2apriori_index.insert(i, apriori_col);
91  if (i != apriori_col) different_index = true;
92  }
93 
94  // here we are guaranteed that the variables in the learning database
95  // have their equivalent in the a priori database. Now, we should
96  // fill the bijection from ids to columns
97  if (!different_index) {
98  this->nodeId2columns_ = nodeId2columns;
99  } else {
100  if (nodeId2columns.empty()) {
101  for (std::size_t i = std::size_t(0); i < learning_size; ++i) {
102  this->nodeId2columns_.insert(NodeId(i), learning2apriori_index[i]);
103  }
104  } else {
105  for (auto iter = nodeId2columns.begin(); iter != nodeId2columns.end(); ++iter) {
106  this->nodeId2columns_.insert(iter.first(), learning2apriori_index[iter.second()]);
107  }
108  }
109  }
110 
111  // recreate the record counter with the appropriate node2col mapping
112  std::vector< std::pair< std::size_t, std::size_t >,
113  ALLOC< std::pair< std::size_t, std::size_t > > >
114  ranges(alloc);
115  RecordCounter< ALLOC > good_counter(apriori_parser, ranges, this->nodeId2columns_, alloc);
116  _counter_ = std::move(good_counter);
117 
118  if (apriori_db.nbRows() == std::size_t(0))
119  _internal_weight_ = 0.0;
120  else
121  _internal_weight_ = this->weight_ / apriori_db.nbRows();
122 
123  GUM_CONSTRUCTOR(AprioriDirichletFromDatabase);
124  }
125 
126 
127  /// copy constructor with a given allocator
128  template < template < typename > class ALLOC >
132  Apriori< ALLOC >(from, alloc),
135  }
136 
137 
138  /// copy constructor
139  template < template < typename > class ALLOC >
143 
144 
145  /// move constructor with a given allocator
146  template < template < typename > class ALLOC >
150  Apriori< ALLOC >(std::move(from), alloc),
153  }
154 
155 
156  /// move constructor
157  template < template < typename > class ALLOC >
161 
162 
163  /// virtual copy constructor with a given allocator
164  template < template < typename > class ALLOC >
166  const typename AprioriDirichletFromDatabase< ALLOC >::allocator_type& alloc) const {
169  try {
170  allocator.construct(apriori, *this, alloc);
171  } catch (...) {
173  throw;
174  }
175 
176  return apriori;
177  }
178 
179 
180  /// virtual copy constructor
181  template < template < typename > class ALLOC >
184  return clone(this->getAllocator());
185  }
186 
187 
188  /// destructor
189  template < template < typename > class ALLOC >
192  }
193 
194 
195  /// copy operator
196  template < template < typename > class ALLOC >
199  if (this != &from) {
200  Apriori< ALLOC >::operator=(from);
203  }
204  return *this;
205  }
206 
207 
208  /// move operator
209  template < template < typename > class ALLOC >
212  if (this != &from) {
213  Apriori< ALLOC >::operator=(std::move(from));
216  }
217  return *this;
218  }
219 
220 
221  /// indicates whether an apriori is of a certain type
222  template < template < typename > class ALLOC >
225  }
226 
227 
228  /// returns the type of the apriori
229  template < template < typename > class ALLOC >
231  return AprioriDirichletType::type;
232  }
233 
234 
235  /// indicates whether the apriori is potentially informative
236  template < template < typename > class ALLOC >
238  return (this->weight_ != 0.0);
239  }
240 
241 
242  /// sets the weight of the a priori (kind of effective sample size)
243  template < template < typename > class ALLOC >
244  INLINE void AprioriDirichletFromDatabase< ALLOC >::setWeight(const double weight) {
246  if (_counter_.database().nbRows() == 0.0)
247  _internal_weight_ = 0.0;
248  else
250  }
251 
252 
253  /// returns the apriori vector all the variables in the idset
254  template < template < typename > class ALLOC >
256  const IdCondSet< ALLOC >& idset,
257  std::vector< double, ALLOC< double > >& counts) {
258  if (this->weight_ == 0.0) return;
259 
260  const auto& apriori = _counter_.counts(idset);
261  const std::size_t size = apriori.size();
262  if (_internal_weight_ != 1.0) {
263  for (std::size_t i = std::size_t(0); i < size; ++i) {
265  }
266  } else {
267  for (std::size_t i = std::size_t(0); i < size; ++i) {
268  counts[i] += apriori[i];
269  }
270  }
271  }
272 
273 
274  /// returns the apriori vector over only the conditioning set of an idset
275  template < template < typename > class ALLOC >
277  const IdCondSet< ALLOC >& idset,
278  std::vector< double, ALLOC< double > >& counts) {
279  if (_internal_weight_ == 0.0) return;
280 
282  const std::size_t size = apriori.size();
283  if (_internal_weight_ != 1.0) {
284  for (std::size_t i = std::size_t(0); i < size; ++i) {
286  }
287  } else {
288  for (std::size_t i = std::size_t(0); i < size; ++i) {
289  counts[i] += apriori[i];
290  }
291  }
292  }
293 
294 
295  } /* namespace learning */
296 
297 } /* namespace gum */
298 
299 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)