aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
independenceTest_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the base class for all the independence tests used for learning
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
33  /// returns the allocator used by the independence test
34  template < template < typename > class ALLOC >
36  IndependenceTest< ALLOC >::getAllocator() const {
37  return counter_.getAllocator();
38  }
39 
40 
41  /// default constructor
42  template < template < typename > class ALLOC >
45  const Apriori< ALLOC >& apriori,
46  const std::vector< std::pair< std::size_t, std::size_t >,
47  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
49  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
53  }
54 
55 
56  /// default constructor
57  template < template < typename > class ALLOC >
60  const Apriori< ALLOC >& apriori,
62  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
66  }
67 
68 
69  /// copy constructor with a given allocator
70  template < template < typename > class ALLOC >
72  const IndependenceTest< ALLOC >& from,
73  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
77  }
78 
79 
80  /// copy constructor
81  template < template < typename > class ALLOC >
84 
85 
86  /// move constructor
87  template < template < typename > class ALLOC >
90  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
94  from.apriori_ = nullptr;
96  }
97 
98 
99  /// move constructor
100  template < template < typename > class ALLOC >
103 
104 
105  /// destructor
106  template < template < typename > class ALLOC >
108  if (apriori_ != nullptr) {
109  ALLOC< Apriori< ALLOC > > allocator(this->getAllocator());
112  }
114  }
115 
116 
117  /// copy operator
118  template < template < typename > class ALLOC >
121  if (this != &from) {
125 
126  if (apriori_ != nullptr) {
127  ALLOC< Apriori< ALLOC > > allocator(this->getAllocator());
130  }
131 
134  cache_ = std::move(new_cache);
135 
137  }
138  return *this;
139  }
140 
141 
142  /// move operator
143  template < template < typename > class ALLOC >
146  if (this != &from) {
148 
150  cache_ = std::move(from.cache_);
152  }
153  return *this;
154  }
155 
156 
157  /// changes the max number of threads used to parse the database
158  template < template < typename > class ALLOC >
161  }
162 
163 
164  /// returns the number of threads used to parse the database
165  template < template < typename > class ALLOC >
167  return counter_.nbThreads();
168  }
169 
170 
171  /** @brief changes the number min of rows a thread should process in a
172  * multithreading context */
173  template < template < typename > class ALLOC >
176  }
177 
178 
179  /// returns the minimum of rows that each thread should process
180  template < template < typename > class ALLOC >
182  return counter_.minNbRowsPerThread();
183  }
184 
185 
186  /// sets new ranges to perform the countings used by the score
187  /** @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
188  * indices. The countings are then performed only on the union of the
189  * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
190  * cross validation tasks, in which part of the database should be ignored.
191  * An empty set of ranges is equivalent to an interval [X,Y) ranging over
192  * the whole database. */
193  template < template < typename > class ALLOC >
194  template < template < typename > class XALLOC >
196  const std::vector< std::pair< std::size_t, std::size_t >,
197  XALLOC< std::pair< std::size_t, std::size_t > > >& new_ranges) {
198  std::vector< std::pair< std::size_t, std::size_t >,
199  ALLOC< std::pair< std::size_t, std::size_t > > >
200  old_ranges = ranges();
202  if (old_ranges != ranges()) clear();
203  }
204 
205 
206  /// reset the ranges to the one range corresponding to the whole database
207  template < template < typename > class ALLOC >
208  void IndependenceTest< ALLOC >::clearRanges() {
209  std::vector< std::pair< std::size_t, std::size_t >,
210  ALLOC< std::pair< std::size_t, std::size_t > > >
211  old_ranges = ranges();
213  if (old_ranges != ranges()) clear();
214  }
215 
216 
217  /// returns the current ranges
218  template < template < typename > class ALLOC >
219  INLINE const std::vector< std::pair< std::size_t, std::size_t >,
220  ALLOC< std::pair< std::size_t, std::size_t > > >&
221  IndependenceTest< ALLOC >::ranges() const {
222  return counter_.ranges();
223  }
224 
225 
226  /// returns the score of a pair of nodes
227  template < template < typename > class ALLOC >
228  INLINE double IndependenceTest< ALLOC >::score(const NodeId var1, const NodeId var2) {
229  IdCondSet< ALLOC > idset(var1, var2, empty_ids_, false, true, this->getAllocator());
230  if (use_cache_) {
231  try {
232  return cache_.score(idset);
233  } catch (NotFound&) {}
234  double the_score = score_(idset);
236  return the_score;
237  } else {
238  return score_(std::move(idset));
239  }
240  }
241 
242 
243  /// returns the score of a pair of nodes given some other nodes
244  template < template < typename > class ALLOC >
245  INLINE double
247  const NodeId var2,
248  const std::vector< NodeId, ALLOC< NodeId > >& rhs_ids) {
249  IdCondSet< ALLOC > idset(var1, var2, rhs_ids, false, false, this->getAllocator());
250  if (use_cache_) {
251  try {
252  return cache_.score(idset);
253  } catch (NotFound&) {}
254  double the_score = score_(idset);
256  return the_score;
257  } else {
258  return score_(idset);
259  }
260  }
261 
262 
263  /// clears all the data structures from memory
264  template < template < typename > class ALLOC >
265  INLINE void IndependenceTest< ALLOC >::clear() {
266  counter_.clear();
267  cache_.clear();
268  }
269 
270 
271  /// clears the current cache (clear nodesets as well)
272  template < template < typename > class ALLOC >
274  cache_.clear();
275  }
276 
277 
278  /// turn on/off the use of a cache of the previously computed score
279  template < template < typename > class ALLOC >
280  INLINE void IndependenceTest< ALLOC >::useCache(const bool on_off) {
281  use_cache_ = on_off;
282  }
283 
284 
285  /// return the mapping between the columns of the database and the node ids
286  template < template < typename > class ALLOC >
287  INLINE const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
289  return counter_.nodeId2Columns();
290  }
291 
292 
293  /// return the database used by the score
294  template < template < typename > class ALLOC >
295  INLINE const DatabaseTable< ALLOC >& IndependenceTest< ALLOC >::database() const {
296  return counter_.database();
297  }
298 
299 
300  /// returns a counting vector where variables are marginalized from N_xyz
301  /** @param node_2_marginalize indicates which node(s) shall be marginalized:
302  * - 0 means that X should be marginalized
303  * - 1 means that Y should be marginalized
304  * - 2 means that Z should be marginalized
305  */
306  template < template < typename > class ALLOC >
307  std::vector< double, ALLOC< double > > IndependenceTest< ALLOC >::marginalize_(
309  const std::size_t X_size,
310  const std::size_t Y_size,
311  const std::size_t Z_size,
312  const std::vector< double, ALLOC< double > >& N_xyz) const {
313  // determine the size of the output vector
315  if (node_2_marginalize == std::size_t(0))
316  out_size *= Y_size;
317  else if (node_2_marginalize == std::size_t(1))
318  out_size *= X_size;
319 
320  // allocate the output vector
321  std::vector< double, ALLOC< double > > res(out_size, 0.0);
322 
323  // fill the vector:
324  if (node_2_marginalize == std::size_t(0)) { // marginalize X
325  for (std::size_t yz = std::size_t(0), xyz = std::size_t(0); yz < out_size; ++yz) {
326  for (std::size_t x = std::size_t(0); x < X_size; ++x, ++xyz) {
327  res[yz] += N_xyz[xyz];
328  }
329  }
330  } else if (node_2_marginalize == std::size_t(1)) { // marginalize Y
331  for (std::size_t z = std::size_t(0), xyz = std::size_t(0), beg_xz = std::size_t(0);
332  z < Z_size;
333  ++z, beg_xz += X_size) {
334  for (std::size_t y = std::size_t(0); y < Y_size; ++y) {
335  for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size; ++x, ++xz, ++xyz) {
336  res[xz] += N_xyz[xyz];
337  }
338  }
339  }
340  } else if (node_2_marginalize == std::size_t(2)) { // marginalize X and Y
341  const std::size_t XY_size = X_size * Y_size;
342  for (std::size_t z = std::size_t(0), xyz = std::size_t(0); z < out_size; ++z) {
343  for (std::size_t xy = std::size_t(0); xy < XY_size; ++xy, ++xyz) {
344  res[z] += N_xyz[xyz];
345  }
346  }
347  } else {
349  "_marginalize not implemented for nodeset " << node_2_marginalize);
350  }
351 
352  return res;
353  }
354 
355  } /* namespace learning */
356 
357 } /* namespace gum */
358 
359 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:643
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)