aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
independenceTest_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the base class for all the independence tests used for learning
24  *
25  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26  */
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
33  /// returns the allocator used by the independence test
34  template < template < typename > class ALLOC >
36  IndependenceTest< ALLOC >::getAllocator() const {
37  return counter_.getAllocator();
38  }
39 
40 
41  /// default constructor
42  template < template < typename > class ALLOC >
45  const Apriori< ALLOC >& apriori,
46  const std::vector< std::pair< std::size_t, std::size_t >,
47  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
48  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
50  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
54  }
55 
56 
57  /// default constructor
58  template < template < typename > class ALLOC >
61  const Apriori< ALLOC >& apriori,
62  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
64  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
68  }
69 
70 
71  /// copy constructor with a given allocator
72  template < template < typename > class ALLOC >
74  const IndependenceTest< ALLOC >& from,
75  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
80  }
81 
82 
83  /// copy constructor
84  template < template < typename > class ALLOC >
86  const IndependenceTest< ALLOC >& from) :
88 
89 
90  /// move constructor
91  template < template < typename > class ALLOC >
94  const typename IndependenceTest< ALLOC >::allocator_type& alloc) :
98  from.apriori_ = nullptr;
100  }
101 
102 
103  /// move constructor
104  template < template < typename > class ALLOC >
106  IndependenceTest< ALLOC >&& from) :
108 
109 
110  /// destructor
111  template < template < typename > class ALLOC >
113  if (apriori_ != nullptr) {
114  ALLOC< Apriori< ALLOC > > allocator(this->getAllocator());
117  }
119  }
120 
121 
122  /// copy operator
123  template < template < typename > class ALLOC >
125  const IndependenceTest< ALLOC >& from) {
126  if (this != &from) {
130 
131  if (apriori_ != nullptr) {
132  ALLOC< Apriori< ALLOC > > allocator(this->getAllocator());
135  }
136 
139  cache_ = std::move(new_cache);
140 
142  }
143  return *this;
144  }
145 
146 
147  /// move operator
148  template < template < typename > class ALLOC >
151  if (this != &from) {
153 
155  cache_ = std::move(from.cache_);
157  }
158  return *this;
159  }
160 
161 
162  /// changes the max number of threads used to parse the database
163  template < template < typename > class ALLOC >
166  }
167 
168 
169  /// returns the number of threads used to parse the database
170  template < template < typename > class ALLOC >
172  return counter_.nbThreads();
173  }
174 
175 
176  /** @brief changes the number min of rows a thread should process in a
177  * multithreading context */
178  template < template < typename > class ALLOC >
180  const std::size_t nb) const {
182  }
183 
184 
185  /// returns the minimum of rows that each thread should process
186  template < template < typename > class ALLOC >
188  return counter_.minNbRowsPerThread();
189  }
190 
191 
192  /// sets new ranges to perform the countings used by the score
193  /** @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
194  * indices. The countings are then performed only on the union of the
195  * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
196  * cross validation tasks, in which part of the database should be ignored.
197  * An empty set of ranges is equivalent to an interval [X,Y) ranging over
198  * the whole database. */
199  template < template < typename > class ALLOC >
200  template < template < typename > class XALLOC >
202  const std::vector< std::pair< std::size_t, std::size_t >,
203  XALLOC< std::pair< std::size_t, std::size_t > > >&
204  new_ranges) {
205  std::vector< std::pair< std::size_t, std::size_t >,
206  ALLOC< std::pair< std::size_t, std::size_t > > >
207  old_ranges = ranges();
209  if (old_ranges != ranges()) clear();
210  }
211 
212 
213  /// reset the ranges to the one range corresponding to the whole database
214  template < template < typename > class ALLOC >
215  void IndependenceTest< ALLOC >::clearRanges() {
216  std::vector< std::pair< std::size_t, std::size_t >,
217  ALLOC< std::pair< std::size_t, std::size_t > > >
218  old_ranges = ranges();
220  if (old_ranges != ranges()) clear();
221  }
222 
223 
224  /// returns the current ranges
225  template < template < typename > class ALLOC >
226  INLINE const std::vector< std::pair< std::size_t, std::size_t >,
227  ALLOC< std::pair< std::size_t, std::size_t > > >&
228  IndependenceTest< ALLOC >::ranges() const {
229  return counter_.ranges();
230  }
231 
232 
233  /// returns the score of a pair of nodes
234  template < template < typename > class ALLOC >
235  INLINE double IndependenceTest< ALLOC >::score(const NodeId var1,
236  const NodeId var2) {
238  var2,
239  empty_ids_,
240  false,
241  true,
242  this->getAllocator());
243  if (use_cache_) {
244  try {
245  return cache_.score(idset);
246  } catch (NotFound&) {}
247  double the_score = score_(idset);
249  return the_score;
250  } else {
251  return score_(std::move(idset));
252  }
253  }
254 
255 
256  /// returns the score of a pair of nodes given some other nodes
257  template < template < typename > class ALLOC >
258  INLINE double IndependenceTest< ALLOC >::score(
259  const NodeId var1,
260  const NodeId var2,
261  const std::vector< NodeId, ALLOC< NodeId > >& rhs_ids) {
263  var2,
264  rhs_ids,
265  false,
266  false,
267  this->getAllocator());
268  if (use_cache_) {
269  try {
270  return cache_.score(idset);
271  } catch (NotFound&) {}
272  double the_score = score_(idset);
274  return the_score;
275  } else {
276  return score_(idset);
277  }
278  }
279 
280 
281  /// clears all the data structures from memory
282  template < template < typename > class ALLOC >
283  INLINE void IndependenceTest< ALLOC >::clear() {
284  counter_.clear();
285  cache_.clear();
286  }
287 
288 
289  /// clears the current cache (clear nodesets as well)
290  template < template < typename > class ALLOC >
292  cache_.clear();
293  }
294 
295 
296  /// turn on/off the use of a cache of the previously computed score
297  template < template < typename > class ALLOC >
298  INLINE void IndependenceTest< ALLOC >::useCache(const bool on_off) {
299  use_cache_ = on_off;
300  }
301 
302 
303  /// return the mapping between the columns of the database and the node ids
304  template < template < typename > class ALLOC >
305  INLINE const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
307  return counter_.nodeId2Columns();
308  }
309 
310 
311  /// return the database used by the score
312  template < template < typename > class ALLOC >
313  INLINE const DatabaseTable< ALLOC >&
314  IndependenceTest< ALLOC >::database() const {
315  return counter_.database();
316  }
317 
318 
319  /// returns a counting vector where variables are marginalized from N_xyz
320  /** @param node_2_marginalize indicates which node(s) shall be marginalized:
321  * - 0 means that X should be marginalized
322  * - 1 means that Y should be marginalized
323  * - 2 means that Z should be marginalized
324  */
325  template < template < typename > class ALLOC >
326  std::vector< double, ALLOC< double > > IndependenceTest< ALLOC >::marginalize_(
328  const std::size_t X_size,
329  const std::size_t Y_size,
330  const std::size_t Z_size,
331  const std::vector< double, ALLOC< double > >& N_xyz) const {
332  // determine the size of the output vector
334  if (node_2_marginalize == std::size_t(0))
335  out_size *= Y_size;
336  else if (node_2_marginalize == std::size_t(1))
337  out_size *= X_size;
338 
339  // allocate the output vector
340  std::vector< double, ALLOC< double > > res(out_size, 0.0);
341 
342  // fill the vector:
343  if (node_2_marginalize == std::size_t(0)) { // marginalize X
344  for (std::size_t yz = std::size_t(0), xyz = std::size_t(0); yz < out_size;
345  ++yz) {
346  for (std::size_t x = std::size_t(0); x < X_size; ++x, ++xyz) {
347  res[yz] += N_xyz[xyz];
348  }
349  }
350  } else if (node_2_marginalize == std::size_t(1)) { // marginalize Y
351  for (std::size_t z = std::size_t(0),
352  xyz = std::size_t(0),
353  beg_xz = std::size_t(0);
354  z < Z_size;
355  ++z, beg_xz += X_size) {
356  for (std::size_t y = std::size_t(0); y < Y_size; ++y) {
357  for (std::size_t x = std::size_t(0), xz = beg_xz; x < X_size;
358  ++x, ++xz, ++xyz) {
359  res[xz] += N_xyz[xyz];
360  }
361  }
362  }
363  } else if (node_2_marginalize == std::size_t(2)) { // marginalize X and Y
364  const std::size_t XY_size = X_size * Y_size;
365  for (std::size_t z = std::size_t(0), xyz = std::size_t(0); z < out_size;
366  ++z) {
367  for (std::size_t xy = std::size_t(0); xy < XY_size; ++xy, ++xyz) {
368  res[z] += N_xyz[xyz];
369  }
370  }
371  } else {
373  "_marginalize not implemented for nodeset "
374  << node_2_marginalize);
375  }
376 
377  return res;
378  }
379 
380  } /* namespace learning */
381 
382 } /* namespace gum */
383 
384 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)