aGrUM  0.14.2
correctedMutualInformation_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 namespace gum {
30 
31  namespace learning {
32 
34  template < template < typename > class ALLOC >
37  return __NH.getAllocator();
38  }
39 
40 
42  template < template < typename > class ALLOC >
44  const DBRowGeneratorParser< ALLOC >& parser,
45  const Apriori< ALLOC >& apriori,
46  const std::vector< std::pair< std::size_t, std::size_t >,
47  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
48  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
49  nodeId2columns,
51  __NH(parser, apriori, ranges, nodeId2columns, alloc),
52  __k_NML(parser, apriori, ranges, nodeId2columns, alloc),
53  __score_MDL(parser, apriori, ranges, nodeId2columns, alloc),
54  __ICache(alloc), __KCache(alloc) {
55  GUM_CONSTRUCTOR(CorrectedMutualInformation);
56  }
57 
58 
60  template < template < typename > class ALLOC >
62  const DBRowGeneratorParser< ALLOC >& parser,
63  const Apriori< ALLOC >& apriori,
64  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
65  nodeId2columns,
67  __NH(parser, apriori, nodeId2columns, alloc),
68  __k_NML(parser, apriori, nodeId2columns, alloc),
69  __score_MDL(parser, apriori, nodeId2columns, alloc), __ICache(alloc),
70  __KCache(alloc) {
71  GUM_CONSTRUCTOR(CorrectedMutualInformation);
72  }
73 
74 
76  template < template < typename > class ALLOC >
78  const CorrectedMutualInformation< ALLOC >& from,
80  __NH(from.__NH, alloc),
81  __k_NML(from.__k_NML, alloc), __score_MDL(from.__score_MDL, alloc),
82  __kmode(from.__kmode), __use_ICache(from.__use_ICache),
83  __use_HCache(from.__use_HCache), __use_KCache(from.__use_KCache),
84  __use_CnrCache(from.__use_CnrCache), __ICache(from.__ICache, alloc),
85  __KCache(from.__KCache, alloc) {
86  GUM_CONS_CPY(CorrectedMutualInformation);
87  }
88 
89 
91  template < template < typename > class ALLOC >
93  const CorrectedMutualInformation< ALLOC >& from) :
95 
96 
98  template < template < typename > class ALLOC >
100  CorrectedMutualInformation< ALLOC >&& from,
102  __NH(std::move(from.__NH), alloc),
103  __k_NML(std::move(from.__k_NML), alloc),
104  __score_MDL(std::move(from.__score_MDL), alloc), __kmode(from.__kmode),
105  __use_ICache(from.__use_ICache), __use_HCache(from.__use_HCache),
106  __use_KCache(from.__use_KCache), __use_CnrCache(from.__use_CnrCache),
107  __ICache(std::move(from.__ICache), alloc),
108  __KCache(std::move(from.__KCache), alloc) {
109  GUM_CONS_MOV(CorrectedMutualInformation);
110  }
111 
112 
114  template < template < typename > class ALLOC >
116  CorrectedMutualInformation< ALLOC >&& from) :
117  CorrectedMutualInformation(std::move(from), from.getAllocator()) {}
118 
119 
121  template < template < typename > class ALLOC >
122  CorrectedMutualInformation< ALLOC >*
125  alloc) const {
126  ALLOC< CorrectedMutualInformation< ALLOC > > allocator(alloc);
127  CorrectedMutualInformation< ALLOC >* new_score = allocator.allocate(1);
128  try {
129  allocator.construct(new_score, *this, alloc);
130  } catch (...) {
131  allocator.deallocate(new_score, 1);
132  throw;
133  }
134 
135  return new_score;
136  }
137 
138 
140  template < template < typename > class ALLOC >
141  CorrectedMutualInformation< ALLOC >*
143  return clone(this->getAllocator());
144  }
145 
146 
148  template < template < typename > class ALLOC >
150  // for debugging purposes
151  GUM_DESTRUCTOR(CorrectedMutualInformation);
152  }
153 
154 
156  template < template < typename > class ALLOC >
157  CorrectedMutualInformation< ALLOC >& CorrectedMutualInformation< ALLOC >::
158  operator=(const CorrectedMutualInformation< ALLOC >& from) {
159  if (this != &from) {
160  __NH = from.__NH;
161  __k_NML = from.__k_NML;
162  __score_MDL = from.__score_MDL;
163  __kmode = from.__kmode;
164  __use_ICache = from.__use_ICache;
165  __use_HCache = from.__use_HCache;
166  __use_KCache = from.__use_KCache;
167  __use_CnrCache = from.__use_CnrCache;
168  __ICache = from.__ICache;
169  __KCache = from.__KCache;
170  }
171  return *this;
172  }
173 
174 
176  template < template < typename > class ALLOC >
177  CorrectedMutualInformation< ALLOC >& CorrectedMutualInformation< ALLOC >::
178  operator=(CorrectedMutualInformation< ALLOC >&& from) {
179  if (this != &from) {
180  __NH = std::move(from.__NH);
181  __k_NML = std::move(from.__k_NML);
182  __score_MDL = std::move(from.__score_MDL);
183  __kmode = from.__kmode;
184  __use_ICache = from.__use_ICache;
185  __use_HCache = from.__use_HCache;
186  __use_KCache = from.__use_KCache;
187  __use_CnrCache = from.__use_CnrCache;
188  __ICache = std::move(from.__ICache);
189  __KCache = std::move(from.__KCache);
190  }
191  return *this;
192  }
193 
194 
196  template < template < typename > class ALLOC >
197  INLINE void CorrectedMutualInformation< ALLOC >::useCache(bool on_off) {
198  useICache(on_off);
199  useHCache(on_off);
200  useKCache(on_off);
201  useCnrCache(on_off);
202  }
203 
204 
206  template < template < typename > class ALLOC >
207  INLINE void CorrectedMutualInformation< ALLOC >::useICache(bool on_off) {
208  if (!on_off) __ICache.clear();
209  __use_ICache = on_off;
210  }
211 
212 
214  template < template < typename > class ALLOC >
215  INLINE void CorrectedMutualInformation< ALLOC >::useHCache(bool on_off) {
216  if (!on_off) __NH.clearCache();
217  __use_HCache = on_off;
218  __NH.useCache(on_off);
219  }
220 
221 
223  template < template < typename > class ALLOC >
224  INLINE void CorrectedMutualInformation< ALLOC >::useKCache(bool on_off) {
225  if (!on_off) __KCache.clear();
226  __use_KCache = on_off;
227  }
228 
229 
231  template < template < typename > class ALLOC >
232  INLINE void CorrectedMutualInformation< ALLOC >::useCnrCache(bool on_off) {
233  if (!on_off) __k_NML.clearCache();
234  __use_CnrCache = on_off;
235  __k_NML.useCache(on_off);
236  }
237 
238 
240  template < template < typename > class ALLOC >
242  __NH.clear();
243  __k_NML.clear();
244  __score_MDL.clear();
245  clearCache();
246  }
247 
248 
250  template < template < typename > class ALLOC >
252  __NH.clearCache();
253  __k_NML.clearCache();
254  __ICache.clear();
255  __KCache.clear();
256  }
257 
258 
260  template < template < typename > class ALLOC >
262  __ICache.clear();
263  }
264 
265 
267  template < template < typename > class ALLOC >
269  __NH.clearCache();
270  }
271 
272 
274  template < template < typename > class ALLOC >
276  __KCache.clear();
277  }
278 
279 
281  template < template < typename > class ALLOC >
283  __k_NML.clearCache();
284  }
285 
286 
288  template < template < typename > class ALLOC >
289  void
291  __NH.setMaxNbThreads(nb);
292  __k_NML.setMaxNbThreads(nb);
293  __score_MDL.setMaxNbThreads(nb);
294  }
295 
296 
298  template < template < typename > class ALLOC >
300  return __NH.nbThreads();
301  }
302 
303 
306  template < template < typename > class ALLOC >
308  const std::size_t nb) const {
309  __NH.setMinNbRowsPerThread(nb);
310  __k_NML.setMinNbRowsPerThread(nb);
311  __score_MDL.setMinNbRowsPerThread(nb);
312  }
313 
314 
316  template < template < typename > class ALLOC >
317  INLINE std::size_t
319  return __NH.minNbRowsPerThread();
320  }
321 
322 
324 
330  template < template < typename > class ALLOC >
331  template < template < typename > class XALLOC >
333  const std::vector< std::pair< std::size_t, std::size_t >,
334  XALLOC< std::pair< std::size_t, std::size_t > > >&
335  new_ranges) {
336  std::vector< std::pair< std::size_t, std::size_t >,
337  ALLOC< std::pair< std::size_t, std::size_t > > >
338  old_ranges = ranges();
339 
340  __NH.setRanges(new_ranges);
341  __k_NML.setRanges(new_ranges);
342  __score_MDL.setRanges(new_ranges);
343 
344  if (old_ranges != ranges()) clear();
345  }
346 
347 
349  template < template < typename > class ALLOC >
351  std::vector< std::pair< std::size_t, std::size_t >,
352  ALLOC< std::pair< std::size_t, std::size_t > > >
353  old_ranges = ranges();
354  __NH.clearRanges();
355  __k_NML.clearRanges();
356  __score_MDL.clearRanges();
357  if (old_ranges != ranges()) clear();
358  }
359 
360 
362  template < template < typename > class ALLOC >
363  INLINE const std::vector< std::pair< std::size_t, std::size_t >,
364  ALLOC< std::pair< std::size_t, std::size_t > > >&
366  return __NH.ranges();
367  }
368 
369 
371  template < template < typename > class ALLOC >
373  clearCache();
374  __kmode = KModeTypes::MDL;
375  }
376 
377 
379  template < template < typename > class ALLOC >
381  clearCache();
382  __kmode = KModeTypes::NML;
383  }
384 
385 
387  template < template < typename > class ALLOC >
389  clearCache();
390  __kmode = KModeTypes::NoCorr;
391  }
392 
393 
395  template < template < typename > class ALLOC >
397  NodeId var2) {
398  return score(var1, var2, __empty_conditioning_set);
399  }
400 
401 
403  template < template < typename > class ALLOC >
405  NodeId var1,
406  NodeId var2,
407  const std::vector< NodeId, ALLOC< NodeId > >& conditioning_ids) {
408  return __NI_score(var1, var2, conditioning_ids)
409  - __K_score(var1, var2, conditioning_ids);
410  }
411 
412 
414  template < template < typename > class ALLOC >
416  NodeId var2,
417  NodeId var3) {
418  return score(var1, var2, var3, __empty_conditioning_set);
419  }
420 
421 
423  template < template < typename > class ALLOC >
425  NodeId var1,
426  NodeId var2,
427  NodeId var3,
428  const std::vector< NodeId, ALLOC< NodeId > >& conditioning_ids) {
429  return __NI_score(var1, var2, var3, conditioning_ids)
430  + __K_score(var1, var2, var3, conditioning_ids);
431  }
432 
433 
435  template < template < typename > class ALLOC >
436  double CorrectedMutualInformation< ALLOC >::__NI_score(
437  NodeId var_x,
438  NodeId var_y,
439  const std::vector< NodeId, ALLOC< NodeId > >& vars_z) {
440  /*
441  * We have a few partial entropies to compute in order to have the
442  * 2-point mutual information:
443  * I(x;y) = H(x) + H(y) - H(x,y)
444  * correspondingly
445  * I(x;y) = Hx + Hy - Hxy
446  * or
447  * I(x;y|z) = H(x,z) + H(y,z) - H(z) - H(x,y,z)
448  * correspondingly
449  * I(x;y|z) = Hxz + Hyz - Hz - Hxyz
450  * Note that Entropy H is equal to 1/N times the log2Likelihood,
451  * where N is the size of the database.
452  * Remember that we return N times I(x;y|z)
453  */
454 
455  // if the score has already been computed, get its value
456  const IdSet< ALLOC > idset_xyz(var_x, var_y, vars_z, false, false);
457  if (__use_ICache) {
458  try {
459  return __ICache.score(idset_xyz);
460  } catch (const NotFound&) {}
461  }
462 
463  // compute the score
464 
465  // here, we distinguish nodesets with conditioning nodes from those
466  // without conditioning nodes
467  double score;
468  if (!vars_z.empty()) {
469  std::vector< NodeId, ALLOC< NodeId > > vars(vars_z);
470  // std::sort(vars.begin(), vars.end());
471  vars.push_back(var_x);
472  vars.push_back(var_y);
473  const double NHxyz = -__NH.score(IdSet< ALLOC >(vars, false, true));
474 
475  vars.pop_back();
476  const double NHxz = -__NH.score(IdSet< ALLOC >(vars, false, true));
477 
478  vars.pop_back();
479  vars.push_back(var_y);
480  const double NHyz = -__NH.score(IdSet< ALLOC >(vars, false, true));
481 
482  vars.pop_back();
483  const double NHz = -__NH.score(IdSet< ALLOC >(vars, false, true));
484 
485  const double NHxz_NHyz = NHxz + NHyz;
486  double NHz_NHxyz = NHz + NHxyz;
487 
488  // avoid numeric instability due to rounding errors
489  double ratio = 1;
490  if (NHxz_NHyz > 0) {
491  ratio = (NHxz_NHyz - NHz_NHxyz) / NHxz_NHyz;
492  } else if (NHz_NHxyz > 0) {
493  ratio = (NHxz_NHyz - NHz_NHxyz) / NHz_NHxyz;
494  }
495  if (ratio < 0) ratio = -ratio;
496  if (ratio < __threshold) {
497  NHz_NHxyz = NHxz_NHyz; // ensure that the score is equal to 0
498  }
499 
500  score = NHxz_NHyz - NHz_NHxyz;
501  } else {
502  const double NHxy = -__NH.score(
503  IdSet< ALLOC >(var_x, var_y, __empty_conditioning_set, true, false));
504  const double NHx = -__NH.score(var_x);
505  const double NHy = -__NH.score(var_y);
506 
507  double NHx_NHy = NHx + NHy;
508 
509  // avoid numeric instability due to rounding errors
510  double ratio = 1;
511  if (NHx_NHy > 0) {
512  ratio = (NHx_NHy - NHxy) / NHx_NHy;
513  } else if (NHxy > 0) {
514  ratio = (NHx_NHy - NHxy) / NHxy;
515  }
516  if (ratio < 0) ratio = -ratio;
517  if (ratio < __threshold) {
518  NHx_NHy = NHxy; // ensure that the score is equal to 0
519  }
520 
521  score = NHx_NHy - NHxy;
522  }
523 
524 
525  // shall we put the score into the cache?
526  if (__use_ICache) { __ICache.insert(idset_xyz, score); }
527 
528  return score;
529  }
530 
531 
533  template < template < typename > class ALLOC >
534  INLINE double CorrectedMutualInformation< ALLOC >::__NI_score(
535  NodeId var_x,
536  NodeId var_y,
537  NodeId var_z,
538  const std::vector< NodeId, ALLOC< NodeId > >& ui_ids) {
539  // conditional 3-point mutual information formula:
540  // I(x;y;z|{ui}) = I(x;y|{ui}) - I(x;y|z,{ui})
541  std::vector< NodeId, ALLOC< NodeId > > uiz_ids = ui_ids;
542  uiz_ids.push_back(var_z);
543  return __NI_score(var_x, var_y, ui_ids) - __NI_score(var_x, var_y, uiz_ids);
544  }
545 
546 
548  template < template < typename > class ALLOC >
549  double CorrectedMutualInformation< ALLOC >::__K_score(
550  NodeId var1,
551  NodeId var2,
552  const std::vector< NodeId, ALLOC< NodeId > >& conditioning_ids) {
553  // if no penalty, return 0
554  if (__kmode == KModeTypes::NoCorr) return 0.0;
555 
556 
557  // If using the K cache, verify whether the set isn't already known
558  IdSet< ALLOC > idset;
559  if (__use_KCache) {
560  idset = std::move(IdSet< ALLOC >(var1, var2, conditioning_ids, false));
561  try {
562  return __KCache.score(idset);
563  } catch (const NotFound&) {}
564  }
565 
566  // compute the score
567  double score;
568  size_t rx, ry, rui;
569  switch (__kmode) {
570  case KModeTypes::MDL: {
571  const auto& database = __NH.database();
572  const auto& node2cols = __NH.nodeId2Columns();
573 
574  rui = 1;
575  if (!node2cols.empty()) {
576  rx = database.domainSize(node2cols.second(var1));
577  ry = database.domainSize(node2cols.second(var2));
578  for (const NodeId i : conditioning_ids) {
579  rui *= database.domainSize(node2cols.second(i));
580  }
581  } else {
582  rx = database.domainSize(var1);
583  ry = database.domainSize(var2);
584  for (const NodeId i : conditioning_ids) {
585  rui *= database.domainSize(i);
586  }
587  }
588 
589  // compute the size of the database, including the a priori
590  if (!__use_KCache) {
591  idset = std::move(IdSet< ALLOC >(var1, var2, conditioning_ids, false));
592  }
593  const double N = __score_MDL.N(idset);
594 
595  score = 0.5 * (rx - 1) * (ry - 1) * rui * std::log2(N);
596  } break;
597 
598  case KModeTypes::NML:
599  score = __k_NML.score(var1, var2, conditioning_ids);
600  break;
601 
602  default:
603  GUM_ERROR(NotImplementedYet,
604  "CorrectedMutualInformation mode does "
605  "not support yet this correction");
606  }
607 
608  // shall we put the score into the cache?
609  if (__use_KCache) { __KCache.insert(idset, score); }
610  return score;
611  }
612 
613 
615  template < template < typename > class ALLOC >
616  INLINE double CorrectedMutualInformation< ALLOC >::__K_score(
617  NodeId var1,
618  NodeId var2,
619  NodeId var3,
620  const std::vector< NodeId, ALLOC< NodeId > >& ui_ids) {
621  // k(x;y;z|ui) = k(x;y|ui,z) - k(x;y|ui)
622  std::vector< NodeId, ALLOC< NodeId > > uiz_ids = ui_ids;
623  uiz_ids.push_back(var3);
624  return __K_score(var1, var2, uiz_ids) - __K_score(var1, var2, ui_ids);
625  }
626 
627 
628  } /* namespace learning */
629 
630 } /* namespace gum */
631 
632 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
void useNML()
use the kNML penalty function
void clearRanges()
reset the ranges to the one range corresponding to the whole database
void useCnrCache(bool on_off)
turn on/off the use of the CnrCache (the cache for the Cnr formula)
virtual void useCache(bool on_off)
turn on/off the use of all the caches
void useHCache(bool on_off)
turn on/off the use of the HCache (the cache for the entropies)
virtual ~CorrectedMutualInformation()
destructor
virtual void clear()
clears all the data structures from memory
void clearICache()
clears the ICache (the mutual information cache)
CorrectedMutualInformation< ALLOC > & operator=(const CorrectedMutualInformation< ALLOC > &from)
copy operator
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
allocator_type getAllocator() const
returns the allocator used by the score
STL namespace.
void useKCache(bool on_off)
turn on/off the use of the KCache (the cache for the penalties)
virtual CorrectedMutualInformation< ALLOC > * clone() const
virtual copy constructor
virtual void clearCache()
clears all the current caches
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
virtual std::size_t minNbRowsPerThread() const
returns the minimum of rows that each thread should process
const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > & ranges() const
returns the current ranges
double score(NodeId var1, NodeId var2)
returns the 2-point mutual information corresponding to a given nodeset
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the mutual information
CorrectedMutualInformation(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
void clearKCache()
clears the KCache (the cache for the penalties)
void clearHCache()
clears the HCache (the cache for the entropies)
virtual std::size_t nbThreads() const
returns the number of threads used to parse the database
virtual void setMinNbRowsPerThread(const std::size_t nb) const
changes the number min of rows a thread should process in a multithreading context ...
void useMDL()
use the MDL penalty function
void useICache(bool on_off)
turn on/off the use of the ICache (the mutual information cache)
void clearCnrCache()
clears the CnrCache (the cache for the Cnr formula)
Size NodeId
Type for node ids.
Definition: graphElements.h:97
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
void useNoCorr()
use no correction/penalty function
virtual void setMaxNbThreads(std::size_t nb) const
changes the max number of threads used to parse the database