aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
variableLog2ParamComplexity_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing the log2 of the parametric complexity of
24  * an r-ary multinomial variable
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 # include <agrum/tools/core/math/variableLog2ParamComplexity.h>
32 # include <agrum/tools/core/math/gammaLog2.h>
33 # include <iostream>
34 # include <sstream>
35 
36 
37 namespace gum {
38 
39 
40  /// returns the allocator used by the parameterized complexity class
41  template < template < typename > class ALLOC >
42  INLINE typename VariableLog2ParamComplexity< ALLOC >::allocator_type
43  VariableLog2ParamComplexity< ALLOC >::getAllocator() const {
44  return *this;
45  }
46 
47 
48  /// default constructor
49  template < template < typename > class ALLOC >
50  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
51  const allocator_type& alloc) :
52  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
53  _cache_(1000) {
54  GUM_CONSTRUCTOR(VariableLog2ParamComplexity);
55  }
56 
57 
58  /// copy constructor with a given allocator
59  template < template < typename > class ALLOC >
60  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
61  const VariableLog2ParamComplexity< ALLOC >& from,
62  const typename VariableLog2ParamComplexity< ALLOC >::allocator_type& alloc) :
63  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
64  _use_cache_(from._use_cache_), _cache_(from._cache_) {
65  GUM_CONS_CPY(VariableLog2ParamComplexity);
66  }
67 
68 
69  /// copy constructor
70  template < template < typename > class ALLOC >
71  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
72  const VariableLog2ParamComplexity< ALLOC >& from) :
73  VariableLog2ParamComplexity(from, this->getAllocator()) {}
74 
75 
76  /// move constructor with a given allocator
77  template < template < typename > class ALLOC >
78  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
79  VariableLog2ParamComplexity< ALLOC >&& from,
80  const typename VariableLog2ParamComplexity< ALLOC >::allocator_type& alloc) :
81  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
82  _use_cache_(from._use_cache_), _cache_(std::move(from._cache_)) {
83  GUM_CONS_MOV(VariableLog2ParamComplexity);
84  }
85 
86 
87  /// move constructor
88  template < template < typename > class ALLOC >
89  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
90  VariableLog2ParamComplexity< ALLOC >&& from) :
91  VariableLog2ParamComplexity(std::move(from), this->getAllocator()) {}
92 
93 
94  /// virtual copy constructor
95  template < template < typename > class ALLOC >
96  VariableLog2ParamComplexity< ALLOC >* VariableLog2ParamComplexity< ALLOC >::clone(
97  const typename VariableLog2ParamComplexity< ALLOC >::allocator_type& alloc) const {
98  ALLOC< VariableLog2ParamComplexity< ALLOC > > allocator(alloc);
99  VariableLog2ParamComplexity< ALLOC >* table = allocator.allocate(1);
100  try {
101  allocator.construct(table, *this);
102  } catch (...) {
103  allocator.deallocate(table, 1);
104  throw;
105  }
106  return table;
107  }
108 
109 
110  /// virtual copy constructor
111  template < template < typename > class ALLOC >
112  VariableLog2ParamComplexity< ALLOC >* VariableLog2ParamComplexity< ALLOC >::clone() const {
113  return clone(this->getAllocator());
114  }
115 
116 
117  /// destructor
118  template < template < typename > class ALLOC >
119  INLINE VariableLog2ParamComplexity< ALLOC >::~VariableLog2ParamComplexity() {
120  GUM_DESTRUCTOR(VariableLog2ParamComplexity);
121  }
122 
123 
124  /// copy operator
125  template < template < typename > class ALLOC >
126  INLINE VariableLog2ParamComplexity< ALLOC >& VariableLog2ParamComplexity< ALLOC >::operator=(
127  const VariableLog2ParamComplexity< ALLOC >& from) {
128  return *this;
129  }
130 
131 
132  /// move operator
133  template < template < typename > class ALLOC >
134  INLINE VariableLog2ParamComplexity< ALLOC >&
135  VariableLog2ParamComplexity< ALLOC >::operator=(VariableLog2ParamComplexity< ALLOC >&& from) {
136  return *this;
137  }
138 
139 
140  /// indicates whether we wish to use a cache for the Cnr
141  template < template < typename > class ALLOC >
142  INLINE void VariableLog2ParamComplexity< ALLOC >::useCache(const bool on_off) {
143  _use_cache_ = on_off;
144  }
145 
146 
147  /// clears the current cache
148  template < template < typename > class ALLOC >
149  INLINE void VariableLog2ParamComplexity< ALLOC >::clearCache() {
150  _cache_.clear();
151  }
152 
153 
154  /// returns the value of the log in base 2 of Cnr
155  template < template < typename > class ALLOC >
156  double VariableLog2ParamComplexity< ALLOC >::log2Cnr(const std::size_t r, const double n) {
157  // we know that c_n^1 = 1 for all values of n
158  // in addition, c_0^r = 1 for all values of r
159  // finally, it is easy to see that c_1^r = r for all r
160  if (r == std::size_t(1)) return 0.0; // log2(1)
161  if (n == 0.0) return 0.0; // log2(1)
162  if (n == 1.0) return std::log2((double)r); // log2(r)
163 
164  if (n < 0.0) {
165  GUM_ERROR(OutOfBounds,
166  "In the penalty of the fNML score, n must be greater "
167  << "than or equal to 0. But, here, n = " << n);
168  }
169 
170  if (n < VariableLog2ParamComplexityCTableNSize) {
171  // check if we can find the value we look for in precomputed table
172  // ScorefNMLVariableLog2ParamComplexity
173  std::size_t xn = (std::size_t)n;
174  if (r - 2 < VariableLog2ParamComplexityCTableRSize) {
175  return VariableLog2ParamComplexityCTable[r - 2][xn];
176  } else {
177  // try to find the value in the cache
178  if (_use_cache_) {
179  try {
180  return _cache_[std::pair< std::size_t, double >{r, n}];
181  } catch (NotFound&) {}
182  }
183 
184  // use Equation (13) of the paper to compute the value of cnr:
185  // C_n^r = C_n^{r-1} + (n / (r-2)) C_n^{r-2}
186  // as we handle only log2's of C_n^r, we have the following:
187  // let k_r be such that C_n^{r-2} = k_r * C_n^{r-1}
188  // log2 ( C_n^r ) = log2 ( C_n^{r-1} + k_r * (n/(r-2)) * C_n^{r-1} )
189  // = log2 ( C_n^{r-1} ) + log2 ( 1 + k_r * (n/(r-2)) )
190  // as k_r = C_n^{r-2} / C_n^{r-1}, we have that
191  // log2(k_r) = log2 ( C_n^{r-2} ) - log2 ( C_n^{r-1} )
192  // so, k_r = exp ( (log2(cn_^{r-2}) - log2(C_n^{r-1})) * log(2) )
193  // now, let q_r = 1 + k_r * (n/(r-2)), then
194  // C_n^r = C_n^{r-1} * q_r, or, equivalently,
195  // log2(C_n^r) = log2(C_n^{r-1}) + log2(q_r)
196  // Now, we can use the same method to compute C_n^{r+1}:
197  // k_{r+1} = C_n^{r-1} / C_n^r = 1 / q_r
198  // q_{r+1} = 1 + k_{r+1} * (n/(r-1))
199  // C_n^{r+1} = C_n^r * q_{r+1}
200  double log2Cnr1 = VariableLog2ParamComplexityCTable[3][xn]; // log(C_n^5)
201  double log2Cnr2 = VariableLog2ParamComplexityCTable[2][xn]; // log(C_n^4)
202  double log2Cnr = 0.0;
203  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
204  double q_r = 1.0 + k_r * n / (6.0 - 2.0); // we first compute C_n^6
205  for (std::size_t i = std::size_t(6); i <= r; ++i) {
206  log2Cnr = log2Cnr1 + std::log2(q_r);
207  log2Cnr1 = log2Cnr;
208  k_r = 1.0 / q_r;
209  q_r = 1.0 + k_r * (n / (i - 1.0));
210  }
211 
212  // if we use a cache, update it
213  if (_use_cache_) { _cache_.insert(std::pair< std::size_t, double >{r, n}, log2Cnr); }
214 
215  return log2Cnr;
216  }
217  } else {
218  // try to find the value in the cache
219  if (_use_cache_) {
220  try {
221  return _cache_[std::pair< std::size_t, double >{r, n}];
222  } catch (NotFound&) {}
223  }
224 
225  // compute the corrected Szpankowski approximation of cn2 (see the
226  // documentation of constants cst1, cst2, cst3 in the ScorefNML header)
227  double log2Cnr1 = 0.5 * std::log2(n) + _cst1_ + _cst2_ / std::sqrt(n) + _cst3_ / n;
228  if (r == std::size_t(2)) return log2Cnr1;
229 
230  // the value of log2(cn1), which is always equal to 0
231  double log2Cnr2 = 0.0;
232 
233  // use Equation (13) of the paper to compute the value of cnr
234  // (see the detail of the formulas in the above if statement)
235  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
236  double q_r = 1.0 + k_r * n / (3.0 - 2.0); // we first compute C_n^3
237  double log2Cnr = 0.0;
238  for (std::size_t i = std::size_t(3); i <= r; ++i) {
239  log2Cnr = log2Cnr1 + std::log2(q_r);
240  log2Cnr1 = log2Cnr;
241  k_r = 1.0 / q_r;
242  q_r = 1.0 + k_r * (n / (i - 1.0));
243  }
244 
245  // if we use a cache, update it
246  if (_use_cache_) { _cache_.insert(std::pair< std::size_t, double >{r, n}, log2Cnr); }
247 
248  return log2Cnr;
249  }
250  }
251 
252 
253  /// the function used to write the cpp file with the values of log2(Cnr)
254  template < template < typename > class ALLOC >
255  void VariableLog2ParamComplexity< ALLOC >::CnrToFile(const std::string& filename) {
256  // save all the value of cn2
257  std::vector< long double > cn2_table(VariableLog2ParamComplexityCTableNSize);
258  cn2_table[0] = 1;
259  cn2_table[1] = 2;
260 
261  // for every value of n less than Szpankowski_threshold, we compute the
262  // value of C_n^2 and write it into cn2_table
263  GammaLog2 gamma_log2;
264  for (double n = 2; n < VariableLog2ParamComplexityCTableNSize; ++n) {
265  // here, note that, in Silander, Roos, Kontkanen and Myllymaki (2007)
266  // "Factorized Normalized Maximum Likelihood Criterion for Learning
267  // Bayesian network Structures" paper, there is an uppercase N in the
268  // formula, but this should be a lowercase n. In addition, we will loop
269  // only on h=1 to n-1 and add to 2.0 the value computed to take into
270  // account of h=0 and h=n.
271  long double cn2 = 2;
272  for (double h = 1; h < n; ++h) {
273  long double elt = (gamma_log2(n + 1) - gamma_log2(h + 1) - gamma_log2((n - h) + 1)) * M_LN2
274  + h * std::log(h / n) + (n - h) * std::log((n - h) / n);
275  cn2 += std::exp(elt);
276  }
277 
278  // const double logCn2 = (double) std::log2 ( cn2 );
279  cn2_table[n] = cn2;
280  }
281 
282  // write the header of the output file
283  std::ofstream outfile(filename);
284  if (!outfile.is_open()) { GUM_ERROR(IOError, "It is impossible to open file " << filename) }
285  outfile.precision(20);
286  outfile << "namespace gum {\n\n";
287  /*
288  outfile << " // the size in r of VariableLog2ParamComplexityCTable\n";
289  outfile << " const std::size_t VariableLog2ParamComplexityCTableRSize = "
290  << "4;\n\n";
291  outfile << " // the size in n of VariableLog2ParamComplexityCTable\n";
292  outfile << " const std::size_t VariableLog2ParamComplexityCTableNSize = "
293  << VariableLog2ParamComplexityCTableNSize << ";\n\n";
294  */
295  outfile << " // the CTable cache for log2(Cnr), n < " << VariableLog2ParamComplexityCTableNSize
296  << " and r in {2,3,4,5}\n";
297  outfile << " const double VariableLog2ParamComplexityCTable[4]["
298  << VariableLog2ParamComplexityCTableNSize << "] = {\n";
299 
300  // write the values of Cn2:
301  outfile << " { ";
302  bool first = true;
303  for (const auto cn2: cn2_table) {
304  if (first)
305  first = false;
306  else
307  outfile << ",\n ";
308  const double logCn2 = (double)std::log2(cn2);
309  outfile << logCn2;
310  }
311  outfile << " },\n";
312 
313  // write the values of cn3, which are equal to cn2 + n
314  outfile << " { ";
315  for (std::size_t i = std::size_t(0); i < VariableLog2ParamComplexityCTableNSize; ++i) {
316  if (i > std::size_t(0)) outfile << ",\n ";
317  const double logCn3 = (double)std::log2(cn2_table[i] + i);
318  outfile << logCn3;
319  }
320  outfile << " },\n";
321 
322  // write the values of cn4, which are equal to cn2 * (1 + n/2) + n
323  outfile << " { ";
324  for (std::size_t i = std::size_t(0); i < VariableLog2ParamComplexityCTableNSize; ++i) {
325  if (i > std::size_t(0)) outfile << ",\n ";
326  const double logCn4 = (double)std::log2(cn2_table[i] * (1.0 + i / 2.0) + i);
327  outfile << logCn4;
328  }
329  outfile << " },\n";
330 
331  // write the values of cn5, which are equal to cn2 * (1 + 5n/6) + n + n^2/3
332  outfile << " { ";
333  for (std::size_t i = std::size_t(0); i < VariableLog2ParamComplexityCTableNSize; ++i) {
334  if (i > std::size_t(0)) outfile << ",\n ";
335  const double logCn5
336  = (double)std::log2(cn2_table[i] * (1.0 + 5.0 * i / 6.0) + i + i * i / 3.0);
337  outfile << logCn5;
338  }
339  outfile << " }\n";
340 
341  // write the footer and close the file
342  outfile << " };\n\n";
343  outfile << "} /* namespace gum */\n";
344  outfile.close();
345  }
346 
347 
348 } /* namespace gum */
349 
350 #endif /* DOXYGEN_SHOULD_SKIP_THIS */