aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
variableLog2ParamComplexity_tpl.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing the log2 of the parametric complexity of
24  * an r-ary multinomial variable
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 
31 # include <agrum/tools/core/math/variableLog2ParamComplexity.h>
32 # include <agrum/tools/core/math/gammaLog2.h>
33 # include <iostream>
34 # include <sstream>
35 
36 
37 namespace gum {
38 
39 
40  /// returns the allocator used by the parameterized complexity class
41  template < template < typename > class ALLOC >
42  INLINE typename VariableLog2ParamComplexity< ALLOC >::allocator_type
43  VariableLog2ParamComplexity< ALLOC >::getAllocator() const {
44  return *this;
45  }
46 
47 
48  /// default constructor
49  template < template < typename > class ALLOC >
50  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
51  const allocator_type& alloc) :
52  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
53  cache__(1000) {
54  GUM_CONSTRUCTOR(VariableLog2ParamComplexity);
55  }
56 
57 
58  /// copy constructor with a given allocator
59  template < template < typename > class ALLOC >
60  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
61  const VariableLog2ParamComplexity< ALLOC >& from,
62  const typename VariableLog2ParamComplexity< ALLOC >::allocator_type& alloc) :
63  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
64  use_cache__(from.use_cache__), cache__(from.cache__) {
65  GUM_CONS_CPY(VariableLog2ParamComplexity);
66  }
67 
68 
69  /// copy constructor
70  template < template < typename > class ALLOC >
71  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
72  const VariableLog2ParamComplexity< ALLOC >& from) :
73  VariableLog2ParamComplexity(from, this->getAllocator()) {}
74 
75 
76  /// move constructor with a given allocator
77  template < template < typename > class ALLOC >
78  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
79  VariableLog2ParamComplexity< ALLOC >&& from,
80  const typename VariableLog2ParamComplexity< ALLOC >::allocator_type& alloc) :
81  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
82  use_cache__(from.use_cache__), cache__(std::move(from.cache__)) {
83  GUM_CONS_MOV(VariableLog2ParamComplexity);
84  }
85 
86 
87  /// move constructor
88  template < template < typename > class ALLOC >
89  INLINE VariableLog2ParamComplexity< ALLOC >::VariableLog2ParamComplexity(
90  VariableLog2ParamComplexity< ALLOC >&& from) :
91  VariableLog2ParamComplexity(std::move(from), this->getAllocator()) {}
92 
93 
94  /// virtual copy constructor
95  template < template < typename > class ALLOC >
96  VariableLog2ParamComplexity< ALLOC >*
97  VariableLog2ParamComplexity< ALLOC >::clone(
98  const typename VariableLog2ParamComplexity< ALLOC >::allocator_type& alloc)
99  const {
100  ALLOC< VariableLog2ParamComplexity< ALLOC > > allocator(alloc);
101  VariableLog2ParamComplexity< ALLOC >* table = allocator.allocate(1);
102  try {
103  allocator.construct(table, *this);
104  } catch (...) {
105  allocator.deallocate(table, 1);
106  throw;
107  }
108  return table;
109  }
110 
111 
112  /// virtual copy constructor
113  template < template < typename > class ALLOC >
114  VariableLog2ParamComplexity< ALLOC >*
115  VariableLog2ParamComplexity< ALLOC >::clone() const {
116  return clone(this->getAllocator());
117  }
118 
119 
120  /// destructor
121  template < template < typename > class ALLOC >
122  INLINE VariableLog2ParamComplexity< ALLOC >::~VariableLog2ParamComplexity() {
123  GUM_DESTRUCTOR(VariableLog2ParamComplexity);
124  }
125 
126 
127  /// copy operator
128  template < template < typename > class ALLOC >
129  INLINE VariableLog2ParamComplexity< ALLOC >&
130  VariableLog2ParamComplexity< ALLOC >::operator=(
131  const VariableLog2ParamComplexity< ALLOC >& from) {
132  return *this;
133  }
134 
135 
136  /// move operator
137  template < template < typename > class ALLOC >
138  INLINE VariableLog2ParamComplexity< ALLOC >&
139  VariableLog2ParamComplexity< ALLOC >::operator=(
140  VariableLog2ParamComplexity< ALLOC >&& from) {
141  return *this;
142  }
143 
144 
145  /// indicates whether we wish to use a cache for the Cnr
146  template < template < typename > class ALLOC >
147  INLINE void VariableLog2ParamComplexity< ALLOC >::useCache(const bool on_off) {
148  use_cache__ = on_off;
149  }
150 
151 
152  /// clears the current cache
153  template < template < typename > class ALLOC >
154  INLINE void VariableLog2ParamComplexity< ALLOC >::clearCache() {
155  cache__.clear();
156  }
157 
158 
159  /// returns the value of the log in base 2 of Cnr
160  template < template < typename > class ALLOC >
161  double VariableLog2ParamComplexity< ALLOC >::log2Cnr(const std::size_t r,
162  const double n) {
163  // we know that c_n^1 = 1 for all values of n
164  // in addition, c_0^r = 1 for all values of r
165  // finally, it is easy to see that c_1^r = r for all r
166  if (r == std::size_t(1)) return 0.0; // log2(1)
167  if (n == 0.0) return 0.0; // log2(1)
168  if (n == 1.0) return std::log2((double)r); // log2(r)
169 
170  if (n < 0.0) {
171  GUM_ERROR(OutOfBounds,
172  "In the penalty of the fNML score, n must be greater "
173  << "than or equal to 0. But, here, n = " << n);
174  }
175 
176  if (n < VariableLog2ParamComplexityCTableNSize) {
177  // check if we can find the value we look for in precomputed table
178  // ScorefNMLVariableLog2ParamComplexity
179  std::size_t xn = (std::size_t)n;
180  if (r - 2 < VariableLog2ParamComplexityCTableRSize) {
181  return VariableLog2ParamComplexityCTable[r - 2][xn];
182  } else {
183  // try to find the value in the cache
184  if (use_cache__) {
185  try {
186  return cache__[std::pair< std::size_t, double >{r, n}];
187  } catch (NotFound&) {}
188  }
189 
190  // use Equation (13) of the paper to compute the value of cnr:
191  // C_n^r = C_n^{r-1} + (n / (r-2)) C_n^{r-2}
192  // as we handle only log2's of C_n^r, we have the following:
193  // let k_r be such that C_n^{r-2} = k_r * C_n^{r-1}
194  // log2 ( C_n^r ) = log2 ( C_n^{r-1} + k_r * (n/(r-2)) * C_n^{r-1} )
195  // = log2 ( C_n^{r-1} ) + log2 ( 1 + k_r * (n/(r-2)) )
196  // as k_r = C_n^{r-2} / C_n^{r-1}, we have that
197  // log2(k_r) = log2 ( C_n^{r-2} ) - log2 ( C_n^{r-1} )
198  // so, k_r = exp ( (log2(cn_^{r-2}) - log2(C_n^{r-1})) * log(2) )
199  // now, let q_r = 1 + k_r * (n/(r-2)), then
200  // C_n^r = C_n^{r-1} * q_r, or, equivalently,
201  // log2(C_n^r) = log2(C_n^{r-1}) + log2(q_r)
202  // Now, we can use the same method to compute C_n^{r+1}:
203  // k_{r+1} = C_n^{r-1} / C_n^r = 1 / q_r
204  // q_{r+1} = 1 + k_{r+1} * (n/(r-1))
205  // C_n^{r+1} = C_n^r * q_{r+1}
206  double log2Cnr1 = VariableLog2ParamComplexityCTable[3][xn]; // log(C_n^5)
207  double log2Cnr2 = VariableLog2ParamComplexityCTable[2][xn]; // log(C_n^4)
208  double log2Cnr = 0.0;
209  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
210  double q_r = 1.0 + k_r * n / (6.0 - 2.0); // we first compute C_n^6
211  for (std::size_t i = std::size_t(6); i <= r; ++i) {
212  log2Cnr = log2Cnr1 + std::log2(q_r);
213  log2Cnr1 = log2Cnr;
214  k_r = 1.0 / q_r;
215  q_r = 1.0 + k_r * (n / (i - 1.0));
216  }
217 
218  // if we use a cache, update it
219  if (use_cache__) {
220  cache__.insert(std::pair< std::size_t, double >{r, n}, log2Cnr);
221  }
222 
223  return log2Cnr;
224  }
225  } else {
226  // try to find the value in the cache
227  if (use_cache__) {
228  try {
229  return cache__[std::pair< std::size_t, double >{r, n}];
230  } catch (NotFound&) {}
231  }
232 
233  // compute the corrected Szpankowski approximation of cn2 (see the
234  // documentation of constants cst1, cst2, cst3 in the ScorefNML header)
235  double log2Cnr1
236  = 0.5 * std::log2(n) + cst1__ + cst2__ / std::sqrt(n) + cst3__ / n;
237  if (r == std::size_t(2)) return log2Cnr1;
238 
239  // the value of log2(cn1), which is always equal to 0
240  double log2Cnr2 = 0.0;
241 
242  // use Equation (13) of the paper to compute the value of cnr
243  // (see the detail of the formulas in the above if statement)
244  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
245  double q_r = 1.0 + k_r * n / (3.0 - 2.0); // we first compute C_n^3
246  double log2Cnr = 0.0;
247  for (std::size_t i = std::size_t(3); i <= r; ++i) {
248  log2Cnr = log2Cnr1 + std::log2(q_r);
249  log2Cnr1 = log2Cnr;
250  k_r = 1.0 / q_r;
251  q_r = 1.0 + k_r * (n / (i - 1.0));
252  }
253 
254  // if we use a cache, update it
255  if (use_cache__) {
256  cache__.insert(std::pair< std::size_t, double >{r, n}, log2Cnr);
257  }
258 
259  return log2Cnr;
260  }
261  }
262 
263 
264  /// the function used to write the cpp file with the values of log2(Cnr)
265  template < template < typename > class ALLOC >
266  void
267  VariableLog2ParamComplexity< ALLOC >::CnrToFile(const std::string& filename) {
268  // save all the value of cn2
269  std::vector< long double > cn2_table(VariableLog2ParamComplexityCTableNSize);
270  cn2_table[0] = 1;
271  cn2_table[1] = 2;
272 
273  // for every value of n less than Szpankowski_threshold, we compute the
274  // value of C_n^2 and write it into cn2_table
275  GammaLog2 gamma_log2;
276  for (double n = 2; n < VariableLog2ParamComplexityCTableNSize; ++n) {
277  // here, note that, in Silander, Roos, Kontkanen and Myllymaki (2007)
278  // "Factorized Normalized Maximum Likelihood Criterion for Learning
279  // Bayesian network Structures" paper, there is an uppercase N in the
280  // formula, but this should be a lowercase n. In addition, we will loop
281  // only on h=1 to n-1 and add to 2.0 the value computed to take into
282  // account of h=0 and h=n.
283  long double cn2 = 2;
284  for (double h = 1; h < n; ++h) {
285  long double elt
286  = (gamma_log2(n + 1) - gamma_log2(h + 1) - gamma_log2((n - h) + 1))
287  * M_LN2
288  + h * std::log(h / n) + (n - h) * std::log((n - h) / n);
289  cn2 += std::exp(elt);
290  }
291 
292  // const double logCn2 = (double) std::log2 ( cn2 );
293  cn2_table[n] = cn2;
294  }
295 
296  // write the header of the output file
297  std::ofstream outfile(filename);
298  if (!outfile.is_open()) {
299  GUM_ERROR(IOError, "It is impossible to open file " << filename);
300  }
301  outfile.precision(20);
302  outfile << "namespace gum {\n\n";
303  /*
304  outfile << " // the size in r of VariableLog2ParamComplexityCTable\n";
305  outfile << " const std::size_t VariableLog2ParamComplexityCTableRSize = "
306  << "4;\n\n";
307  outfile << " // the size in n of VariableLog2ParamComplexityCTable\n";
308  outfile << " const std::size_t VariableLog2ParamComplexityCTableNSize = "
309  << VariableLog2ParamComplexityCTableNSize << ";\n\n";
310  */
311  outfile << " // the CTable cache for log2(Cnr), n < "
312  << VariableLog2ParamComplexityCTableNSize << " and r in {2,3,4,5}\n";
313  outfile << " const double VariableLog2ParamComplexityCTable[4]["
314  << VariableLog2ParamComplexityCTableNSize << "] = {\n";
315 
316  // write the values of Cn2:
317  outfile << " { ";
318  bool first = true;
319  for (const auto cn2: cn2_table) {
320  if (first)
321  first = false;
322  else
323  outfile << ",\n ";
324  const double logCn2 = (double)std::log2(cn2);
325  outfile << logCn2;
326  }
327  outfile << " },\n";
328 
329  // write the values of cn3, which are equal to cn2 + n
330  outfile << " { ";
331  for (std::size_t i = std::size_t(0);
332  i < VariableLog2ParamComplexityCTableNSize;
333  ++i) {
334  if (i > std::size_t(0)) outfile << ",\n ";
335  const double logCn3 = (double)std::log2(cn2_table[i] + i);
336  outfile << logCn3;
337  }
338  outfile << " },\n";
339 
340  // write the values of cn4, which are equal to cn2 * (1 + n/2) + n
341  outfile << " { ";
342  for (std::size_t i = std::size_t(0);
343  i < VariableLog2ParamComplexityCTableNSize;
344  ++i) {
345  if (i > std::size_t(0)) outfile << ",\n ";
346  const double logCn4 = (double)std::log2(cn2_table[i] * (1.0 + i / 2.0) + i);
347  outfile << logCn4;
348  }
349  outfile << " },\n";
350 
351  // write the values of cn5, which are equal to cn2 * (1 + 5n/6) + n + n^2/3
352  outfile << " { ";
353  for (std::size_t i = std::size_t(0);
354  i < VariableLog2ParamComplexityCTableNSize;
355  ++i) {
356  if (i > std::size_t(0)) outfile << ",\n ";
357  const double logCn5 = (double)std::log2(cn2_table[i] * (1.0 + 5.0 * i / 6.0)
358  + i + i * i / 3.0);
359  outfile << logCn5;
360  }
361  outfile << " }\n";
362 
363  // write the footer and close the file
364  outfile << " };\n\n";
365  outfile << "} /* namespace gum */\n";
366  outfile.close();
367  }
368 
369 
370 } /* namespace gum */
371 
372 #endif /* DOXYGEN_SHOULD_SKIP_THIS */