aGrUM  0.14.2
variableLog2ParamComplexity_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
31 # include <iostream>
32 # include <sstream>
33 
34 
35 namespace gum {
36 
37 
39  template < template < typename > class ALLOC >
42  return *this;
43  }
44 
45 
47  template < template < typename > class ALLOC >
49  const allocator_type& alloc) :
50  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
51  __cache(1000) {
52  GUM_CONSTRUCTOR(VariableLog2ParamComplexity);
53  }
54 
55 
57  template < template < typename > class ALLOC >
59  const VariableLog2ParamComplexity< ALLOC >& from,
62  __use_cache(from.__use_cache), __cache(from.__cache) {
63  GUM_CONS_CPY(VariableLog2ParamComplexity);
64  }
65 
66 
68  template < template < typename > class ALLOC >
70  const VariableLog2ParamComplexity< ALLOC >& from) :
72 
73 
75  template < template < typename > class ALLOC >
77  VariableLog2ParamComplexity< ALLOC >&& from,
80  __use_cache(from.__use_cache), __cache(std::move(from.__cache)) {
81  GUM_CONS_MOV(VariableLog2ParamComplexity);
82  }
83 
84 
86  template < template < typename > class ALLOC >
88  VariableLog2ParamComplexity< ALLOC >&& from) :
89  VariableLog2ParamComplexity(std::move(from), this->getAllocator()) {}
90 
91 
93  template < template < typename > class ALLOC >
94  VariableLog2ParamComplexity< ALLOC >*
97  const {
98  ALLOC< VariableLog2ParamComplexity< ALLOC > > allocator(alloc);
99  VariableLog2ParamComplexity< ALLOC >* table = allocator.allocate(1);
100  try {
101  allocator.construct(table, *this);
102  } catch (...) {
103  allocator.deallocate(table, 1);
104  throw;
105  }
106  return table;
107  }
108 
109 
111  template < template < typename > class ALLOC >
112  VariableLog2ParamComplexity< ALLOC >*
114  return clone(this->getAllocator());
115  }
116 
117 
119  template < template < typename > class ALLOC >
121  GUM_DESTRUCTOR(VariableLog2ParamComplexity);
122  }
123 
124 
126  template < template < typename > class ALLOC >
127  INLINE VariableLog2ParamComplexity< ALLOC >&
129  operator=(const VariableLog2ParamComplexity< ALLOC >& from) {
130  return *this;
131  }
132 
133 
135  template < template < typename > class ALLOC >
136  INLINE VariableLog2ParamComplexity< ALLOC >&
138  operator=(VariableLog2ParamComplexity< ALLOC >&& from) {
139  return *this;
140  }
141 
142 
144  template < template < typename > class ALLOC >
145  INLINE void VariableLog2ParamComplexity< ALLOC >::useCache(const bool on_off) {
146  __use_cache = on_off;
147  }
148 
149 
151  template < template < typename > class ALLOC >
153  __cache.clear();
154  }
155 
156 
158  template < template < typename > class ALLOC >
159  double VariableLog2ParamComplexity< ALLOC >::log2Cnr(const std::size_t r,
160  const double n) {
161  // we know that c_n^1 = 1 for all values of n
162  // in addition, c_0^r = 1 for all values of r
163  // finally, it is easy to see that c_1^r = r for all r
164  if (r == std::size_t(1)) return 0.0; // log2(1)
165  if (n == 0.0) return 0.0; // log2(1)
166  if (n == 1.0) return std::log2((double)r); // log2(r)
167 
168  if (n < 0.0) {
169  GUM_ERROR(OutOfBounds,
170  "In the penalty of the fNML score, n must be greater "
171  << "than or equal to 0. But, here, n = " << n);
172  }
173 
175  // check if we can find the value we look for in precomputed table
176  // ScorefNMLVariableLog2ParamComplexity
177  std::size_t xn = (std::size_t)n;
179  return VariableLog2ParamComplexityCTable[r - 2][xn];
180  } else {
181  // try to find the value in the cache
182  if (__use_cache) {
183  try {
184  return __cache[std::pair< std::size_t, double >{r, n}];
185  } catch (NotFound&) {}
186  }
187 
188  // use Equation (13) of the paper to compute the value of cnr:
189  // C_n^r = C_n^{r-1} + (n / (r-2)) C_n^{r-2}
190  // as we handle only log2's of C_n^r, we have the following:
191  // let k_r be such that C_n^{r-2} = k_r * C_n^{r-1}
192  // log2 ( C_n^r ) = log2 ( C_n^{r-1} + k_r * (n/(r-2)) * C_n^{r-1} )
193  // = log2 ( C_n^{r-1} ) + log2 ( 1 + k_r * (n/(r-2)) )
194  // as k_r = C_n^{r-2} / C_n^{r-1}, we have that
195  // log2(k_r) = log2 ( C_n^{r-2} ) - log2 ( C_n^{r-1} )
196  // so, k_r = exp ( (log2(cn_^{r-2}) - log2(C_n^{r-1})) * log(2) )
197  // now, let q_r = 1 + k_r * (n/(r-2)), then
198  // C_n^r = C_n^{r-1} * q_r, or, equivalently,
199  // log2(C_n^r) = log2(C_n^{r-1}) + log2(q_r)
200  // Now, we can use the same method to compute C_n^{r+1}:
201  // k_{r+1} = C_n^{r-1} / C_n^r = 1 / q_r
202  // q_{r+1} = 1 + k_{r+1} * (n/(r-1))
203  // C_n^{r+1} = C_n^r * q_{r+1}
204  double log2Cnr1 = VariableLog2ParamComplexityCTable[3][xn]; // log(C_n^5)
205  double log2Cnr2 = VariableLog2ParamComplexityCTable[2][xn]; // log(C_n^4)
206  double log2Cnr = 0.0;
207  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
208  double q_r = 1.0 + k_r * n / (6.0 - 2.0); // we first compute C_n^6
209  for (std::size_t i = std::size_t(6); i <= r; ++i) {
210  log2Cnr = log2Cnr1 + std::log2(q_r);
211  log2Cnr1 = log2Cnr;
212  k_r = 1.0 / q_r;
213  q_r = 1.0 + k_r * (n / (i - 1.0));
214  }
215 
216  // if we use a cache, update it
217  if (__use_cache) {
218  __cache.insert(std::pair< std::size_t, double >{r, n}, log2Cnr);
219  }
220 
221  return log2Cnr;
222  }
223  } else {
224  // try to find the value in the cache
225  if (__use_cache) {
226  try {
227  return __cache[std::pair< std::size_t, double >{r, n}];
228  } catch (NotFound&) {}
229  }
230 
231  // compute the corrected Szpankowski approximation of cn2 (see the
232  // documentation of constants cst1, cst2, cst3 in the ScorefNML header)
233  double log2Cnr1 =
234  0.5 * std::log2(n) + __cst1 + __cst2 / std::sqrt(n) + __cst3 / n;
235  if (r == std::size_t(2)) return log2Cnr1;
236 
237  // the value of log2(cn1), which is always equal to 0
238  double log2Cnr2 = 0.0;
239 
240  // use Equation (13) of the paper to compute the value of cnr
241  // (see the detail of the formulas in the above if statement)
242  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
243  double q_r = 1.0 + k_r * n / (3.0 - 2.0); // we first compute C_n^3
244  double log2Cnr = 0.0;
245  for (std::size_t i = std::size_t(3); i <= r; ++i) {
246  log2Cnr = log2Cnr1 + std::log2(q_r);
247  log2Cnr1 = log2Cnr;
248  k_r = 1.0 / q_r;
249  q_r = 1.0 + k_r * (n / (i - 1.0));
250  }
251 
252  // if we use a cache, update it
253  if (__use_cache) {
254  __cache.insert(std::pair< std::size_t, double >{r, n}, log2Cnr);
255  }
256 
257  return log2Cnr;
258  }
259  }
260 
261 
263  template < template < typename > class ALLOC >
264  void
265  VariableLog2ParamComplexity< ALLOC >::CnrToFile(const std::string& filename) {
266  // save all the value of cn2
267  std::vector< long double > cn2_table(VariableLog2ParamComplexityCTableNSize);
268  cn2_table[0] = 1;
269  cn2_table[1] = 2;
270 
271  // for every value of n less than Szpankowski_threshold, we compute the
272  // value of C_n^2 and write it into cn2_table
273  GammaLog2 gamma_log2;
274  for (double n = 2; n < VariableLog2ParamComplexityCTableNSize; ++n) {
275  // here, note that, in Silander, Roos, Kontkanen and Myllymaki (2007)
276  // "Factorized Normalized Maximum Likelihood Criterion for Learning
277  // Bayesian Network Structures" paper, there is an uppercase N in the
278  // formula, but this should be a lowercase n. In addition, we will loop
279  // only on h=1 to n-1 and add to 2.0 the value computed to take into
280  // account of h=0 and h=n.
281  long double cn2 = 2;
282  for (double h = 1; h < n; ++h) {
283  long double elt =
284  (gamma_log2(n + 1) - gamma_log2(h + 1) - gamma_log2((n - h) + 1))
285  * M_LN2
286  + h * std::log(h / n) + (n - h) * std::log((n - h) / n);
287  cn2 += std::exp(elt);
288  }
289 
290  // const double logCn2 = (double) std::log2 ( cn2 );
291  cn2_table[n] = cn2;
292  }
293 
294  // write the header of the output file
295  std::ofstream outfile(filename);
296  if (!outfile.is_open()) {
297  GUM_ERROR(IOError, "It is impossible to open file " << filename);
298  }
299  outfile.precision(20);
300  outfile << "namespace gum {\n\n";
301  /*
302  outfile << " // the size in r of VariableLog2ParamComplexityCTable\n";
303  outfile << " const std::size_t VariableLog2ParamComplexityCTableRSize = "
304  << "4;\n\n";
305  outfile << " // the size in n of VariableLog2ParamComplexityCTable\n";
306  outfile << " const std::size_t VariableLog2ParamComplexityCTableNSize = "
307  << VariableLog2ParamComplexityCTableNSize << ";\n\n";
308  */
309  outfile << " // the CTable cache for log2(Cnr), n < "
310  << VariableLog2ParamComplexityCTableNSize << " and r in {2,3,4,5}\n";
311  outfile << " const double VariableLog2ParamComplexityCTable[4]["
312  << VariableLog2ParamComplexityCTableNSize << "] = {\n";
313 
314  // write the values of Cn2:
315  outfile << " { ";
316  bool first = true;
317  for (const auto cn2 : cn2_table) {
318  if (first)
319  first = false;
320  else
321  outfile << ",\n ";
322  const double logCn2 = (double)std::log2(cn2);
323  outfile << logCn2;
324  }
325  outfile << " },\n";
326 
327  // write the values of cn3, which are equal to cn2 + n
328  outfile << " { ";
329  for (std::size_t i = std::size_t(0);
331  ++i) {
332  if (i > std::size_t(0)) outfile << ",\n ";
333  const double logCn3 = (double)std::log2(cn2_table[i] + i);
334  outfile << logCn3;
335  }
336  outfile << " },\n";
337 
338  // write the values of cn4, which are equal to cn2 * (1 + n/2) + n
339  outfile << " { ";
340  for (std::size_t i = std::size_t(0);
342  ++i) {
343  if (i > std::size_t(0)) outfile << ",\n ";
344  const double logCn4 = (double)std::log2(cn2_table[i] * (1.0 + i / 2.0) + i);
345  outfile << logCn4;
346  }
347  outfile << " },\n";
348 
349  // write the values of cn5, which are equal to cn2 * (1 + 5n/6) + n + n^2/3
350  outfile << " { ";
351  for (std::size_t i = std::size_t(0);
353  ++i) {
354  if (i > std::size_t(0)) outfile << ",\n ";
355  const double logCn5 =
356  (double)std::log2(cn2_table[i] * (1.0 + 5.0 * i / 6.0) + i + i * i / 3.0);
357  outfile << logCn5;
358  }
359  outfile << " }\n";
360 
361  // write the footer and close the file
362  outfile << " };\n\n";
363  outfile << "} /* namespace gum */\n";
364  outfile.close();
365  }
366 
367 
368 } /* namespace gum */
369 
370 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
VariableLog2ParamComplexity(const allocator_type &alloc=allocator_type())
default constructor
virtual VariableLog2ParamComplexity * clone() const
virtual copy constructor
allocator_type getAllocator() const
returns the allocator used by the parameterized complexity class
const double VariableLog2ParamComplexityCTable[4][1000]
HashTable< std::pair< std::size_t, double >, double > __cache
STL namespace.
The class for computing Log2 (Gamma(x)).
constexpr std::size_t VariableLog2ParamComplexityCTableRSize
#define M_LN2
Definition: math.h:41
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
the class for computing the log2 of the parametric complexity of an r-ary multinomial variable ...
void useCache(const bool on_off)
indicates whether we wish to use a cache for the Cnr
void CnrToFile(const std::string &filename)
the function used to write the cpp file with the values of log2(Cnr)
constexpr std::size_t VariableLog2ParamComplexityCTableNSize
ALLOC< double > allocator_type
type for the allocators passed in arguments of methods
void clear()
Removes all the elements in the hash table.
virtual ~VariableLog2ParamComplexity()
destructor
VariableLog2ParamComplexity & operator=(const VariableLog2ParamComplexity &from)
copy operator
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
void clearCache()
clears the current cache
double log2Cnr(const std::size_t r, const double n)
returns the value of the log in base 2 of Cnr
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52