aGrUM  0.16.0
variableLog2ParamComplexity_tpl.h
Go to the documentation of this file.
1 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 
34 # include <iostream>
35 # include <sstream>
36 
37 
38 namespace gum {
39 
40 
42  template < template < typename > class ALLOC >
45  return *this;
46  }
47 
48 
50  template < template < typename > class ALLOC >
52  const allocator_type& alloc) :
53  VariableLog2ParamComplexity< ALLOC >::allocator_type(alloc),
54  __cache(1000) {
55  GUM_CONSTRUCTOR(VariableLog2ParamComplexity);
56  }
57 
58 
60  template < template < typename > class ALLOC >
62  const VariableLog2ParamComplexity< ALLOC >& from,
65  __use_cache(from.__use_cache), __cache(from.__cache) {
66  GUM_CONS_CPY(VariableLog2ParamComplexity);
67  }
68 
69 
71  template < template < typename > class ALLOC >
73  const VariableLog2ParamComplexity< ALLOC >& from) :
75 
76 
78  template < template < typename > class ALLOC >
80  VariableLog2ParamComplexity< ALLOC >&& from,
83  __use_cache(from.__use_cache), __cache(std::move(from.__cache)) {
84  GUM_CONS_MOV(VariableLog2ParamComplexity);
85  }
86 
87 
89  template < template < typename > class ALLOC >
91  VariableLog2ParamComplexity< ALLOC >&& from) :
92  VariableLog2ParamComplexity(std::move(from), this->getAllocator()) {}
93 
94 
96  template < template < typename > class ALLOC >
97  VariableLog2ParamComplexity< ALLOC >*
100  const {
101  ALLOC< VariableLog2ParamComplexity< ALLOC > > allocator(alloc);
102  VariableLog2ParamComplexity< ALLOC >* table = allocator.allocate(1);
103  try {
104  allocator.construct(table, *this);
105  } catch (...) {
106  allocator.deallocate(table, 1);
107  throw;
108  }
109  return table;
110  }
111 
112 
114  template < template < typename > class ALLOC >
115  VariableLog2ParamComplexity< ALLOC >*
117  return clone(this->getAllocator());
118  }
119 
120 
122  template < template < typename > class ALLOC >
124  GUM_DESTRUCTOR(VariableLog2ParamComplexity);
125  }
126 
127 
129  template < template < typename > class ALLOC >
130  INLINE VariableLog2ParamComplexity< ALLOC >&
132  operator=(const VariableLog2ParamComplexity< ALLOC >& from) {
133  return *this;
134  }
135 
136 
138  template < template < typename > class ALLOC >
139  INLINE VariableLog2ParamComplexity< ALLOC >&
141  operator=(VariableLog2ParamComplexity< ALLOC >&& from) {
142  return *this;
143  }
144 
145 
147  template < template < typename > class ALLOC >
148  INLINE void VariableLog2ParamComplexity< ALLOC >::useCache(const bool on_off) {
149  __use_cache = on_off;
150  }
151 
152 
154  template < template < typename > class ALLOC >
156  __cache.clear();
157  }
158 
159 
161  template < template < typename > class ALLOC >
162  double VariableLog2ParamComplexity< ALLOC >::log2Cnr(const std::size_t r,
163  const double n) {
164  // we know that c_n^1 = 1 for all values of n
165  // in addition, c_0^r = 1 for all values of r
166  // finally, it is easy to see that c_1^r = r for all r
167  if (r == std::size_t(1)) return 0.0; // log2(1)
168  if (n == 0.0) return 0.0; // log2(1)
169  if (n == 1.0) return std::log2((double)r); // log2(r)
170 
171  if (n < 0.0) {
172  GUM_ERROR(OutOfBounds,
173  "In the penalty of the fNML score, n must be greater "
174  << "than or equal to 0. But, here, n = " << n);
175  }
176 
178  // check if we can find the value we look for in precomputed table
179  // ScorefNMLVariableLog2ParamComplexity
180  std::size_t xn = (std::size_t)n;
182  return VariableLog2ParamComplexityCTable[r - 2][xn];
183  } else {
184  // try to find the value in the cache
185  if (__use_cache) {
186  try {
187  return __cache[std::pair< std::size_t, double >{r, n}];
188  } catch (NotFound&) {}
189  }
190 
191  // use Equation (13) of the paper to compute the value of cnr:
192  // C_n^r = C_n^{r-1} + (n / (r-2)) C_n^{r-2}
193  // as we handle only log2's of C_n^r, we have the following:
194  // let k_r be such that C_n^{r-2} = k_r * C_n^{r-1}
195  // log2 ( C_n^r ) = log2 ( C_n^{r-1} + k_r * (n/(r-2)) * C_n^{r-1} )
196  // = log2 ( C_n^{r-1} ) + log2 ( 1 + k_r * (n/(r-2)) )
197  // as k_r = C_n^{r-2} / C_n^{r-1}, we have that
198  // log2(k_r) = log2 ( C_n^{r-2} ) - log2 ( C_n^{r-1} )
199  // so, k_r = exp ( (log2(cn_^{r-2}) - log2(C_n^{r-1})) * log(2) )
200  // now, let q_r = 1 + k_r * (n/(r-2)), then
201  // C_n^r = C_n^{r-1} * q_r, or, equivalently,
202  // log2(C_n^r) = log2(C_n^{r-1}) + log2(q_r)
203  // Now, we can use the same method to compute C_n^{r+1}:
204  // k_{r+1} = C_n^{r-1} / C_n^r = 1 / q_r
205  // q_{r+1} = 1 + k_{r+1} * (n/(r-1))
206  // C_n^{r+1} = C_n^r * q_{r+1}
207  double log2Cnr1 = VariableLog2ParamComplexityCTable[3][xn]; // log(C_n^5)
208  double log2Cnr2 = VariableLog2ParamComplexityCTable[2][xn]; // log(C_n^4)
209  double log2Cnr = 0.0;
210  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
211  double q_r = 1.0 + k_r * n / (6.0 - 2.0); // we first compute C_n^6
212  for (std::size_t i = std::size_t(6); i <= r; ++i) {
213  log2Cnr = log2Cnr1 + std::log2(q_r);
214  log2Cnr1 = log2Cnr;
215  k_r = 1.0 / q_r;
216  q_r = 1.0 + k_r * (n / (i - 1.0));
217  }
218 
219  // if we use a cache, update it
220  if (__use_cache) {
221  __cache.insert(std::pair< std::size_t, double >{r, n}, log2Cnr);
222  }
223 
224  return log2Cnr;
225  }
226  } else {
227  // try to find the value in the cache
228  if (__use_cache) {
229  try {
230  return __cache[std::pair< std::size_t, double >{r, n}];
231  } catch (NotFound&) {}
232  }
233 
234  // compute the corrected Szpankowski approximation of cn2 (see the
235  // documentation of constants cst1, cst2, cst3 in the ScorefNML header)
236  double log2Cnr1 =
237  0.5 * std::log2(n) + __cst1 + __cst2 / std::sqrt(n) + __cst3 / n;
238  if (r == std::size_t(2)) return log2Cnr1;
239 
240  // the value of log2(cn1), which is always equal to 0
241  double log2Cnr2 = 0.0;
242 
243  // use Equation (13) of the paper to compute the value of cnr
244  // (see the detail of the formulas in the above if statement)
245  double k_r = std::exp((log2Cnr2 - log2Cnr1) * M_LN2);
246  double q_r = 1.0 + k_r * n / (3.0 - 2.0); // we first compute C_n^3
247  double log2Cnr = 0.0;
248  for (std::size_t i = std::size_t(3); i <= r; ++i) {
249  log2Cnr = log2Cnr1 + std::log2(q_r);
250  log2Cnr1 = log2Cnr;
251  k_r = 1.0 / q_r;
252  q_r = 1.0 + k_r * (n / (i - 1.0));
253  }
254 
255  // if we use a cache, update it
256  if (__use_cache) {
257  __cache.insert(std::pair< std::size_t, double >{r, n}, log2Cnr);
258  }
259 
260  return log2Cnr;
261  }
262  }
263 
264 
266  template < template < typename > class ALLOC >
267  void
268  VariableLog2ParamComplexity< ALLOC >::CnrToFile(const std::string& filename) {
269  // save all the value of cn2
270  std::vector< long double > cn2_table(VariableLog2ParamComplexityCTableNSize);
271  cn2_table[0] = 1;
272  cn2_table[1] = 2;
273 
274  // for every value of n less than Szpankowski_threshold, we compute the
275  // value of C_n^2 and write it into cn2_table
276  GammaLog2 gamma_log2;
277  for (double n = 2; n < VariableLog2ParamComplexityCTableNSize; ++n) {
278  // here, note that, in Silander, Roos, Kontkanen and Myllymaki (2007)
279  // "Factorized Normalized Maximum Likelihood Criterion for Learning
280  // Bayesian Network Structures" paper, there is an uppercase N in the
281  // formula, but this should be a lowercase n. In addition, we will loop
282  // only on h=1 to n-1 and add to 2.0 the value computed to take into
283  // account of h=0 and h=n.
284  long double cn2 = 2;
285  for (double h = 1; h < n; ++h) {
286  long double elt =
287  (gamma_log2(n + 1) - gamma_log2(h + 1) - gamma_log2((n - h) + 1))
288  * M_LN2
289  + h * std::log(h / n) + (n - h) * std::log((n - h) / n);
290  cn2 += std::exp(elt);
291  }
292 
293  // const double logCn2 = (double) std::log2 ( cn2 );
294  cn2_table[n] = cn2;
295  }
296 
297  // write the header of the output file
298  std::ofstream outfile(filename);
299  if (!outfile.is_open()) {
300  GUM_ERROR(IOError, "It is impossible to open file " << filename);
301  }
302  outfile.precision(20);
303  outfile << "namespace gum {\n\n";
304  /*
305  outfile << " // the size in r of VariableLog2ParamComplexityCTable\n";
306  outfile << " const std::size_t VariableLog2ParamComplexityCTableRSize = "
307  << "4;\n\n";
308  outfile << " // the size in n of VariableLog2ParamComplexityCTable\n";
309  outfile << " const std::size_t VariableLog2ParamComplexityCTableNSize = "
310  << VariableLog2ParamComplexityCTableNSize << ";\n\n";
311  */
312  outfile << " // the CTable cache for log2(Cnr), n < "
313  << VariableLog2ParamComplexityCTableNSize << " and r in {2,3,4,5}\n";
314  outfile << " const double VariableLog2ParamComplexityCTable[4]["
315  << VariableLog2ParamComplexityCTableNSize << "] = {\n";
316 
317  // write the values of Cn2:
318  outfile << " { ";
319  bool first = true;
320  for (const auto cn2 : cn2_table) {
321  if (first)
322  first = false;
323  else
324  outfile << ",\n ";
325  const double logCn2 = (double)std::log2(cn2);
326  outfile << logCn2;
327  }
328  outfile << " },\n";
329 
330  // write the values of cn3, which are equal to cn2 + n
331  outfile << " { ";
332  for (std::size_t i = std::size_t(0);
334  ++i) {
335  if (i > std::size_t(0)) outfile << ",\n ";
336  const double logCn3 = (double)std::log2(cn2_table[i] + i);
337  outfile << logCn3;
338  }
339  outfile << " },\n";
340 
341  // write the values of cn4, which are equal to cn2 * (1 + n/2) + n
342  outfile << " { ";
343  for (std::size_t i = std::size_t(0);
345  ++i) {
346  if (i > std::size_t(0)) outfile << ",\n ";
347  const double logCn4 = (double)std::log2(cn2_table[i] * (1.0 + i / 2.0) + i);
348  outfile << logCn4;
349  }
350  outfile << " },\n";
351 
352  // write the values of cn5, which are equal to cn2 * (1 + 5n/6) + n + n^2/3
353  outfile << " { ";
354  for (std::size_t i = std::size_t(0);
356  ++i) {
357  if (i > std::size_t(0)) outfile << ",\n ";
358  const double logCn5 =
359  (double)std::log2(cn2_table[i] * (1.0 + 5.0 * i / 6.0) + i + i * i / 3.0);
360  outfile << logCn5;
361  }
362  outfile << " }\n";
363 
364  // write the footer and close the file
365  outfile << " };\n\n";
366  outfile << "} /* namespace gum */\n";
367  outfile.close();
368  }
369 
370 
371 } /* namespace gum */
372 
373 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
VariableLog2ParamComplexity(const allocator_type &alloc=allocator_type())
default constructor
virtual VariableLog2ParamComplexity * clone() const
virtual copy constructor
allocator_type getAllocator() const
returns the allocator used by the parameterized complexity class
const double VariableLog2ParamComplexityCTable[4][1000]
HashTable< std::pair< std::size_t, double >, double > __cache
STL namespace.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
constexpr std::size_t VariableLog2ParamComplexityCTableRSize
#define M_LN2
Definition: math.h:44
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void useCache(const bool on_off)
indicates whether we wish to use a cache for the Cnr
void CnrToFile(const std::string &filename)
the function used to write the cpp file with the values of log2(Cnr)
constexpr std::size_t VariableLog2ParamComplexityCTableNSize
ALLOC< double > allocator_type
type for the allocators passed in arguments of methods
void clear()
Removes all the elements in the hash table.
virtual ~VariableLog2ParamComplexity()
destructor
VariableLog2ParamComplexity & operator=(const VariableLog2ParamComplexity &from)
copy operator
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
void clearCache()
clears the current cache
double log2Cnr(const std::size_t r, const double n)
returns the value of the log in base 2 of Cnr
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55