aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
variableLog2ParamComplexity.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing the log2 of the parametric complexity of
24  * an r-ary multinomial variable
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef GUM_VARIABLE_LOG2_PARAM_COMPLEXITY_H
30 #define GUM_VARIABLE_LOG2_PARAM_COMPLEXITY_H
31 
32 #include <cstddef>
33 #include <string>
34 #include <fstream>
35 
36 #include <agrum/agrum.h>
37 #include <agrum/tools/core/math/math_utils.h>
38 #include <agrum/tools/core/hashTable.h>
39 
40 namespace gum {
41 
42 
43  // the CTable cache for log2(C_n^r), with n in {0,...,999} and r in {2,3,4,5}
44  extern const double VariableLog2ParamComplexityCTable[4][1000];
45 
46  // the size in r of the CTable cache
47  constexpr std::size_t VariableLog2ParamComplexityCTableRSize{std::size_t(4)};
48 
49  // the size in n of the CTable cache
50  constexpr std::size_t VariableLog2ParamComplexityCTableNSize{std::size_t(1000)};
51 
52 
53  /** @class VariableLog2ParamComplexity
54  * @brief the class for computing the log2 of the parametric complexity
55  * of an r-ary multinomial variable
56  * @headerfile VariableLog2ParamComplexity.h <agrum/tools/core/math/VariableLog2ParamComplexity.h>
57  * @ingroup math_group
58  *
59  * This class enables to compute the log in base 2 of the parametric
60  * complexity of a single r-ary multinomial variable, i.e., the log in
61  * base 2 of the C_N^r term used by NML scores in Bayesian network
62  * structure learning algorithm (see, e.g., Silander, Roos,
63  * Kontkanen and Myllymaki (2007) "Factorized Normalized Maximum "
64  * Likelihood Criterion for Learning Bayesian network Structures)"
65  */
66  template < template < typename > class ALLOC = std::allocator >
67  class VariableLog2ParamComplexity: private ALLOC< double > {
68  public:
69  /// type for the allocators passed in arguments of methods
70  using allocator_type = ALLOC< double >;
71 
72  // ########################################################################
73  /// @name Constructors / Destructors
74  // ########################################################################
75  /// @{
76 
77  /// default constructor
78  VariableLog2ParamComplexity(const allocator_type& alloc = allocator_type());
79 
80  /// copy constructor
81  VariableLog2ParamComplexity(const VariableLog2ParamComplexity& from);
82 
83  /// copy constructor with a given allocator
84  VariableLog2ParamComplexity(const VariableLog2ParamComplexity& from,
85  const allocator_type& alloc);
86 
87  /// move constructor
88  VariableLog2ParamComplexity(VariableLog2ParamComplexity&& from);
89 
90  /// move constructor with a given allocator
91  VariableLog2ParamComplexity(VariableLog2ParamComplexity&& from,
92  const allocator_type& alloc);
93 
94  /// virtual copy constructor
95  virtual VariableLog2ParamComplexity* clone() const;
96 
97  /// virtual copy constructor with a given allocator
98  virtual VariableLog2ParamComplexity* clone(const allocator_type& alloc) const;
99 
100  /// destructor
101  virtual ~VariableLog2ParamComplexity();
102 
103  /// @}
104 
105 
106  // ########################################################################
107  /// @name Operators
108  // ########################################################################
109  /// @{
110 
111  /// copy operator
112  VariableLog2ParamComplexity&
113  operator=(const VariableLog2ParamComplexity& from);
114 
115  /// move operator
116  VariableLog2ParamComplexity& operator=(VariableLog2ParamComplexity&& from);
117 
118  /// @}
119 
120 
121  // ########################################################################
122  /// @name Accessors / Modifiers
123  // ########################################################################
124  /// @{
125 
126  /// returns the value of the log in base 2 of Cnr
127  double log2Cnr(const std::size_t r, const double n);
128 
129  /// the function used to write the cpp file with the values of log2(Cnr)
130  void CnrToFile(const std::string& filename);
131 
132  /// indicates whether we wish to use a cache for the Cnr
133  void useCache(const bool on_off);
134 
135  /// clears the current cache
136  void clearCache();
137 
138  /// returns the allocator used by the parameterized complexity class
139  allocator_type getAllocator() const;
140 
141 
142  /// @}
143 
144  private:
145  /// the value of N above which we should use Szpankowski's approximation
146  const double Szpankowski_threshold__{VariableLog2ParamComplexityCTableNSize};
147 
148  // constants used to speed-up the computation of the Szpankowski
149  // approximation.
150  // The formula for the approximation given in Silander, Roos,
151  // Kontkanen and Myllymaki (2007) "Factorized Normalized Maximum "
152  // Likelihood Criterion for Learning Bayesian network Structures" paper
153  // is incorrect. However, the one in Kontkanen, Buntine, Myllymaki,
154  // Rissanen and Tirri (2003) "Efficient Computation of Stochastic
155  // Complexity" is correct. So we use the latter and simplify it. Thus,
156  // the approximation of log2(Cnr) is equal to:
157  // 0.5 log2(n) - 0.5 + log2(sqrt(pi)) + (sqrt(2/pi)/3) / sqrt(n) +
158  // (3/36 - 4/(9*pi)) / n.
159  // So, given the constants below, it is equal to:
160  // 0.5 * std::log2 (n) + cst1__ + cst2__ / std::sqrt(n) + cst3__ / n
161  const double cst1__ = -0.5 + std::log2(std::sqrt(M_PI));
162  const double cst2__ = std::sqrt(2.0 / M_PI) / 3.0;
163  const double cst3__ = 3.0 / 36.0 - 4.0 / (9.0 * M_PI);
164 
165  // indicates whether we should use a cache or not
166  bool use_cache__{true};
167 
168  // the cache used, eventually, to store the log2Cnr values
169  HashTable< std::pair< std::size_t, double >, double > cache__;
170  };
171 
172 } /* namespace gum */
173 
174 
175 // always include the template implementation
176 #include <agrum/tools/core/math/variableLog2ParamComplexity_tpl.h>
177 
178 #endif /* GUM_VARIABLE_LOG2_PARAM_COMPLEXITY_H */