aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
variableLog2ParamComplexity.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief the class for computing the log2 of the parametric complexity of
24  * an r-ary multinomial variable
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef GUM_VARIABLE_LOG2_PARAM_COMPLEXITY_H
30 #define GUM_VARIABLE_LOG2_PARAM_COMPLEXITY_H
31 
32 #include <cstddef>
33 #include <string>
34 #include <fstream>
35 
36 #include <agrum/agrum.h>
37 #include <agrum/tools/core/math/math_utils.h>
38 #include <agrum/tools/core/hashTable.h>
39 
40 namespace gum {
41 
42 
43  // the CTable cache for log2(C_n^r), with n in {0,...,999} and r in {2,3,4,5}
44  extern const double VariableLog2ParamComplexityCTable[4][1000];
45 
46  // the size in r of the CTable cache
47  constexpr std::size_t VariableLog2ParamComplexityCTableRSize{std::size_t(4)};
48 
49  // the size in n of the CTable cache
50  constexpr std::size_t VariableLog2ParamComplexityCTableNSize{std::size_t(1000)};
51 
52 
53  /** @class VariableLog2ParamComplexity
54  * @brief the class for computing the log2 of the parametric complexity
55  * of an r-ary multinomial variable
56  * @headerfile VariableLog2ParamComplexity.h <agrum/tools/core/math/VariableLog2ParamComplexity.h>
57  * @ingroup math_group
58  *
59  * This class enables to compute the log in base 2 of the parametric
60  * complexity of a single r-ary multinomial variable, i.e., the log in
61  * base 2 of the C_N^r term used by NML scores in Bayesian network
62  * structure learning algorithm (see, e.g., Silander, Roos,
63  * Kontkanen and Myllymaki (2007) "Factorized Normalized Maximum "
64  * Likelihood Criterion for Learning Bayesian network Structures)"
65  */
66  template < template < typename > class ALLOC = std::allocator >
67  class VariableLog2ParamComplexity: private ALLOC< double > {
68  public:
69  /// type for the allocators passed in arguments of methods
70  using allocator_type = ALLOC< double >;
71 
72  // ########################################################################
73  /// @name Constructors / Destructors
74  // ########################################################################
75  /// @{
76 
77  /// default constructor
78  VariableLog2ParamComplexity(const allocator_type& alloc = allocator_type());
79 
80  /// copy constructor
81  VariableLog2ParamComplexity(const VariableLog2ParamComplexity& from);
82 
83  /// copy constructor with a given allocator
84  VariableLog2ParamComplexity(const VariableLog2ParamComplexity& from,
85  const allocator_type& alloc);
86 
87  /// move constructor
88  VariableLog2ParamComplexity(VariableLog2ParamComplexity&& from);
89 
90  /// move constructor with a given allocator
91  VariableLog2ParamComplexity(VariableLog2ParamComplexity&& from, const allocator_type& alloc);
92 
93  /// virtual copy constructor
94  virtual VariableLog2ParamComplexity* clone() const;
95 
96  /// virtual copy constructor with a given allocator
97  virtual VariableLog2ParamComplexity* clone(const allocator_type& alloc) const;
98 
99  /// destructor
100  virtual ~VariableLog2ParamComplexity();
101 
102  /// @}
103 
104 
105  // ########################################################################
106  /// @name Operators
107  // ########################################################################
108  /// @{
109 
110  /// copy operator
111  VariableLog2ParamComplexity& operator=(const VariableLog2ParamComplexity& from);
112 
113  /// move operator
114  VariableLog2ParamComplexity& operator=(VariableLog2ParamComplexity&& from);
115 
116  /// @}
117 
118 
119  // ########################################################################
120  /// @name Accessors / Modifiers
121  // ########################################################################
122  /// @{
123 
124  /// returns the value of the log in base 2 of Cnr
125  double log2Cnr(const std::size_t r, const double n);
126 
127  /// the function used to write the cpp file with the values of log2(Cnr)
128  void CnrToFile(const std::string& filename);
129 
130  /// indicates whether we wish to use a cache for the Cnr
131  void useCache(const bool on_off);
132 
133  /// clears the current cache
134  void clearCache();
135 
136  /// returns the allocator used by the parameterized complexity class
137  allocator_type getAllocator() const;
138 
139 
140  /// @}
141 
142  private:
143  /// the value of N above which we should use Szpankowski's approximation
144  const double _Szpankowski_threshold_{VariableLog2ParamComplexityCTableNSize};
145 
146  // constants used to speed-up the computation of the Szpankowski
147  // approximation.
148  // The formula for the approximation given in Silander, Roos,
149  // Kontkanen and Myllymaki (2007) "Factorized Normalized Maximum "
150  // Likelihood Criterion for Learning Bayesian network Structures" paper
151  // is incorrect. However, the one in Kontkanen, Buntine, Myllymaki,
152  // Rissanen and Tirri (2003) "Efficient Computation of Stochastic
153  // Complexity" is correct. So we use the latter and simplify it. Thus,
154  // the approximation of log2(Cnr) is equal to:
155  // 0.5 log2(n) - 0.5 + log2(sqrt(pi)) + (sqrt(2/pi)/3) / sqrt(n) +
156  // (3/36 - 4/(9*pi)) / n.
157  // So, given the constants below, it is equal to:
158  // 0.5 * std::log2 (n) + _cst1_ + _cst2_ / std::sqrt(n) + _cst3_ / n
159  const double _cst1_ = -0.5 + std::log2(std::sqrt(M_PI));
160  const double _cst2_ = std::sqrt(2.0 / M_PI) / 3.0;
161  const double _cst3_ = 3.0 / 36.0 - 4.0 / (9.0 * M_PI);
162 
163  // indicates whether we should use a cache or not
164  bool _use_cache_{true};
165 
166  // the cache used, eventually, to store the log2Cnr values
167  HashTable< std::pair< std::size_t, double >, double > _cache_;
168  };
169 
170 } /* namespace gum */
171 
172 
173 // always include the template implementation
174 #include <agrum/tools/core/math/variableLog2ParamComplexity_tpl.h>
175 
176 #endif /* GUM_VARIABLE_LOG2_PARAM_COMPLEXITY_H */