aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
kNML.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief The class for the NML penalty used in 3off2
25  *
26  * @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
27  */
28 
29 #ifndef GUM_LEARNING_K_NML_H
30 #define GUM_LEARNING_K_NML_H
31 
32 #include <vector>
33 
34 #include <agrum/tools/core/math/math_utils.h>
35 #include <agrum/tools/core/math/variableLog2ParamComplexity.h>
36 #include <agrum/tools/stattests/independenceTest.h>
37 
38 namespace gum {
39 
40  namespace learning {
41 
42 
43  /** @class KNML
44  * @brief the class for computing the NML penalty used by 3off2
45  * @ingroup learning_scores
46  *
47  */
48  template < template < typename > class ALLOC = std::allocator >
49  class KNML: private IndependenceTest< ALLOC > {
50  public:
51  /// type for the allocators passed in arguments of methods
53 
54  // ##########################################################################
55  /// @name Constructors / Destructors
56  // ##########################################################################
57  /// @{
58 
59  /// default constructor
60  /** @param parser the parser used to parse the database
61  * @param apriori An apriori that we add to the computation of
62  * the score (this should come from expert knowledge): this consists in
63  * adding numbers to countings in the contingency tables
64  * @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
65  * indices. The countings are then performed only on the union of the
66  * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
67  * cross validation tasks, in which part of the database should be ignored.
68  * An empty set of ranges is equivalent to an interval [X,Y) ranging over
69  * the whole database.
70  * @param nodeId2Columns a mapping from the ids of the nodes in the
71  * graphical model to the corresponding column in the DatabaseTable
72  * parsed by the parser. This enables estimating from a database in
73  * which variable A corresponds to the 2nd column the parameters of a BN
74  * in which variable A has a NodeId of 5. An empty nodeId2Columns
75  * bijection means that the mapping is an identity, i.e., the value of a
76  * NodeId is equal to the index of the column in the DatabaseTable.
77  * @param alloc the allocator used to allocate the structures within the
78  * Score.
79  * @warning If nodeId2columns is not empty, then only the scores over the
80  * ids belonging to this bijection can be computed: applying method
81  * score() over other ids will raise exception NotFound. */
82  KNML(const DBRowGeneratorParser< ALLOC >& parser,
83  const Apriori< ALLOC >& apriori,
84  const std::vector< std::pair< std::size_t, std::size_t >,
85  ALLOC< std::pair< std::size_t, std::size_t > > >&
86  ranges,
87  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
89  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
91 
92 
93  /// default constructor
94  /** @param parser the parser used to parse the database
95  * @param apriori An apriori that we add to the computation of
96  * the score (this should come from expert knowledge): this consists in
97  * adding numbers to countings in the contingency tables
98  * @param nodeId2Columns a mapping from the ids of the nodes in the
99  * graphical model to the corresponding column in the DatabaseTable
100  * parsed by the parser. This enables estimating from a database in
101  * which variable A corresponds to the 2nd column the parameters of a BN
102  * in which variable A has a NodeId of 5. An empty nodeId2Columns
103  * bijection means that the mapping is an identity, i.e., the value of a
104  * NodeId is equal to the index of the column in the DatabaseTable.
105  * @param alloc the allocator used to allocate the structures within the
106  * Score.
107  * @warning If nodeId2columns is not empty, then only the scores over the
108  * ids belonging to this bijection can be computed: applying method
109  * score() over other ids will raise exception NotFound. */
110  KNML(const DBRowGeneratorParser< ALLOC >& parser,
111  const Apriori< ALLOC >& apriori,
112  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
114  = Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
115  const allocator_type& alloc = allocator_type());
116 
117  /// copy constructor
118  KNML(const KNML< ALLOC >& from);
119 
120  /// copy constructor with a given allocator
121  KNML(const KNML< ALLOC >& from, const allocator_type& alloc);
122 
123  /// move constructor
124  KNML(KNML< ALLOC >&& from);
125 
126  /// move constructor with a given allocator
127  KNML(KNML< ALLOC >&& from, const allocator_type& alloc);
128 
129  /// virtual copy constructor
130  virtual KNML< ALLOC >* clone() const;
131 
132  /// virtual copy constructor with a given allocator
133  virtual KNML< ALLOC >* clone(const allocator_type& alloc) const;
134 
135  /// destructor
136  virtual ~KNML();
137 
138  /// @}
139 
140 
141  // ##########################################################################
142  /// @name Operators
143  // ##########################################################################
144 
145  /// @{
146 
147  /// copy operator
148  KNML< ALLOC >& operator=(const KNML< ALLOC >& from);
149 
150  /// move operator
151  KNML< ALLOC >& operator=(KNML< ALLOC >&& from);
152 
153  /// @}
154 
155 
156  // ##########################################################################
157  /// @name Accessors / Modifiers
158  // ##########################################################################
159  /// @{
160 
161  /// changes the max number of threads used to parse the database
163 
164  /// returns the number of threads used to parse the database
165  using IndependenceTest< ALLOC >::nbThreads;
166 
167  /** @brief changes the number min of rows a thread should process in a
168  * multithreading context
169  *
170  * When computing score, several threads are used by record counters to
171  * perform countings on the rows of the database, the MinNbRowsPerThread
172  * method indicates how many rows each thread should at least process.
173  * This is used to compute the number of threads actually run. This number
174  * is equal to the min between the max number of threads allowed and the
175  * number of records in the database divided by nb. */
177 
178  /// returns the minimum of rows that each thread should process
180 
181  /// sets new ranges to perform the countings used by kNML
182  /** @param ranges a set of pairs {(X1,Y1),...,(Xn,Yn)} of database's rows
183  * indices. The countings are then performed only on the union of the
184  * rows [Xi,Yi), i in {1,...,n}. This is useful, e.g, when performing
185  * cross validation tasks, in which part of the database should be ignored.
186  * An empty set of ranges is equivalent to an interval [X,Y) ranging over
187  * the whole database. */
188  using IndependenceTest< ALLOC >::setRanges;
189 
190  /// reset the ranges to the one range corresponding to the whole database
192 
193  /// returns the current ranges
194  using IndependenceTest< ALLOC >::ranges;
195 
196  /// the scores
197  using IndependenceTest< ALLOC >::score;
198 
199  /// clears all the data structures from memory, including the C_n^r cache
200  virtual void clear();
201 
202  /// clears the current C_n^r cache
203  virtual void clearCache();
204 
205  /// turn on/off the use of the C_n^r cache
206  virtual void useCache(const bool on_off);
207 
208  /// return the mapping between the columns of the database and the node ids
209  /** @warning An empty nodeId2Columns bijection means that the mapping is
210  * an identity, i.e., the value of a NodeId is equal to the index of the
211  * column in the DatabaseTable. */
213 
214  /// return the database used by the score
215  using IndependenceTest< ALLOC >::database;
216 
217  /// returns the allocator used by the score
219 
220  /// @}
221 
222 
223  protected:
224  /// returns the score for a given IdCondSet
225  /** @throws OperationNotAllowed is raised if the score does not support
226  * calling method score such an idset (due to too many/too few variables
227  * in the left hand side or the right hand side of the idset). */
228  virtual double score_(const IdCondSet< ALLOC >& idset) final;
229 
230 
231 #ifndef DOXYGEN_SHOULD_SKIP_THIS
232 
233  private:
234  /// the CTable computation
235  VariableLog2ParamComplexity< ALLOC > param_complexity__;
236 
237 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
238  };
239 
240  } /* namespace learning */
241 
242 } /* namespace gum */
243 
244 
245 // always include the template implementation
246 #include <agrum/tools/stattests/kNML_tpl.h>
247 
248 #endif /* GUM_LEARNING_K_NML_H */
INLINE void emplace(Args &&... args)
Definition: set_tpl.h:669
KNML(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
virtual KNML< ALLOC > * clone(const allocator_type &alloc) const
virtual copy constructor with a given allocator
KNML(KNML< ALLOC > &&from, const allocator_type &alloc)
move constructor with a given allocator
virtual void clear()
clears all the data structures from memory, including the C_n^r cache
virtual ~KNML()
destructor
KNML(const KNML< ALLOC > &from)
copy constructor
KNML(KNML< ALLOC > &&from)
move constructor
KNML(const KNML< ALLOC > &from, const allocator_type &alloc)
copy constructor with a given allocator
virtual void clearCache()
clears the current C_n^r cache
virtual double score_(const IdCondSet< ALLOC > &idset) final
returns the score for a given IdCondSet
KNML< ALLOC > & operator=(const KNML< ALLOC > &from)
copy operator
the class for computing the NML penalty used by 3off2
Definition: kNML.h:49
virtual void useCache(const bool on_off)
turn on/off the use of the C_n^r cache
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
virtual KNML< ALLOC > * clone() const
virtual copy constructor
KNML< ALLOC > & operator=(KNML< ALLOC > &&from)
move operator
KNML(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor