aGrUM  0.13.3
recordCounter.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
29 #ifndef GUM_LEARNING_RECORD_COUNTER_H
30 #define GUM_LEARNING_RECORD_COUNTER_H
31 
32 #include <algorithm>
33 #include <climits>
34 #include <cstring>
35 #include <initializer_list>
36 #include <type_traits>
37 #include <vector>
38 
39 #include <agrum/core/OMPThreads.h>
40 #include <agrum/core/bijection.h>
41 #include <agrum/core/hashTable.h>
42 #include <agrum/graphs/DAG.h>
45 
46 namespace gum {
47 
48  namespace learning {
49 
50  template < typename IdSetAlloc, typename CountAlloc >
51  class Counter;
52 
53  /* =========================================================================
54  */
55  /* === THREAD RECORD COUNTER BASE ===
56  */
57  /* =========================================================================
58  */
71  template < typename IdSetAlloc = std::allocator< Idx >,
72  typename CountAlloc = std::allocator< double > >
74  public:
75  // ##########################################################################
77  // ##########################################################################
79 
81  RecordCounterThreadBase(const std::vector< Size >& var_modalities);
82 
86 
90 
93  copyFactory() const = 0;
94 
96  virtual ~RecordCounterThreadBase();
97 
99 
100  // ##########################################################################
102  // ##########################################################################
104 
106 
107  Idx addNodeSet(const std::vector< Idx, IdSetAlloc >& ids);
108 
110  void clearNodeSets() noexcept;
111 
113  virtual void count() = 0;
114 
116  virtual Size DBSize() noexcept = 0;
117 
119  virtual void setRange(Size min_index, Size max_index) = 0;
120 
122  const std::vector< double, CountAlloc >& getCounts(Idx nodeset_id) const
123  noexcept;
124 
126 
127  protected:
129  const std::vector< Size >* _modalities{nullptr};
130 
132  std::vector< const std::vector< Idx, IdSetAlloc >* > _nodesets;
133 
135  std::vector< std::vector< double, CountAlloc > > _countings;
136 
138  static constexpr Size _cache_size{128};
139 
141  const char _align[_cache_size]{};
142  };
143 
144  /* =========================================================================
145  */
146  /* === THREAD RECORD COUNTER ===
147  */
148  /* =========================================================================
149  */
161  template < typename RowGeneratorParser,
162  typename IdSetAlloc = std::allocator< Idx >,
163  typename CountAlloc = std::allocator< double > >
165  : public RecordCounterThreadBase< IdSetAlloc, CountAlloc > {
167 
168  public:
169  // ##########################################################################
171  // ##########################################################################
173 
175  RecordCounterThread(const RowGeneratorParser& parser,
176  const std::vector< Size >& var_modalities);
177 
181  from);
182 
186 
189  copyFactory() const;
190 
192  virtual ~RecordCounterThread();
193 
195 
196  // ##########################################################################
198  // ##########################################################################
200 
202 
203  using Base::addNodeSet;
204 
206  using Base::clearNodeSets;
207 
209  void count();
210 
212  using Base::getCounts;
213 
215  Size DBSize() noexcept;
216 
218  void setRange(Size min_index, Size max_index);
219 
221  RowGeneratorParser& parser() noexcept;
222 
224 
225  private:
227  RowGeneratorParser __parser;
228  };
229 
230  /* =========================================================================
231  */
232  /* === RECORD COUNTER ===
233  */
234  /* =========================================================================
235  */
245  template < typename IdSetAlloc = std::allocator< Idx >,
246  typename CountAlloc = std::allocator< double > >
248  public:
249  // ##########################################################################
251  // ##########################################################################
253 
255  template < typename RowGeneratorParser >
256  RecordCounter(const RowGeneratorParser& parser,
257  const std::vector< Size >& var_modalities,
258  Size min_range = 0,
259  Size max_range = std::numeric_limits< Size >::max());
260 
262  RecordCounter(const RecordCounter< IdSetAlloc, CountAlloc >& from);
263 
265  RecordCounter(RecordCounter< IdSetAlloc, CountAlloc >&& from);
266 
268  ~RecordCounter();
269 
271 
272  // ##########################################################################
274  // ##########################################################################
276 
278  Idx addNodeSet(const std::vector< Idx, IdSetAlloc >& ids);
279 
281  Size DBParsedSize() noexcept;
282 
284 
288  void setRange(Size min_range, Size max_range);
289 
291 
298  void countOnSubDatabase();
299 
302  void countSubsets();
303 
305 
307  void count();
308 
310  const std::vector< double, CountAlloc >& getCounts(Idx idset) const noexcept;
311 
314  void clearNodeSets() noexcept;
315 
317  const std::vector< Size >& modalities() const;
318 
320  void setMaxNbThreads(Size nb) noexcept;
321 
323 
324  private:
325  template < typename I, typename C >
326  friend class Counter;
327 
329  const std::vector< Size >* __modalities{nullptr};
330 
332 
342 
344 
349  std::vector< const std::vector< Idx, IdSetAlloc >* > __nodesets;
350 
352 
355 
357  enum SetState { // the possible states of a set of ids
358  NOT_SUBSET, // the set is a proper, nonempty superset
359  STRICT_SUBSET, // the set is included in another one
360  COPY_SET, // the set is a copy of another one
361  EMPTY_SET // the set is empty
362  };
363 
365  std::vector< SetState > __set_state;
366 
368 
369  std::vector< std::vector< double, CountAlloc > > __countings;
370 
372 
381 
384 
387  std::vector< std::pair< const IdSet< IdSetAlloc >*, Idx > > __set2thread_id;
388 
391 
394 
396  std::vector< RecordCounterThreadBase< IdSetAlloc, CountAlloc >* >
398 
400  Size __nb_thread_counters{0};
401 
403 #if defined(_OPENMP) && defined(NDEBUG)
404  Size __max_threads_number{getMaxNumberOfThreads()}; // OpenMP build with NDEBUG: initialized from getMaxNumberOfThreads()
405 #else
406  Size __max_threads_number{1}; // otherwise (no OpenMP, or NDEBUG not defined): initialized to 1
407 #endif /* defined(_OPENMP) && defined(NDEBUG) */
408 
410  Size __min_nb_rows_per_thread{100};
411 
415 
418 
420  void __computeSubsets();
421 
422  // computes the countings of one subset from those of its superset
423  void __countOneSubset(Idx i);
424 
426  std::vector< std::vector< double, CountAlloc > >& __getCounts() noexcept;
427  };
428 
429  } /* namespace learning */
430 
431 } /* namespace gum */
432 
433 
434 extern template class gum::learning::RecordCounter<>;
435 
436 
437 // always include the template implementation
438 #include <agrum/learning/scores_and_tests/recordCounter_tpl.h>
439 
440 #endif /* GUM_LEARNING_RECORD_COUNTER_H */
const char _align[_cache_size]
used to prevent cache-line problems in OpenMP parallel sections
unsigned long Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:50
virtual RecordCounterThreadBase< IdSetAlloc, CountAlloc > * copyFactory() const =0
virtual copy constructor
std::vector< const std::vector< Idx, IdSetAlloc > *> _nodesets
the nodesets whose observations will be counted
virtual ~RecordCounterThreadBase()
destructor
Idx addNodeSet(const std::vector< Idx, IdSetAlloc > &ids)
adds a new target nodeset to be counted
STL namespace.
The database-independent class for preparing the computation of the number of observations of tuples ...
Definition: recordCounter.h:73
unsigned int getMaxNumberOfThreads()
Returns the maximum number of threads at any time.
A class used by RecordCounter to detect subsets of variables.
HashTable< Idx, std::vector< const IdSet< IdSetAlloc > *> > __var2idsets
a table associating to each variable the IdSets that contain it
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
std::vector< RecordCounterThreadBase< IdSetAlloc, CountAlloc > *> __thread_counters
the set of ThreadCounters
std::vector< const std::vector< Idx, IdSetAlloc > *> __nodesets
the vector of the unordered ids' vectors used to generate the idsets
RecordCounterThreadBase(const std::vector< Size > &var_modalities)
default constructor
The class for generic Hash Tables.
Definition: hashTable.h:676
This class computes the number of observations of tuples of variables in the database. A RecordCounter...
virtual void count()=0
update all the countings of all the nodesets by parsing the database
const std::vector< Size > * _modalities
the modalities of the variables
The class that computes countings of observations from the database.
std::vector< std::pair< const IdSet< IdSetAlloc > *, Idx > > __set2thread_id
a table associating to each IdSet its index in the threadRecordCounters
const std::vector< double, CountAlloc > & getCounts(Idx nodeset_id) const noexcept
returns the countings for the nodeset specified in argument
The counting class for all the scores used for learning (BIC, BDeu, etc) as well as for all the indep...
Definition: counter.h:108
virtual Size DBSize() noexcept=0
returns the size of the database
Set of pairs of elements with fast search for both elements.
Definition: bijection.h:1803
HashTable< const IdSet< IdSetAlloc > *, Idx > __idset2index
a hashtable associating to each IdSet its index in __set2thread_id
virtual void setRange(Size min_index, Size max_index)=0
sets the interval of records on which countings should be performed
std::vector< std::vector< double, CountAlloc > > _countings
the nodesets countings
std::vector< std::vector< double, CountAlloc > > __countings
a vector for computing the countings of the IdSets which are subsets
DAG __subset_lattice
a partial lattice indicating the relations between subsets and supersets
Wrappers for OpenMP.
static constexpr Size _cache_size
the size of the padding used to prevent cache-line problems in OpenMP parallel sections
Size __max_range
the number of the record after the last one taken into account
Size __min_range
the number of the first record to be taken into account during learning
The class for parsing DatabaseTable rows and generating output rows.
SetState
the possible states of a set of ids
unsigned long Idx
Type for indexes.
Definition: types.h:43
Base class for dag.
Definition: DAG.h:98
Class hash tables iterators.
Set of pairs of elements with fast search for both elements.
Base classes for directed acyclic graphs.
std::vector< SetState > __set_state
a table indicating whether each IdSet is a subset of another idSet
Bijection< IdSet< IdSetAlloc >, Idx > __idsets
the set of ordered vectors of ids + their indices in __nodesets
void clearNodeSets() noexcept
remove all the current target nodesets