aGrUM  0.15.1
recordCounter.h
Go to the documentation of this file.
1 
31 #ifndef GUM_LEARNING_RECORD_COUNTER_H
32 #define GUM_LEARNING_RECORD_COUNTER_H
33 
34 #include <vector>
35 #include <utility>
36 #include <sstream>
37 #include <string>
38 
39 #include <agrum/agrum.h>
40 #include <agrum/core/bijection.h>
41 #include <agrum/core/sequence.h>
42 #include <agrum/core/OMPThreads.h>
43 #include <agrum/core/threadData.h>
44 #include <agrum/graphs/DAG.h>
47 
48 
49 namespace gum {
50 
51  namespace learning {
52 
112  template < template < typename > class ALLOC = std::allocator >
114  public:
116  using allocator_type = ALLOC< NodeId >;
117 
118  // ##########################################################################
120  // ##########################################################################
122 
124 
144  const DBRowGeneratorParser< ALLOC >& parser,
145  const std::vector< std::pair< std::size_t, std::size_t >,
146  ALLOC< std::pair< std::size_t, std::size_t > > >&
147  ranges,
148  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
149  nodeId2columns =
150  Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
151  const allocator_type& alloc = allocator_type());
152 
154 
168  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
169  nodeId2columns =
170  Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(),
171  const allocator_type& alloc = allocator_type());
172 
175 
178  const allocator_type& alloc);
179 
182 
185 
187  virtual RecordCounter< ALLOC >* clone() const;
188 
190  virtual RecordCounter< ALLOC >* clone(const allocator_type& alloc) const;
191 
193  virtual ~RecordCounter();
194 
196 
197 
198  // ##########################################################################
200  // ##########################################################################
201 
203 
206 
209 
211 
212 
213  // ##########################################################################
215  // ##########################################################################
216 
218 
220  void clear();
221 
223  void setMaxNbThreads(const std::size_t nb) const;
224 
226  std::size_t nbThreads() const;
227 
237  void setMinNbRowsPerThread(const std::size_t nb) const;
238 
240  std::size_t minNbRowsPerThread() const;
241 
243 
280  const std::vector< double, ALLOC< double > >&
281  counts(const IdSet< ALLOC >& ids, const bool check_discrete_vars = false);
282 
284 
290  template < template < typename > class XALLOC >
291  void setRanges(
292  const std::vector< std::pair< std::size_t, std::size_t >,
293  XALLOC< std::pair< std::size_t, std::size_t > > >&
294  new_ranges);
295 
297  void clearRanges();
298 
300  const std::vector< std::pair< std::size_t, std::size_t >,
301  ALLOC< std::pair< std::size_t, std::size_t > > >&
302  ranges() const;
303 
305 
308  template < typename GUM_SCALAR >
309  void setBayesNet(const BayesNet< GUM_SCALAR >& new_bn);
310 
313 
315 
319  nodeId2Columns() const;
320 
322  const DatabaseTable< ALLOC >& database() const;
323 
325 
326 
327 #ifndef DOXYGEN_SHOULD_SKIP_THIS
328 
329  private:
330  // the parsers used by the threads
331  std::vector< ThreadData< DBRowGeneratorParser< ALLOC > >,
332  ALLOC< ThreadData< DBRowGeneratorParser< ALLOC > > > >
333  __parsers;
334 
335  // the set of ranges of the database's rows indices over which the user
336  // wishes to perform the countings
337  std::vector< std::pair< std::size_t, std::size_t >,
338  ALLOC< std::pair< std::size_t, std::size_t > > >
339  __ranges;
340 
341  // the ranges actually used by the threads: there is a hopefully clever
342  // algorithm that splits the rows ranges into another set of ranges that
343  // are assigned to the threads. For instance, if the database has 1000
344  // rows and there are 10 threads, each one will be assigned a set of 100
345  // rows. These sets are precisely what is stored in the field below
346  mutable std::vector< std::pair< std::size_t, std::size_t >,
347  ALLOC< std::pair< std::size_t, std::size_t > > >
348  __thread_ranges;
349 
350  // the mapping from the NodeIds of the variables to the indices of the
351  // columns in the database
353 
354  // the last database-parsed countings
355  std::vector< double, ALLOC< double > > __last_DB_countings;
356 
357  // the ids of the nodes for the last database-parsed countings
358  IdSet< ALLOC > __last_DB_ids;
359 
360  // the last countings deduced from __last_DB_countings
361  std::vector< double, ALLOC< double > > __last_nonDB_countings;
362 
363  // the ids of the nodes of the last countings deduced from __last_DB_countings
364  IdSet< ALLOC > __last_nonDB_ids;
365 
366  // the maximal number of threads that the record counter can use
367  mutable std::size_t __max_nb_threads{
368  std::size_t(gum::getMaxNumberOfThreads())};
369 
370  // the min number of rows that a thread should process in a
371  // multithreading context
372  mutable std::size_t __min_nb_rows_per_thread{100};
373 
374  // returns a mapping from the nodes ids to the columns of the database
375  // for a given sequence of ids. This is especially convenient when
376  // __nodeId2columns is empty (which means that there is an identity mapping)
378  __getNodeIds2Columns(const IdSet< ALLOC >& ids) const;
379 
381  std::vector< double, ALLOC< double > >& __extractFromCountings(
382  const IdSet< ALLOC >& subset_ids,
383  const IdSet< ALLOC >& superset_ids,
384  const std::vector< double, ALLOC< double > >& superset_vect);
385 
387  std::vector< double, ALLOC< double > >&
388  __countFromDatabase(const IdSet< ALLOC >& ids);
389 
391  void __threadedCount(
392  const std::size_t range_begin,
393  const std::size_t range_end,
395  const std::vector< std::pair< std::size_t, std::size_t >,
396  ALLOC< std::pair< std::size_t, std::size_t > > >&
397  cols_and_offsets,
398  std::vector< double, ALLOC< double > >& countings);
399 
401 
404  template < template < typename > class XALLOC >
405  void __checkRanges(
406  const std::vector< std::pair< std::size_t, std::size_t >,
407  XALLOC< std::pair< std::size_t, std::size_t > > >&
408  new_ranges) const;
409 
411 
413  void __checkDiscreteVariables(const IdSet< ALLOC >& ids) const;
414 
416 
419  void __raiseCheckException(
420  const std::vector< std::string, ALLOC< std::string > >& bad_vars) const;
421 
423  void __dispatchRangesToThreads();
424 
425 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
426  };
427 
428  } /* namespace learning */
429 
430 } /* namespace gum */
431 
434 
435 #endif /* GUM_LEARNING_RECORD_COUNTER_H */
Class representing a Bayesian Network.
Definition: BayesNet.h:78
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
virtual RecordCounter< ALLOC > * clone() const
virtual copy constructor
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > & nodeId2Columns() const
returns the mapping from ids to column positions in the database
void clearRanges()
reset the ranges to the one range corresponding to the whole database
std::size_t nbThreads() const
returns the number of threads used to parse the database
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set...
Definition: idSet.h:48
allocator_type getAllocator() const
returns the allocator used
unsigned int getMaxNumberOfThreads()
Returns the maximum number of threads at any time.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
void setMaxNbThreads(const std::size_t nb) const
changes the max number of threads used to parse the database
The class for generic Hash Tables.
Definition: hashTable.h:679
const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > & ranges() const
returns the current ranges
void clear()
clears all the last database-parsed countings from memory
std::size_t minNbRowsPerThread() const
returns the minimum number of rows that each thread should process
The class that computes countings of observations from the database.
const std::vector< double, ALLOC< double > > & counts(const IdSet< ALLOC > &ids, const bool check_discrete_vars=false)
returns the counts over all the variables in an IdSet
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMinNbRowsPerThread(const std::size_t nb) const
changes the minimum number of rows a thread should process in a multithreading context ...
Set of pairs of elements with fast search for both elements.
Definition: bijection.h:1805
The class representing a tabular database as used by learning tasks.
virtual ~RecordCounter()
destructor
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings
RecordCounter< ALLOC > & operator=(const RecordCounter< ALLOC > &from)
copy operator
The class for parsing DatabaseTable rows and generating output rows.
RecordCounter(const DBRowGeneratorParser< ALLOC > &parser, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
Size NodeId
Type for node ids.
Definition: graphElements.h:98
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to all the counter's generators depending on a BN
const DatabaseTable< ALLOC > & database() const
returns the database on which we perform the counts