aGrUM  0.14.2
genericBNLearner_tpl.h
Go to the documentation of this file.
1 
2 /**************************************************************************
3  * Copyright (C) 2017 by Pierre-Henri WUILLEMIN and Christophe GONZALES *
4  * {prenom.nom}_at_lip6.fr *
5  * *
6  * This program is free software; you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License as published by *
8  * the Free Software Foundation; either version 2 of the License, or *
9  * (at your option) any later version. *
10  * *
11  * This program is distributed in the hope that it will be useful, *
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14  * GNU General Public License for more details. *
15  * *
16  * You should have received a copy of the GNU General Public License *
17  * along with this program; if not, write to the *
18  * Free Software Foundation, Inc., *
19  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
20  ***************************************************************************/
21 #include <algorithm>
22 
24 
25 namespace gum {
26 
27  namespace learning {
28 
// NOTE(review): original line 30 (the declarator that names this function) is
// absent from this listing. Judging by the parameter list and by the call
// `__score_database(filename, bn, missing_symbols)` further down in this file,
// this is presumably a Database constructor -- confirm against the real header.
//
// Loads the CSV file `filename` into __database, using the variables of `bn`
// to set up the column translators.
//   filename        : path handed to DBInitializerFromCSV.
//   bn              : Bayes net whose variables are inserted as translators.
//   missing_symbols : forwarded unchanged to __database.insertTranslator.
29  template < typename GUM_SCALAR >
31  const std::string& filename,
32  const BayesNet< GUM_SCALAR >& bn,
33  const std::vector< std::string >& missing_symbols) {
34  // assign to each column name in the database its position
36  DBInitializerFromCSV<> initializer(filename);
37  const auto& xvar_names = initializer.variableNames();
38  std::size_t nb_vars = xvar_names.size();
39  HashTable< std::string, std::size_t > var_names(nb_vars);
40  for (std::size_t i = std::size_t(0); i < nb_vars; ++i)
41  var_names.insert(xvar_names[i], i);
42 
43  // we use the bn to insert the translators into the database table
44  std::vector< NodeId > nodes;
45  nodes.reserve(bn.dag().sizeNodes());
46  for (const auto node : bn.dag())
47  nodes.push_back(node);
// sort the node ids so that the translators below are inserted in increasing
// NodeId order
48  std::sort(nodes.begin(), nodes.end());
49  try {
50  std::size_t i = std::size_t(0);
51  for (auto node : nodes) {
52  const Variable& var = bn.variable(node);
// the variable's name is looked up among the CSV column names; presumably this
// lookup is what raises the NotFound caught below when the name is not a
// column of the file -- confirm against HashTable::operator[]
53  __database.insertTranslator(var, var_names[var.name()], missing_symbols);
// i counts the translators inserted so far, so each NodeId is paired with that
// running index (not with the column position found in var_names)
54  __nodeId2cols.insert(NodeId(node), i++);
55  }
56  } catch (NotFound&) {
// NOTE(review): original line 57 is absent from this listing; the string below
// is the tail of a call that starts on it (presumably a GUM_ERROR raise --
// confirm). As far as the visible text shows, the message ends right after
// "variable " without the name of the offending variable.
58  "the database does not contain variable ");
59  }
60 
61  // fill the database
62  initializer.fillDatabase(__database);
63 
64  // get the domain sizes of the variables
65  for (auto dom : __database.domainSizes())
66  __domain_sizes.push_back(dom);
67 
68  // create the parser
// NOTE(review): original line 70 (the right-hand side of this assignment) is
// absent from this listing.
69  __parser =
71  }
72 
73 
// NOTE(review): original lines 75-76 (the declarator and, presumably, the
// declaration of the local `bn`) are absent from this listing. The tooltip text
// of this page lists `BayesNet< GUM_SCALAR > __BNVars() const`, which is
// presumably this function -- confirm against the real header.
//
// Adds every variable of __database, in index order 0 .. nbVariables()-1, to
// `bn` and returns `bn`.
74  template < typename GUM_SCALAR >
77  const std::size_t nb_vars = __database.nbVariables();
78  for (std::size_t i = 0; i < nb_vars; ++i) {
// reference dynamic_cast: throws std::bad_cast if the i-th database variable
// is not a DiscreteVariable
79  const DiscreteVariable& var =
80  dynamic_cast< const DiscreteVariable& >(__database.variable(i));
81  bn.add(var);
82  }
83  return bn;
84  }
85 
86 
// NOTE(review): original lines 88, 90 and 93 are absent from this listing: the
// declarator, presumably the parameter that declares `bn` (it is used in the
// initializer below), and one line of the body. Presumably this is the
// genericBNLearner constructor that takes a Bayes net -- confirm against the
// real header.
//
// Visible behaviour: forwards filename, bn and missing_symbols to the
// construction of the member __score_database, then invokes
// GUM_CONSTRUCTOR(genericBNLearner).
87  template < typename GUM_SCALAR >
89  const std::string& filename,
91  const std::vector< std::string >& missing_symbols) :
92  __score_database(filename, bn, missing_symbols) {
94  GUM_CONSTRUCTOR(genericBNLearner);
95  }
96 
97 
// NOTE(review): original lines 100 and 105 (the declarator and the declaration
// of the local `score`) are absent from this listing. The tooltip text of this
// page lists `void useDatabaseRanges(...)` with this exact parameter type, so
// this is presumably that function -- confirm against the real header.
//
// Replaces __ranges with the ranges obtained by passing `new_ranges` through a
// temporary score object.
//   new_ranges : vector of (std::size_t, std::size_t) pairs, with an allocator
//                chosen by the caller through XALLOC.
99  template < template < typename > class XALLOC >
101  const std::vector< std::pair< std::size_t, std::size_t >,
102  XALLOC< std::pair< std::size_t, std::size_t > > >&
103  new_ranges) {
104  // use a score to detect whether the ranges are ok
// __ranges is assigned only after setRanges has returned, and it receives the
// ranges as reported back by the score rather than new_ranges itself
106  score.setRanges(new_ranges);
107  __ranges = score.ranges();
108  }
109  } // namespace learning
110 } // namespace gum
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
Class representing a Bayesian Network.
Definition: BayesNet.h:76
const std::vector< std::string, ALLOC< std::string > > & variableNames()
returns the names of the variables in the input dataset
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the score
Base class for every random variable.
Definition: variable.h:63
Database __score_database
the database to be used by the scores and parameter estimators
const DiscreteVariable & variable(NodeId id) const final
Returns a gum::DiscreteVariable given its gum::NodeId in the gum::BayesNet.
Definition: BayesNet_tpl.h:199
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
A class for generic framework of learning algorithms that can easily be used.
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
The class used to pack sets of generators.
NodeId add(const DiscreteVariable &var)
Add a variable to the gum::BayesNet.
Definition: BayesNet_tpl.h:229
DatabaseTable __database
the database itself
Base class for discrete random variable.
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
The class for generic Hash Tables.
Definition: hashTable.h:676
the class for computing Log2-likelihood scores
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
void fillDatabase(DATABASE< ALLOC > &database, const bool retry_insertion=false)
fills the rows of the database table
const DatabaseTable & databaseTable() const
returns the internal database table
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth column...
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
DBRowGeneratorParser * __parser
the parser used for reading the database
A pack of learning algorithms that can easily be used.
DBRowGeneratorParser & parser()
returns the parser for the database
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
use a new set of database rows' ranges to perform learning
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
BayesNet< GUM_SCALAR > __BNVars() const
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection assigning to each NodeId the index of its column in the database table
Database(const std::string &file, const std::vector< std::string > &missing_symbols)
default constructor
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
const std::string & name() const
returns the name of the variable
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
const DAG & dag() const
Returns a constant reference to the dag of this Bayes Net.
Definition: DAGmodel_inl.h:60
Size NodeId
Type for node ids.
Definition: graphElements.h:97
the no-a-priori class: corresponds to a 0-weight sample
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52