aGrUM  0.20.2
a C++ library for (probabilistic) graphical models
BNDatabaseGenerator.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief Class for generating a database of samples from a Bayesian network
24  *
25  * @author Santiago CORTIJO and Pierre-Henri WUILLEMIN(@LIP6)
26  *
27  * Constructor
28  * @code
29  * gum::learning::BNDatabaseGenerator< float > dbgen(bn);
30  * @endcode
31  * ("bn" being a BayesNet<GUM_SCALAR>)
32  *
33  * CSV Generation:
34  * @code
35  * std::string csvFileName = "foo.csv";
36  * gum::Size nbSamples = 100;
37  * bool useLabels = false;
38  * bool append = false;
39  * std::string csvSeparator(",");
40  * gum::learning::BNDatabaseGenerator< float > dbgen(bn);
41  * dbgen.drawSamples(nbSamples);
42  * dbgen.setRandomVarOrder();
43  * dbgen.toCSV(csvFileName, useLabels, append, csvSeparator);
44  * @endcode
45  *
46  * CSV append:
47  * @code
48  * std::string csvFileName = "foo.csv";
49  * gum::Size nbSamples = 100;
50  * bool useLabels = false;
51  * bool append = true;
52  * std::string csvSeparator(":::");
53  * bool checkOnAppend = true;
54  * dbgen.drawSamples(nbSamples);
55  * dbgen.setVarOrderFromCSV(csvFileName, csvSeparator);
56  * dbgen.toCSV(csvFileName, useLabels, append, csvSeparator, checkOnAppend);
57  * @endcode
58  *
59  * DatabaseTable mode:
60  * @code
61  * gum::learning::DatabaseTable<> database =
62  * dbgen.toDatabaseTable(useLabels);
63  * @endcode
64  *
65  */
66 
67 #ifndef GUM_BN_DATABASE_GENERATOR
68 #define GUM_BN_DATABASE_GENERATOR
69 
70 #include <agrum/BN/BayesNet.h>
71 #include <agrum/tools/core/progressNotification.h>
72 #include <agrum/tools/database/databaseTable.h>
73 #include <fstream>
74 
75 namespace gum {
76 
77  namespace learning {
78  template < typename GUM_SCALAR >
79  class BNDatabaseGenerator: public ProgressNotifier {
80  public:
81  // #######################################################################
82  /// @name Constructors / Destructors
83  // #######################################################################
84  /// @{
85 
86  /// constructor from a Bayesian network (bn__ is a reference: bn presumably must outlive this object)
87  BNDatabaseGenerator(const BayesNet< GUM_SCALAR >& bn);
88 
89  /// destructor
90  ~BNDatabaseGenerator();
91 
92  /// @}
93 
94  // #######################################################################
95  /// @name Accessors / Modifiers
96  // #######################################################################
97  /// @{
98 
99 
100  /// generates and stores the database, returns its log2likelihood
101  /// using ProgressNotifier for notification
102  double drawSamples(Size nbSamples); // drawSamples
103 
104  /// generates a csv database according to bn
105  void toCSV(const std::string& csvFileURL,
106  bool useLabels = true,
107  bool append = false,
108  std::string csvSeparator = ",",
109  bool checkOnAppend = false) const;
110 
111  /// returns the generated database as a DatabaseTable<>
112  DatabaseTable<> toDatabaseTable(bool useLabels = true) const;
113 
114  /// generates database according to bn into a std::vector
115  /// @warning: makes a copy of the whole database
116  std::vector< std::vector< Idx > > database() const;
117 
118  /// change columns order
119  void setVarOrder(const std::vector< Idx >& varOrder);
120 
121  /// change columns order using variable names
122  void setVarOrder(const std::vector< std::string >& varOrder);
123 
124  /// change columns order according to a csv file
125  void setVarOrderFromCSV(const std::string& csvFileURL,
126  const std::string& csvSeparator = ",");
127 
128  /// set columns in topological order
129  void setTopologicalVarOrder();
130 
131  /// set columns in antiTopological order
132  void setAntiTopologicalVarOrder();
133 
134  /// set columns in random order
135  void setRandomVarOrder();
136 
137  /// returns variable order indexes
138  std::vector< Idx > varOrder() const;
139 
140  /// returns variable order as variable names
141  std::vector< std::string > varOrderNames() const;
142 
143  /// returns log2Likelihood of generated samples
144  double log2likelihood() const;
145 
146  /// @}
147 
148  private:
149  /// Bayesian network (held by reference, not copied)
150  const BayesNet< GUM_SCALAR >& bn__;
151 
152  /// bijection between node names and node ids
153  Bijection< std::string, NodeId > names2ids__;
154 
155  /// number of variables
156  Size nbVars__;
157 
158  /// generated database
159  std::vector< std::vector< Idx > > database__;
160 
161  /// variable order in generated database
162  std::vector< Idx > varOrder__;
163 
164  /// whether drawSamples has already been called.
165  bool drawnSamples__ = false;
166 
167  /// log2Likelihood of generated samples
168  double log2likelihood__ = 0;
169 
170  /// returns varOrder from a csv file given by its URL
171  std::vector< Idx > varOrderFromCSV__(const std::string& csvFileURL,
172  const std::string& csvSeparator
173  = ",") const;
174 
175  /// returns varOrder from a csv file given as an input file stream
176  std::vector< Idx > varOrderFromCSV__(std::ifstream& csvFile,
177  const std::string& csvSeparator
178  = ",") const;
179 
180  // forbidden copies / moves (all four operations are deleted)
181  BNDatabaseGenerator(const BNDatabaseGenerator&) = delete;
182  BNDatabaseGenerator(BNDatabaseGenerator&&) = delete;
183  BNDatabaseGenerator& operator=(const BNDatabaseGenerator&) = delete;
184  BNDatabaseGenerator& operator=(BNDatabaseGenerator&&) = delete;
185  };
186 
187 
188 #ifndef GUM_NO_EXTERN_TEMPLATE_CLASS
189  extern template class BNDatabaseGenerator< double >; // explicit instantiation declaration for double (skipped when GUM_NO_EXTERN_TEMPLATE_CLASS is defined)
190 #endif
191 
192  } /* namespace learning */
193 } /* namespace gum */
194 
195 #include <agrum/BN/database/BNDatabaseGenerator_tpl.h>
196 #endif /* GUM_BN_DATABASE_GENERATOR */