aGrUM  0.21.0
a C++ library for (probabilistic) graphical models
BNDatabaseGenerator.h
Go to the documentation of this file.
1 /**
2  *
3  * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4  * info_at_agrum_dot_org
5  *
6  * This library is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief Generation of a database of samples drawn from a Bayesian network
24  *
25  * @author Santiago CORTIJO and Pierre-Henri WUILLEMIN(@LIP6)
26  *
27  * Constructor
28  * @code
29  * gum::learning::BNDatabaseGenerator< float > dbgen(bn);
30  * @endcode
31  * (where "bn" is a BayesNet<GUM_SCALAR>)
32  *
33  * CSV Generation:
34  * @code
35  * std::string csvFileName="foo.csv";
36  * gum::Size nbSamples = 100;
37  * bool useLabels = false;
38  * bool append = false;
39  * std::string csvSeparator(",");
40  * gum::learning::BNDatabaseGenerator< float > dbgen(bn);
41  * dbgen.drawSamples(nbSamples);
42  * dbgen.setRandomVarOrder();
43  * dbgen.toCSV(csvFileName, useLabels, append, csvSeparator);
44  * @endcode
45  *
46  * CSV append:
47  * @code
48  * std::string csvFileName="foo.csv";
49  * gum::Size nbSamples = 100;
50  * bool useLabels = false;
51  * bool append = true;
52  * std::string csvSeparator(":::");
53  * bool checkOnAppend = true;
54  * dbgen.drawSamples(nbSamples);
55  * dbgen.setVarOrderFromCSV(csvFileName, csvSeparator);
56  * dbgen.toCSV(csvFileName, useLabels, append, csvSeparator, checkOnAppend);
57  * @endcode
58  *
59  * DatabaseTable mode:
60  * @code
61  * gum::learning::DatabaseTable<> database =
62  * dbgen.toDatabaseTable(useLabels);
63  * @endcode
64  *
65  */
66 
67 #ifndef GUM_BN_DATABASE_GENERATOR
68 #define GUM_BN_DATABASE_GENERATOR
69 
70 #include <agrum/BN/BayesNet.h>
71 #include <agrum/tools/core/progressNotification.h>
72 #include <agrum/tools/database/databaseTable.h>
73 #include <fstream>
74 
75 namespace gum {
76 
77  namespace learning {
78  template < typename GUM_SCALAR >
79  class BNDatabaseGenerator: public ProgressNotifier {
80  public:
81  // #######################################################################
82  /// @name Constructors / Destructors
83  // #######################################################################
84  /// @{
85 
86  /// default constructor
87  BNDatabaseGenerator(const BayesNet< GUM_SCALAR >& bn);
88 
89  /// destructor
90  ~BNDatabaseGenerator();
91 
92  /// @}
93 
94  // #######################################################################
95  /// @name Accessors / Modifiers
96  // #######################################################################
97  /// @{
98 
99 
100  /// generate and stock database, returns log2likelihood
101  /// using ProgressNotifier as notification
102  double drawSamples(Size nbSamples); // drawSamples
103 
104  /// generates csv database according to bn
105  void toCSV(const std::string& csvFileURL,
106  bool useLabels = true,
107  bool append = false,
108  std::string csvSeparator = ",",
109  bool checkOnAppend = false) const;
110 
111  /// generates a DatabaseVectInRAM
112  DatabaseTable<> toDatabaseTable(bool useLabels = true) const;
113 
114  /// generates database according to bn into a std::vector
115  /// @warning: makes a copy of the whole database
116  std::vector< std::vector< Idx > > database() const;
117 
118  /// change columns order
119  void setVarOrder(const std::vector< Idx >& varOrder);
120 
121  /// change columns order using variable names
122  void setVarOrder(const std::vector< std::string >& varOrder);
123 
124  /// change columns order according to a csv file
125  void setVarOrderFromCSV(const std::string& csvFileURL, const std::string& csvSeparator = ",");
126 
127  /// set columns in topoligical order
128  void setTopologicalVarOrder();
129 
130  /// set columns in antiTopoligical order
131  void setAntiTopologicalVarOrder();
132 
133  /// set columns in random order
134  void setRandomVarOrder();
135 
136  /// returns variable order indexes
137  std::vector< Idx > varOrder() const;
138 
139  /// returns variable order.
140  std::vector< std::string > varOrderNames() const;
141 
142  /// returns log2Likelihood of generated samples
143  double log2likelihood() const;
144 
145  /// @}
146 
147  private:
148  /// Bayesian network
149  const BayesNet< GUM_SCALAR >& _bn_;
150 
151  /// bijection nodes names
152  Bijection< std::string, NodeId > _names2ids_;
153 
154  /// number of variables
155  Size _nbVars_;
156 
157  /// generated database
158  std::vector< std::vector< Idx > > _database_;
159 
160  /// variable order in generated database
161  std::vector< Idx > _varOrder_;
162 
163  /// whether drawSamples has been already called.
164  bool _drawnSamples_ = false;
165 
166  /// log2Likelihood of generated samples
167  double _log2likelihood_ = 0;
168 
169  /// returns varOrder from a csv file
170  std::vector< Idx > _varOrderFromCSV_(const std::string& csvFileURL,
171  const std::string& csvSeparator = ",") const;
172 
173  /// returns varOrder from a csv file
174  std::vector< Idx > _varOrderFromCSV_(std::ifstream& csvFile,
175  const std::string& csvSeparator = ",") const;
176 
177  // forbidden copies / moves
178  BNDatabaseGenerator(const BNDatabaseGenerator&) = delete;
179  BNDatabaseGenerator(BNDatabaseGenerator&&) = delete;
180  BNDatabaseGenerator& operator=(const BNDatabaseGenerator&) = delete;
181  BNDatabaseGenerator& operator=(BNDatabaseGenerator&&) = delete;
182  };
183 
184 
185 #ifndef GUM_NO_EXTERN_TEMPLATE_CLASS   // the declaration below is skipped when this macro is defined
186 extern template class BNDatabaseGenerator< double >;   // explicit instantiation declaration: BNDatabaseGenerator< double > is not implicitly instantiated by includers (presumably instantiated once elsewhere in the library)
187 #endif
188 
189  } /* namespace learning */
190 } /* namespace gum */
191 
192 #include <agrum/BN/database/BNDatabaseGenerator_tpl.h>
193 #endif /* GUM_BN_DATABASE_GENERATOR */