aGrUM  0.14.2
gum::learning::BNDatabaseGenerator< GUM_SCALAR > Class Template Reference

#include <BNDatabaseGenerator.h>

+ Inheritance diagram for gum::learning::BNDatabaseGenerator< GUM_SCALAR >:
+ Collaboration diagram for gum::learning::BNDatabaseGenerator< GUM_SCALAR >:

Public Attributes

Signaler2< Size, double > onProgress
 Progression (percent) and time. More...
 
Signaler1< const std::string &> onStop
 with a possible explanation for stopping More...
 

Public Member Functions

Constructors / Destructors
 BNDatabaseGenerator (const BayesNet< GUM_SCALAR > &bn)
 default constructor More...
 
 ~BNDatabaseGenerator ()
 destructor More...
 
Accessors / Modifiers
double drawSamples (Size nbSamples)
 generates and stores the database, and returns its log2likelihood; progress is reported through ProgressNotifier More...
 
void toCSV (const std::string &csvFileURL, bool useLabels=true, bool append=false, std::string csvSeparator=",", bool checkOnAppend=false) const
 generates csv database according to bn More...
 
DatabaseTable toDatabaseTable (bool useLabels=true) const
 generates a DatabaseTable More...
 
std::vector< std::vector< Idx > > database () const
 generates database according to bn into a std::vector More...
 
void setVarOrder (const std::vector< Idx > &varOrder)
 change columns order More...
 
void setVarOrder (const std::vector< std::string > &varOrder)
 change columns order using variable names More...
 
void setVarOrderFromCSV (const std::string &csvFileURL, const std::string &csvSeparator=",")
 change columns order according to a csv file More...
 
void setTopologicalVarOrder ()
 set columns in topological order More...
 
void setAntiTopologicalVarOrder ()
 set columns in antiTopological order More...
 
void setRandomVarOrder ()
 set columns in random order More...
 
std::vector< Idx > varOrder () const
 returns variable order indexes More...
 
std::vector< std::string > varOrderNames () const
 returns variable order. More...
 
double log2likelihood () const
 returns log2Likelihood of generated samples More...
 

Detailed Description

template<typename GUM_SCALAR>
class gum::learning::BNDatabaseGenerator< GUM_SCALAR >

Definition at line 77 of file BNDatabaseGenerator.h.

Constructor & Destructor Documentation

◆ BNDatabaseGenerator() [1/3]

template<typename GUM_SCALAR >
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::BNDatabaseGenerator ( const BayesNet< GUM_SCALAR > &  bn)

default constructor

Definition at line 37 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__names2ids, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrder, and gum::BijectionImplementation< T1, T2, Alloc, Gen >::insert().

38  :
39  __bn(bn) {
40  // for debugging purposes
41  GUM_CONSTRUCTOR(BNDatabaseGenerator);
42 
43  // get the node names => they will serve as ids
44  NodeId id = 0;
45  for (const auto& var : __bn.dag()) {
46  auto name = __bn.variable(var).name();
47  __names2ids.insert(name, var);
48  ++id;
49  }
50  __nbVars = id;
51  __varOrder.resize(__nbVars);
52  std::iota(__varOrder.begin(), __varOrder.end(), (Idx)0);
53  }
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
Bijection< std::string, NodeId > __names2ids
bijection nodes names
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
Size Idx
Type for indexes.
Definition: types.h:50
std::vector< Idx > __varOrder
variable order in generated database
Size NodeId
Type for node ids.
Definition: graphElements.h:97
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
default constructor
+ Here is the call graph for this function:

◆ ~BNDatabaseGenerator()

template<typename GUM_SCALAR >
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::~BNDatabaseGenerator ( )

destructor

Definition at line 57 of file BNDatabaseGenerator_tpl.h.

57  {
58  GUM_DESTRUCTOR(BNDatabaseGenerator);
59  }
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
default constructor

◆ BNDatabaseGenerator() [2/3]

template<typename GUM_SCALAR>
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::BNDatabaseGenerator ( const BNDatabaseGenerator< GUM_SCALAR > &  )
private, delete

◆ BNDatabaseGenerator() [3/3]

template<typename GUM_SCALAR>
gum::learning::BNDatabaseGenerator< GUM_SCALAR >::BNDatabaseGenerator ( BNDatabaseGenerator< GUM_SCALAR > &&  )
private, delete

Member Function Documentation

◆ __varOrderFromCSV() [1/2]

template<typename GUM_SCALAR >
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrderFromCSV ( const std::string &  csvFileURL,
const std::string &  csvSeparator = "," 
) const
private

returns varOrder from a csv file

Definition at line 399 of file BNDatabaseGenerator_tpl.h.

References GUM_ERROR, and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrderFromCSV(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toCSV().

400  {
401  std::ifstream csvFile(csvFileURL);
402  std::vector< Idx > varOrder;
403  if (csvFile) {
404  varOrder = __varOrderFromCSV(csvFile, csvSeparator);
405  csvFile.close();
406  } else {
407  GUM_ERROR(NotFound, "csvFileURL does not exist");
408  }
409 
410  return varOrder;
411  }
std::vector< Idx > __varOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
std::vector< Idx > varOrder() const
returns variable order indexes
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ __varOrderFromCSV() [2/2]

template<typename GUM_SCALAR >
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrderFromCSV ( std::ifstream &  csvFile,
const std::string &  csvSeparator = "," 
) const
private

returns varOrder from a csv file

Definition at line 415 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__names2ids, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::BijectionImplementation< T1, T2, Alloc, Gen >::second(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

416  {
417  std::string line;
418  std::vector< std::string > header_found;
419  header_found.reserve(__nbVars);
420  while (std::getline(csvFile, line)) {
421  std::size_t i = 0;
422  auto pos = line.find(csvSeparator);
423  while (pos != std::string::npos) {
424  header_found.push_back(line.substr(i, pos - i));
425  pos += csvSeparator.length();
426  i = pos;
427  pos = line.find(csvSeparator, pos);
428 
429  if (pos == std::string::npos)
430  header_found.push_back(line.substr(i, line.length()));
431  }
432  break;
433  }
434 
435  std::vector< Size > varOrder;
436  varOrder.reserve(__nbVars);
437 
438  for (const auto& hf : header_found) {
439  varOrder.push_back(__names2ids.second(hf));
440  }
441 
442  return varOrder;
443  }
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
Bijection< std::string, NodeId > __names2ids
bijection nodes names
std::vector< Idx > varOrder() const
returns variable order indexes
+ Here is the call graph for this function:

◆ database()

template<typename GUM_SCALAR >
std::vector< std::vector< Idx > > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::database ( ) const

generates database according to bn into a std::vector

returns database using specified data order

Warning: makes a copy of the whole database

Definition at line 274 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__database, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__drawnSamples, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrder, and GUM_ERROR.

274  {
275  if (!__drawnSamples)
276  GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.");
277 
278  auto db(__database);
279  for (Idx i = 0; i < __database.size(); ++i) {
280  for (Idx j = 0; j < __nbVars; ++j) {
281  db.at(i).at(j) = (Idx)__database.at(i).at(__varOrder.at(j));
282  }
283  }
284  return db;
285  }
std::vector< std::vector< Idx > > __database
generated database
Size Idx
Type for indexes.
Definition: types.h:50
bool __drawnSamples
whether drawSamples has been already called.
std::vector< Idx > __varOrder
variable order in generated database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52

◆ drawSamples()

template<typename GUM_SCALAR >
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples ( Size  nbSamples)

generates and stores the database, and returns its log2likelihood; progress is reported through ProgressNotifier

draw instances from __bn

Definition at line 64 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__database, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__drawnSamples, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__log2likelihood, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::Instantiation::chgVal(), gum::Instantiation::end(), GUM_EMIT1, GUM_EMIT2, gum::Instantiation::incVar(), gum::ProgressNotifier::onProgress, gum::ProgressNotifier::onStop, gum::ArcGraphPart::parents(), gum::Timer::reset(), gum::Timer::step(), and gum::Instantiation::val().

64  {
65  Timer timer;
66  int progress = 0;
67 
68  timer.reset();
69 
70  if (onProgress.hasListener()) {
71  GUM_EMIT2(onProgress, progress, timer.step());
72  }
73  __database.clear();
74  __database.resize(nbSamples);
75  for (auto& row : __database) {
76  row.resize(__nbVars);
77  }
78  // get the order in which the nodes will be sampled
79  const gum::Sequence< gum::NodeId >& topOrder = __bn.topologicalOrder();
80  std::vector< gum::Instantiation > instantiations;
81 
82  // create instantiations in advance
83  for (Idx node = 0; node < __nbVars; ++node)
84  instantiations.push_back(gum::Instantiation(__bn.cpt(node)));
85 
86  // create the random generator
87  std::random_device rd;
88  std::mt19937 gen(rd());
89  std::uniform_real_distribution<> distro(0.0, 1.0);
90 
91  // perform the sampling
92  __log2likelihood = 0;
93  const gum::DAG& dag = __bn.dag();
94  for (Idx i = 0; i < nbSamples; ++i) {
95  if (onProgress.hasListener()) {
96  int p = int((i * 100) / nbSamples);
97  if (p != progress) {
98  progress = p;
99  GUM_EMIT2(onProgress, progress, timer.step());
100  }
101  }
102  std::vector< Idx >& sample = __database.at(i);
103  for (Idx j = 0; j < __nbVars; ++j) {
104  const gum::NodeId node = topOrder[j];
105  const auto& var = __bn.variable(node);
106  const auto& cpt = __bn.cpt(node);
107 
108  gum::Instantiation& inst = instantiations[node];
109  for (auto par : dag.parents(node))
110  inst.chgVal(__bn.variable(par), sample.at(par));
111 
112  const double nb = distro(gen);
113  double cumul = 0.0;
114  for (inst.chgVal(var, 0); !inst.end(); inst.incVar(var)) {
115  cumul += cpt[inst];
116  if (cumul >= nb) break;
117  }
118 
119  if (inst.end()) inst.chgVal(var, var.domainSize() - 1);
120  sample.at(node) = inst.val(var);
121 
122  __log2likelihood += std::log2(__bn.cpt(node)[inst]);
123  }
124  }
125 
126  __drawnSamples = true;
127 
128  if (onProgress.hasListener()) {
129  std::stringstream ss;
130  ss << "Database of size " << nbSamples << " generated in " << timer.step()
131  << " seconds. Log2likelihood : " << __log2likelihood;
132  GUM_EMIT1(onStop, ss.str());
133  }
134 
135  return __log2likelihood;
136  }
Signaler2< Size, double > onProgress
Progression (percent) and time.
double __log2likelihood
log2Likelihood of generated samples
#define GUM_EMIT1(signal, arg1)
Definition: signaler1.h:40
The generic class for storing (ordered) sequences of objects.
Definition: sequence.h:1019
Instantiation & chgVal(const DiscreteVariable &v, Idx newval)
Assign newval to variable v in the Instantiation.
void incVar(const DiscreteVariable &v)
Operator increment for variable v only.
Idx val(Idx i) const
Returns the current value of the variable at position i.
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
#define GUM_EMIT2(signal, arg1, arg2)
Definition: signaler2.h:40
const NodeSet & parents(const NodeId id) const
returns the set of nodes with arc ingoing to a given node
std::vector< std::vector< Idx > > __database
generated database
Class for assigning/browsing values to tuples of discrete variables.
Definition: instantiation.h:80
Signaler1< const std::string &> onStop
with a possible explanation for stopping
bool __drawnSamples
whether drawSamples has been already called.
Base class for dag.
Definition: DAG.h:99
Size NodeId
Type for node ids.
Definition: graphElements.h:97
bool end() const
Returns true if the Instantiation reached the end.
+ Here is the call graph for this function:

◆ log2likelihood()

template<typename GUM_SCALAR >
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::log2likelihood ( ) const

returns log2Likelihood of generated samples

Definition at line 390 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__drawnSamples, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__log2likelihood, and GUM_ERROR.

390  {
391  if (!__drawnSamples) {
392  GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.");
393  }
394  return __log2likelihood;
395  }
double __log2likelihood
log2Likelihood of generated samples
bool __drawnSamples
whether drawSamples has been already called.
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52

◆ operator=() [1/2]

template<typename GUM_SCALAR>
BNDatabaseGenerator& gum::learning::BNDatabaseGenerator< GUM_SCALAR >::operator= ( const BNDatabaseGenerator< GUM_SCALAR > &  )
private, delete

◆ operator=() [2/2]

template<typename GUM_SCALAR>
BNDatabaseGenerator& gum::learning::BNDatabaseGenerator< GUM_SCALAR >::operator= ( BNDatabaseGenerator< GUM_SCALAR > &&  )
private, delete

◆ setAntiTopologicalVarOrder()

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setAntiTopologicalVarOrder ( )

set columns in antiTopological order

Definition at line 344 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

344  {
345  std::vector< Idx > varOrder;
346  varOrder.reserve(__nbVars);
347  for (const auto& v : __bn.topologicalOrder()) {
348  varOrder.push_back(v);
349  }
350  std::reverse(varOrder.begin(), varOrder.end());
351  setVarOrder(varOrder);
352  }
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
std::vector< Idx > varOrder() const
returns variable order indexes
+ Here is the call graph for this function:

◆ setRandomVarOrder()

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setRandomVarOrder ( )

set columns in random order

Definition at line 356 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

356  {
357  std::vector< std::string > varOrder;
358  varOrder.reserve(__bn.size());
359  for (const auto& var : __bn.dag()) {
360  varOrder.push_back(__bn.variable(var).name());
361  }
362  std::random_device rd;
363  std::mt19937 g(rd());
364  std::shuffle(varOrder.begin(), varOrder.end(), g);
365  setVarOrder(varOrder);
366  }
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
std::vector< Idx > varOrder() const
returns variable order indexes
+ Here is the call graph for this function:

◆ setTopologicalVarOrder()

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setTopologicalVarOrder ( )

set columns in topological order

Definition at line 333 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

333  {
334  std::vector< Idx > varOrder;
335  varOrder.reserve(__nbVars);
336  for (const auto& v : __bn.topologicalOrder()) {
337  varOrder.push_back(v);
338  }
339  setVarOrder(varOrder);
340  }
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
std::vector< Idx > varOrder() const
returns variable order indexes
+ Here is the call graph for this function:

◆ setVarOrder() [1/2]

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder ( const std::vector< Idx > &  varOrder)

change columns order

Definition at line 289 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrder, GUM_ERROR, and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setAntiTopologicalVarOrder(), gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setRandomVarOrder(), gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setTopologicalVarOrder(), gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrderFromCSV().

290  {
291  if (varOrder.size() != __nbVars) {
292  GUM_ERROR(FatalError,
293  "varOrder's size must be equal to the number of variables");
294  }
295  std::vector< bool > usedVars(__nbVars, false);
296  for (const auto& i : varOrder) {
297  if (i >= __nbVars) {
298  GUM_ERROR(FatalError, "varOrder contains invalid variables");
299  }
300  if (usedVars.at(i))
301  GUM_ERROR(FatalError, "varOrder must not have repeated variables");
302  usedVars.at(i) = true;
303  }
304 
305  if (std::find(usedVars.begin(), usedVars.end(), false) != usedVars.end()) {
306  GUM_ERROR(FatalError, "varOrder must contain all variables");
307  }
308 
310  }
std::vector< Idx > varOrder() const
returns variable order indexes
std::vector< Idx > __varOrder
variable order in generated database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ setVarOrder() [2/2]

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder ( const std::vector< std::string > &  varOrder)

change columns order using variable names

Definition at line 314 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__names2ids, gum::BijectionImplementation< T1, T2, Alloc, Gen >::second(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder().

315  {
316  std::vector< Idx > varOrderIdx;
317  varOrderIdx.reserve(varOrder.size());
318  for (const auto& vname : varOrder) {
319  varOrderIdx.push_back(__names2ids.second(vname));
320  }
321  setVarOrder(varOrderIdx);
322  }
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
Bijection< std::string, NodeId > __names2ids
bijection nodes names
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
+ Here is the call graph for this function:

◆ setVarOrderFromCSV()

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrderFromCSV ( const std::string &  csvFileURL,
const std::string &  csvSeparator = "," 
)

change columns order according to a csv file

Definition at line 326 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrderFromCSV(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::setVarOrder().

327  {
328  setVarOrder(__varOrderFromCSV(csvFileURL, csvSeparator));
329  }
std::vector< Idx > __varOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
+ Here is the call graph for this function:

◆ toCSV()

template<typename GUM_SCALAR >
void gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toCSV ( const std::string &  csvFileURL,
bool  useLabels = true,
bool  append = false,
std::string  csvSeparator = ",",
bool  checkOnAppend = false 
) const

generates csv database according to bn

generates database, and writes csv file

Definition at line 140 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__database, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__drawnSamples, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrder, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrderFromCSV(), GUM_ERROR, and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder().

144  {
145  if (!__drawnSamples) {
146  GUM_ERROR(OperationNotAllowed, "drawSamples() must be called first.");
147  }
148 
149  if (csvSeparator.find("\n") != std::string::npos) {
150  GUM_ERROR(InvalidArgument,
151  "csvSeparator must not contain end-line characters");
152  }
153 
154  bool includeHeader = true;
155  if (append) {
156  std::ifstream csvFile(csvFileURL);
157  if (csvFile) {
158  auto varOrder = __varOrderFromCSV(csvFile, csvSeparator);
159  if (checkOnAppend && varOrder != __varOrder) {
160  GUM_ERROR(
161  OperationNotAllowed,
162  "Inconsistent variable order in csvFile when appending. You "
163  "can use setVarOrderFromCSV(url) function to get the right "
164  "order. You could also set parameter checkOnAppend=false if you "
165  "know what you are doing.");
166  }
167  includeHeader = false;
168  }
169  csvFile.close();
170  }
171 
172 
173  auto ofstreamFlag = append ? std::ofstream::app : std::ofstream::out;
174 
175  std::ofstream os(csvFileURL, ofstreamFlag);
176  bool firstCol = true;
177  if (includeHeader) {
178  for (const auto& i : __varOrder) {
179  if (firstCol) {
180  firstCol = false;
181  } else {
182  os << csvSeparator;
183  }
184  os << __bn.variable(i).name();
185  }
186  }
187  os << std::endl;
188 
189  bool firstRow = true;
190  for (const auto& row : __database) {
191  if (firstRow) {
192  firstRow = false;
193  } else {
194  os << std::endl;
195  }
196  firstCol = true;
197  for (const auto& i : __varOrder) {
198  if (firstCol) {
199  firstCol = false;
200  } else {
201  os << csvSeparator;
202  }
203  if (useLabels) {
204  os << __bn.variable(i).label(row.at(i));
205  } else {
206  os << row[i];
207  }
208  }
209  }
210 
211  os.close();
212  }
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
std::vector< Idx > __varOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
std::vector< std::vector< Idx > > __database
generated database
std::vector< Idx > varOrder() const
returns variable order indexes
bool __drawnSamples
whether drawSamples has been already called.
std::vector< Idx > __varOrder
variable order in generated database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
+ Here is the call graph for this function:

◆ toDatabaseTable()

template<typename GUM_SCALAR >
DatabaseTable gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toDatabaseTable ( bool  useLabels = true) const

generates a DatabaseTable

Definition at line 217 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__bn, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__database, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__drawnSamples, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__names2ids, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrder, gum::learning::DISCRETE, gum::BijectionImplementation< T1, T2, Alloc, Gen >::first(), GUM_ERROR, gum::learning::DatabaseTable< ALLOC >::insertRow(), gum::learning::DatabaseTable< ALLOC >::insertTranslator(), and gum::learning::DatabaseTable< ALLOC >::translator().

217  {
218  if (!__drawnSamples)
219  GUM_ERROR(OperationNotAllowed, "proceed() must be called first.");
220 
221  DatabaseTable<> db;
222  std::vector< std::string > varNames;
223  varNames.reserve(__nbVars);
224  for (const auto& i : __varOrder) {
225  varNames.push_back(__names2ids.first(i));
226  }
227 
228  // create the translators
229  for (std::size_t i = 0; i < __nbVars; ++i) {
230  const Variable& var = __bn.variable(__varOrder[i]);
231  db.insertTranslator(var, i);
232  }
233 
234 
235  // db.setVariableNames(varNames);
236  // db.setVariableNames(varOrderNames());
237 
238  if (useLabels) {
239  std::vector< std::string > xrow(__nbVars);
240  for (const auto& row : __database) {
241  for (Idx i = 0; i < __nbVars; ++i) {
242  Idx j = __varOrder.at(i);
243  xrow[i] = __bn.variable(j).label(row.at(j));
244  }
245  db.insertRow(xrow);
246  }
247  } else {
248  std::vector< DBTranslatedValueType > translatorType(__nbVars);
249  for (std::size_t i = 0; i < __nbVars; ++i) {
250  translatorType[i] = db.translator(i).getValType();
251  }
252  DBRow< DBTranslatedValue > xrow(__nbVars);
254  for (const auto& row : __database) {
255  for (Idx i = 0; i < __nbVars; ++i) {
256  Idx j = __varOrder.at(i);
257 
258  if (translatorType[i] == DBTranslatedValueType::DISCRETE)
259  xrow[i].discr_val = std::size_t(row.at(j));
260  else
261  xrow[i].cont_val = float(row.at(j));
262  }
263  }
264  db.insertRow(xrow, xmiss);
265  }
266 
267  return db;
268  }
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
Bijection< std::string, NodeId > __names2ids
bijection nodes names
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
std::vector< std::vector< Idx > > __database
generated database
The class representing a tabular database as used by learning tasks.
bool __drawnSamples
whether drawSamples has been already called.
std::vector< Idx > __varOrder
variable order in generated database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52
+ Here is the call graph for this function:

◆ varOrder()

template<typename GUM_SCALAR >
std::vector< Idx > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrder ( ) const

◆ varOrderNames()

template<typename GUM_SCALAR >
std::vector< std::string > gum::learning::BNDatabaseGenerator< GUM_SCALAR >::varOrderNames ( ) const

returns variable order.

Definition at line 378 of file BNDatabaseGenerator_tpl.h.

References gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__names2ids, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__nbVars, gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__varOrder, and gum::BijectionImplementation< T1, T2, Alloc, Gen >::first().

378  {
379  std::vector< std::string > varNames;
380  varNames.reserve(__nbVars);
381  for (const auto& i : __varOrder) {
382  varNames.push_back(__names2ids.first(i));
383  }
384 
385  return varNames;
386  }
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
Bijection< std::string, NodeId > __names2ids
bijection nodes names
std::vector< Idx > __varOrder
variable order in generated database
+ Here is the call graph for this function:

Member Data Documentation

◆ __bn

◆ __database

◆ __drawnSamples

◆ __log2likelihood

template<typename GUM_SCALAR>
double gum::learning::BNDatabaseGenerator< GUM_SCALAR >::__log2likelihood = 0
private

◆ __names2ids

◆ __nbVars

◆ __varOrder

◆ onProgress

Signaler2< Size, double > gum::ProgressNotifier::onProgress
inherited

Progression (percent) and time.

Definition at line 45 of file progressNotification.h.

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples().

◆ onStop

Signaler1< const std::string& > gum::ProgressNotifier::onStop
inherited

with a possible explanation for stopping

Definition at line 48 of file progressNotification.h.

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::drawSamples().


The documentation for this class was generated from the following files: