39 template <
typename GUM_SCALAR >
48 for (
const auto& var :
__bn.dag()) {
49 auto name =
__bn.variable(var).name();
59 template <
typename GUM_SCALAR >
66 template <
typename GUM_SCALAR >
83 std::vector< gum::Instantiation > instantiations;
90 std::random_device rd;
91 std::mt19937 gen(rd());
92 std::uniform_real_distribution<> distro(0.0, 1.0);
97 for (
Idx i = 0; i < nbSamples; ++i) {
99 int p = int((i * 100) / nbSamples);
105 std::vector< Idx >& sample = __database.at(i);
108 const auto& var =
__bn.variable(node);
109 const auto& cpt =
__bn.cpt(node);
112 for (
auto par : dag.
parents(node))
113 inst.
chgVal(
__bn.variable(par), sample.at(par));
115 const double nb = distro(gen);
119 if (cumul >= nb)
break;
122 if (inst.
end()) inst.
chgVal(var, var.domainSize() - 1);
123 sample.at(node) = inst.
val(var);
132 std::stringstream ss;
133 ss <<
"Database of size " << nbSamples <<
" generated in " << timer.
step()
142 template <
typename GUM_SCALAR >
146 std::string csvSeparator,
147 bool checkOnAppend)
const {
152 if (csvSeparator.find(
"\n") != std::string::npos) {
154 "csvSeparator must not contain end-line characters");
157 bool includeHeader =
true;
159 std::ifstream csvFile(csvFileURL);
165 "Inconsistent variable order in csvFile when appending. You " 166 "can use setVarOrderFromCSV(url) function to get the right " 167 "order. You could also set parameter checkOnAppend=false if you " 168 "know what you are doing.");
170 includeHeader =
false;
176 auto ofstreamFlag = append ? std::ofstream::app : std::ofstream::out;
178 std::ofstream os(csvFileURL, ofstreamFlag);
179 bool firstCol =
true;
187 os <<
__bn.variable(i).name();
192 bool firstRow =
true;
207 os <<
__bn.variable(i).label(row.at(i));
218 template <
typename GUM_SCALAR >
225 std::vector< std::string > varNames;
232 for (std::size_t i = 0; i <
__nbVars; ++i) {
242 std::vector< std::string > xrow(__nbVars);
245 Idx j = __varOrder.at(i);
246 xrow[i] =
__bn.variable(j).label(row.at(j));
251 std::vector< DBTranslatedValueType > translatorType(__nbVars);
252 for (std::size_t i = 0; i <
__nbVars; ++i) {
253 translatorType[i] = db.
translator(i).getValType();
259 Idx j = __varOrder.at(i);
262 xrow[i].discr_val = std::size_t(row.at(j));
264 xrow[i].cont_val = float(row.at(j));
275 template <
typename GUM_SCALAR >
276 std::vector< std::vector< Idx > >
291 template <
typename GUM_SCALAR >
293 const std::vector< Idx >&
varOrder) {
296 "varOrder's size must be equal to the number of variables");
298 std::vector< bool > usedVars(
__nbVars,
false);
299 for (
const auto& i : varOrder) {
305 usedVars.at(i) =
true;
308 if (std::find(usedVars.begin(), usedVars.end(),
false) != usedVars.end()) {
316 template <
typename GUM_SCALAR >
318 const std::vector< std::string >&
varOrder) {
319 std::vector< Idx > varOrderIdx;
320 varOrderIdx.reserve(varOrder.size());
321 for (
const auto& vname : varOrder) {
328 template <
typename GUM_SCALAR >
330 const std::string& csvFileURL,
const std::string& csvSeparator) {
335 template <
typename GUM_SCALAR >
339 for (
const auto& v :
__bn.topologicalOrder()) {
340 varOrder.push_back(v);
346 template <
typename GUM_SCALAR >
350 for (
const auto& v :
__bn.topologicalOrder()) {
351 varOrder.push_back(v);
353 std::reverse(varOrder.begin(), varOrder.end());
358 template <
typename GUM_SCALAR >
360 std::vector< std::string >
varOrder;
361 varOrder.reserve(
__bn.size());
362 for (
const auto& var :
__bn.dag()) {
363 varOrder.push_back(
__bn.variable(var).name());
365 std::random_device rd;
366 std::mt19937 g(rd());
367 std::shuffle(varOrder.begin(), varOrder.end(), g);
373 template <
typename GUM_SCALAR >
379 template <
typename GUM_SCALAR >
380 std::vector< std::string >
382 std::vector< std::string > varNames;
392 template <
typename GUM_SCALAR >
401 template <
typename GUM_SCALAR >
403 const std::string& csvFileURL,
const std::string& csvSeparator)
const {
404 std::ifstream csvFile(csvFileURL);
417 template <
typename GUM_SCALAR >
419 std::ifstream& csvFile,
const std::string& csvSeparator)
const {
421 std::vector< std::string > header_found;
423 while (std::getline(csvFile, line)) {
425 auto pos = line.find(csvSeparator);
426 while (pos != std::string::npos) {
427 header_found.push_back(line.substr(i, pos - i));
428 pos += csvSeparator.length();
430 pos = line.find(csvSeparator, pos);
432 if (pos == std::string::npos)
433 header_found.push_back(line.substr(i, line.length()));
441 for (
const auto& hf : header_found) {
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
Class representing a Bayesian Network.
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
Base class for every random variable.
~BNDatabaseGenerator()
destructor
Signaler2< Size, double > onProgress
Progression (percent) and time.
double __log2likelihood
log2Likelihood of generated samples
double step() const
Returns the delta time between now and the last reset() call (or the constructor).
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
#define GUM_EMIT1(signal, arg1)
std::vector< std::string > varOrderNames() const
returns variable order.
double drawSamples(Size nbSamples)
generates and stores the database, returns log2likelihood using ProgressNotifier as notification ...
The generic class for storing (ordered) sequences of objects.
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
Instantiation & chgVal(const DiscreteVariable &v, Idx newval)
Assign newval to variable v in the Instantiation.
Bijection< std::string, NodeId > __names2ids
bijection between node names and node ids
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void incVar(const DiscreteVariable &v)
Operator increment for variable v only.
DatabaseTable toDatabaseTable(bool useLabels=true) const
generates a DatabaseTable
void setTopologicalVarOrder()
set columns in topological order
Idx val(Idx i) const
Returns the current value of the variable at position i.
void reset()
Reset the timer.
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
#define GUM_EMIT2(signal, arg1, arg2)
const DBTranslator< ALLOC > & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
const NodeSet & parents(const NodeId id) const
returns the set of nodes with arc ingoing to a given node
std::vector< Idx > __varOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
void toCSV(const std::string &csvFileURL, bool useLabels=true, bool append=false, std::string csvSeparator=",", bool checkOnAppend=false) const
generates csv database according to bn
double log2likelihood() const
returns log2Likelihood of generated samples
std::vector< std::vector< Idx > > database() const
generates database according to bn into a std::vector
The class for storing a record in a database.
std::vector< std::vector< Idx > > __database
generated database
The class representing a tabular database as used by learning tasks.
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
Class for assigning/browsing values to tuples of discrete variables.
void setRandomVarOrder()
set columns in random order
void setAntiTopologicalVarOrder()
set columns in anti-topological order
Size __nbVars
number of variables
std::vector< Idx > varOrder() const
returns variable order indexes
Class used to compute response times for benchmark purposes. This class represents a classic timer...
Size Idx
Type for indexes.
Signaler1< const std::string &> onStop
with a possible explanation for stopping
bool __drawnSamples
whether drawSamples has been already called.
std::vector< Idx > __varOrder
variable order in generated database
void setVarOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",")
change columns order according to a csv file
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Size NodeId
Type for node ids.
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
default constructor
#define GUM_ERROR(type, msg)
bool end() const
Returns true if the Instantiation reached the end.