36 template <
typename GUM_SCALAR >
45 for (
const auto& var :
__bn.dag()) {
46 auto name =
__bn.variable(var).name();
56 template <
typename GUM_SCALAR >
63 template <
typename GUM_SCALAR >
80 std::vector< gum::Instantiation > instantiations;
87 std::random_device rd;
88 std::mt19937 gen(rd());
89 std::uniform_real_distribution<> distro(0.0, 1.0);
94 for (
Idx i = 0; i < nbSamples; ++i) {
96 int p = int((i * 100) / nbSamples);
102 std::vector< Idx >& sample = __database.at(i);
105 const auto& var =
__bn.variable(node);
106 const auto& cpt =
__bn.cpt(node);
109 for (
auto par : dag.
parents(node))
110 inst.
chgVal(
__bn.variable(par), sample.at(par));
112 const double nb = distro(gen);
116 if (cumul >= nb)
break;
119 if (inst.
end()) inst.
chgVal(var, var.domainSize() - 1);
120 sample.at(node) = inst.
val(var);
129 std::stringstream ss;
130 ss <<
"Database of size " << nbSamples <<
" generated in " << timer.
step()
139 template <
typename GUM_SCALAR >
143 std::string csvSeparator,
144 bool checkOnAppend)
const {
149 if (csvSeparator.find(
"\n") != std::string::npos) {
151 "csvSeparator must not contain end-line characters");
154 bool includeHeader =
true;
156 std::ifstream csvFile(csvFileURL);
162 "Inconsistent variable order in csvFile when appending. You " 163 "can use setVarOrderFromCSV(url) function to get the right " 164 "order. You could also set parameter checkOnAppend=false if you " 165 "know what you are doing.");
167 includeHeader =
false;
173 auto ofstreamFlag = append ? std::ofstream::app : std::ofstream::out;
175 std::ofstream os(csvFileURL, ofstreamFlag);
176 bool firstCol =
true;
184 os <<
__bn.variable(i).name();
189 bool firstRow =
true;
204 os <<
__bn.variable(i).label(row.at(i));
215 template <
typename GUM_SCALAR >
222 std::vector< std::string > varNames;
229 for (std::size_t i = 0; i <
__nbVars; ++i) {
239 std::vector< std::string > xrow(__nbVars);
242 Idx j = __varOrder.at(i);
243 xrow[i] =
__bn.variable(j).label(row.at(j));
248 std::vector< DBTranslatedValueType > translatorType(__nbVars);
249 for (std::size_t i = 0; i <
__nbVars; ++i) {
250 translatorType[i] = db.
translator(i).getValType();
256 Idx j = __varOrder.at(i);
259 xrow[i].discr_val = std::size_t(row.at(j));
261 xrow[i].cont_val = float(row.at(j));
272 template <
typename GUM_SCALAR >
273 std::vector< std::vector< Idx > >
288 template <
typename GUM_SCALAR >
290 const std::vector< Idx >&
varOrder) {
293 "varOrder's size must be equal to the number of variables");
295 std::vector< bool > usedVars(
__nbVars,
false);
296 for (
const auto& i : varOrder) {
302 usedVars.at(i) =
true;
305 if (std::find(usedVars.begin(), usedVars.end(),
false) != usedVars.end()) {
313 template <
typename GUM_SCALAR >
315 const std::vector< std::string >&
varOrder) {
316 std::vector< Idx > varOrderIdx;
317 varOrderIdx.reserve(varOrder.size());
318 for (
const auto& vname : varOrder) {
325 template <
typename GUM_SCALAR >
327 const std::string& csvFileURL,
const std::string& csvSeparator) {
332 template <
typename GUM_SCALAR >
336 for (
const auto& v :
__bn.topologicalOrder()) {
337 varOrder.push_back(v);
343 template <
typename GUM_SCALAR >
347 for (
const auto& v :
__bn.topologicalOrder()) {
348 varOrder.push_back(v);
350 std::reverse(varOrder.begin(), varOrder.end());
355 template <
typename GUM_SCALAR >
357 std::vector< std::string >
varOrder;
358 varOrder.reserve(
__bn.size());
359 for (
const auto& var :
__bn.dag()) {
360 varOrder.push_back(
__bn.variable(var).name());
362 std::random_device rd;
363 std::mt19937 g(rd());
364 std::shuffle(varOrder.begin(), varOrder.end(), g);
370 template <
typename GUM_SCALAR >
376 template <
typename GUM_SCALAR >
377 std::vector< std::string >
379 std::vector< std::string > varNames;
389 template <
typename GUM_SCALAR >
398 template <
typename GUM_SCALAR >
400 const std::string& csvFileURL,
const std::string& csvSeparator)
const {
401 std::ifstream csvFile(csvFileURL);
414 template <
typename GUM_SCALAR >
416 std::ifstream& csvFile,
const std::string& csvSeparator)
const {
418 std::vector< std::string > header_found;
420 while (std::getline(csvFile, line)) {
422 auto pos = line.find(csvSeparator);
423 while (pos != std::string::npos) {
424 header_found.push_back(line.substr(i, pos - i));
425 pos += csvSeparator.length();
427 pos = line.find(csvSeparator, pos);
429 if (pos == std::string::npos)
430 header_found.push_back(line.substr(i, line.length()));
438 for (
const auto& hf : header_found) {
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
Class representing a Bayesian Network.
const T2 & second(const T1 &first) const
Returns the second value of a pair given its first value.
Base class for every random variable.
~BNDatabaseGenerator()
destructor
Signaler2< Size, double > onProgress
Progression (percent) and time.
double __log2likelihood
log2Likelihood of generated samples
double step() const
Returns the delta time between now and the last reset() call (or the constructor).
const T1 & first(const T2 &second) const
Returns the first value of a pair given its second value.
#define GUM_EMIT1(signal, arg1)
std::vector< std::string > varOrderNames() const
returns variable order.
double drawSamples(Size nbSamples)
generates and stores the database, and returns its log2likelihood, using ProgressNotifier as notification ...
The generic class for storing (ordered) sequences of objects.
virtual void insertRow(const std::vector< std::string, ALLOC< std::string > > &new_row) final
insert a new row at the end of the database
Instantiation & chgVal(const DiscreteVariable &v, Idx newval)
Assign newval to variable v in the Instantiation.
Bijection< std::string, NodeId > __names2ids
bijection between node names and node ids
Class used to compute response times for benchmark purposes.
gum is the global namespace for all aGrUM entities
void incVar(const DiscreteVariable &v)
Operator increment for variable v only.
DatabaseTable toDatabaseTable(bool useLabels=true) const
generates a DatabaseTable
void setTopologicalVarOrder()
set columns in topological order
Idx val(Idx i) const
Returns the current value of the variable at position i.
void reset()
Reset the timer.
const BayesNet< GUM_SCALAR > & __bn
Bayesian network.
#define GUM_EMIT2(signal, arg1, arg2)
const DBTranslator< ALLOC > & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
const NodeSet & parents(const NodeId id) const
returns the set of nodes with arc ingoing to a given node
std::vector< Idx > __varOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
void toCSV(const std::string &csvFileURL, bool useLabels=true, bool append=false, std::string csvSeparator=",", bool checkOnAppend=false) const
generates csv database according to bn
double log2likelihood() const
returns log2Likelihood of generated samples
std::vector< std::vector< Idx > > database() const
generates database according to bn into a std::vector
The class for storing a record in a database.
std::vector< std::vector< Idx > > __database
generated database
The class representing a tabular database as used by learning tasks.
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
Class for assigning/browsing values to tuples of discrete variables.
void setRandomVarOrder()
set columns in random order
void setAntiTopologicalVarOrder()
set columns in anti-topological order
Size __nbVars
number of variables
std::vector< Idx > varOrder() const
returns variable order indexes
Class used to compute response times for benchmark purposes. This class represents a classic timer...
Size Idx
Type for indexes.
Signaler1< const std::string &> onStop
with a possible explanation for stopping
bool __drawnSamples
whether drawSamples has been already called.
std::vector< Idx > __varOrder
variable order in generated database
void setVarOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",")
change columns order according to a csv file
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size NodeId
Type for node ids.
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
default constructor
#define GUM_ERROR(type, msg)
bool end() const
Returns true if the Instantiation reached the end.