51 const std::size_t nb_vars = var_names.size();
54 for (std::size_t i = 0; i < nb_vars; ++i) {
65 const std::string& filename,
66 const std::vector< std::string >& missing_symbols) :
71 const std::string& CSV_filename,
73 const std::vector< std::string >& missing_symbols) {
78 std::size_t apriori_nb_vars = apriori_names.size();
80 for (std::size_t i = std::size_t(0); i < apriori_nb_vars; ++i)
81 apriori_names2col.
insert(apriori_names[i], i);
87 "the a apriori database has fewer variables " 88 "than the observed database");
93 const std::vector< std::string >& score_names =
95 const std::size_t score_nb_vars = score_names.size();
97 for (std::size_t i = std::size_t(0); i < score_nb_vars; ++i) {
99 mapping.
insert(i, apriori_names2col[score_names[i]]);
104 <<
" of the observed database does not belong to the " 105 <<
"apriori database");
110 for (std::size_t i = std::size_t(0); i < score_nb_vars; ++i) {
188 const std::string& filename,
189 const std::vector< std::string >& missing_symbols) :
356 std::move(from.__local_search_with_tabu_list);
358 __ranges = std::move(from.__ranges);
370 Size filename_size =
Size(filename.size());
372 if (filename_size < 4) {
374 "genericBNLearner could not determine the " 375 "file type of the database");
378 std::string extension = filename.substr(filename.size() - 4);
380 extension.begin(), extension.end(), extension.begin(), ::tolower);
382 if (extension !=
".csv") {
384 "genericBNLearner does not support yet this type " 391 const std::size_t nb_vars = var_names.size();
395 for (std::size_t i = 0; i < nb_vars; ++i) {
409 Size filename_size =
Size(filename.size());
411 if (filename_size < 4) {
413 "genericBNLearner could not determine the " 414 "file type of the database");
417 std::string extension = filename.substr(filename.size() - 4);
419 extension.begin(), extension.end(), extension.begin(), ::tolower);
421 if (extension !=
".csv") {
424 "genericBNLearner does not support yet this type of database file");
430 const std::string& filename,
431 const std::vector< std::string >& missing_symbols) {
438 const std::size_t nb_vars = var_names.size();
442 for (std::size_t i = 0; i < nb_vars; ++i) {
495 "The BNLearner does not support yet this apriori");
502 if (old_apriori !=
nullptr)
delete old_apriori;
555 "genericBNLearner does not support yet this score");
559 if (old_score !=
nullptr)
delete old_score;
564 bool take_into_account_score) {
570 if (take_into_account_score && (
__score !=
nullptr)) {
590 "genericBNLearner does not support " 591 <<
"yet this parameter estimator");
597 return param_estimator;
606 for (
Size j = 0; j < i; ++j) {
614 for (
const auto& arc : mandatory_arcs) {
615 initial_marks.
insert({arc.tail(), arc.head()},
'>');
619 for (
const auto& arc : forbidden_arcs) {
620 initial_marks.
insert({arc.tail(), arc.head()},
'-');
638 "For the moment, the BNLearner is unable to learn " 639 <<
"structures with missing values in databases");
680 "The BNLearner's corrected mutual information class does " 692 "For the moment, the BNLearner is unable to cope " 693 "with missing values in databases");
701 for (
const auto& arc : mandatory_arcs) {
706 init_graph.
addArc(arc.tail(), arc.head());
711 for (
const auto& arc : forbidden_arcs) {
732 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint) =
734 static_cast< StructuralConstraintForbiddenArcs&
>(gen_constraint) =
745 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
750 selector(*
__score, sel_constraint, op_set);
762 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint) =
764 static_cast< StructuralConstraintForbiddenArcs&
>(gen_constraint) =
776 static_cast< StructuralConstraintTabuList&
>(sel_constraint) =
778 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
783 selector(*
__score, sel_constraint, op_set);
795 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint) =
805 const ArcSet& mandatory_arcs =
806 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint)
809 bool order_compatible =
true;
811 for (
const auto& arc : mandatory_arcs) {
812 if (order.
pos(arc.tail()) >= order.
pos(arc.head())) {
813 order_compatible =
false;
818 if (order_compatible) {
822 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
827 selector(*
__score, sel_constraint, op_set);
834 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
839 selector(*
__score, sel_constraint, op_set);
848 "the learnDAG method has not been implemented for this " 849 "learning algorithm");
876 default:
return "genericBNLearner does not support yet this score";
882 std::pair< std::size_t, std::size_t >
884 const std::size_t k_fold) {
889 if (learning_fold >= k_fold) {
891 "In " << k_fold <<
"-fold cross validation, the learning " 892 <<
"fold should be strictly lower than " << k_fold
893 <<
" but, here, it is equal to " << learning_fold);
897 if (k_fold >= db_size) {
899 "In " << k_fold <<
"-fold cross validation, the database's " 900 <<
"size should be strictly greater than " << k_fold
901 <<
" but, here, the database has only " << db_size
906 const std::size_t foldSize = db_size / k_fold;
907 const std::size_t unfold_deb = learning_fold * foldSize;
908 const std::size_t unfold_end = unfold_deb + foldSize;
911 if (learning_fold == std::size_t(0)) {
913 std::pair< std::size_t, std::size_t >(unfold_end, db_size));
916 std::pair< std::size_t, std::size_t >(std::size_t(0), unfold_deb));
918 if (learning_fold != k_fold - 1) {
920 std::pair< std::size_t, std::size_t >(unfold_end, db_size));
924 return std::pair< std::size_t, std::size_t >(unfold_deb, unfold_end);
929 const NodeId id1,
const NodeId id2,
const std::vector< NodeId >& knowing) {
936 return chi2score.
statistics(id1, id2, knowing);
939 std::pair< double, double >
941 const std::string& name2,
942 const std::vector< std::string >& knowing) {
943 std::vector< NodeId > knowingIds;
947 std::back_inserter(knowingIds),
953 const std::vector< NodeId >& knowing) {
960 std::vector< NodeId > total(vars);
961 total.insert(total.end(), knowing.begin(), knowing.end());
962 double LLtotal = ll2score.
score(
IdSet<>(total,
false,
true));
963 if (knowing.size() == (
Size)0) {
966 double LLknw = ll2score.
score(
IdSet<>(knowing,
false,
true));
967 return LLtotal - LLknw;
973 const std::vector< std::string >& knowing) {
974 std::vector< NodeId > ids;
975 std::vector< NodeId > knowingIds;
977 auto mapper = [
this](
const std::string& c) ->
NodeId {
981 std::transform(vars.begin(), vars.end(), std::back_inserter(ids), mapper);
983 knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
AlgoType __selected_algo
the selected learning algorithm
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const std::vector< std::string, ALLOC< std::string > > & variableNames()
returns the names of the variables in the input dataset
the class for computing BDeu scores
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Score * __score
the score used
AprioriNoApriori * __no_apriori
double score(const IdSet< ALLOC > &idset)
returns the score for a given IdSet
Base class for every random variable.
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Database __score_database
the database to be used by the scores and parameter estimators
Idx pos(const Key &key) const
Returns the position of the object passed in argument (if it exists).
virtual void setWeight(const double weight)
sets the weight of the a priori (kind of effective sample size)
the structural constraint for forbidding the creation of some arcs during structure learning ...
CorrectedMutualInformation ::KModeTypes __3off2_kmode
the penalty used in 3off2
const std::string & __getAprioriType() const
returns the type (as a string) of a given apriori
double __EMepsilon
epsilon for EM. If epsilon=0.0: no EM
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
const ArcSet & arcs() const
returns the set of mandatory arcs
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
The base class for all the scores used for learning (BIC, BDeu, etc)
A class for generic framework of learning algorithms that can easily be used.
const std::vector< std::string > & missingSymbols() const
returns the set of missing symbols taken into account
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the parameter estimator
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
the class for computing Log2-likelihood scores
MixedGraph learnMixedStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of an Essential Graph
void __createScore()
create the score used for learning
The class used to pack sets of generators.
the class for computing Bayesian Dirichlet (BD) log2 scores
StructuralConstraintSliceOrder __constraint_SliceOrder
the constraint for 2TBNs
Database & operator=(const Database &from)
copy operator
the structural constraint indicating that some arcs shall never be removed or reversed ...
virtual void eraseArc(const Arc &arc)
removes an arc from the ArcGraphPart
Miic __miic_3off2
the 3off2 algorithm
the class for computing Chi2 independence test scores
virtual void addEdge(const NodeId first, const NodeId second)
insert a new edge into the undirected graph
ParamEstimatorType __param_estimator_type
the type of the parameter estimator
void addConstraints(HashTable< std::pair< NodeId, NodeId >, char > constraints)
Sets a collection of constraints for the orientation phase.
the class for computing Chi2 scores
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set...
A class that redirects gum_signal from algorithms to the listeners of BNLearn.
the base class for all a priori
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
DatabaseTable __database
the database itself
the class for computing K2 scores (actually their log2 value)
MixedGraph __prepare_miic_3off2()
prepares the initial graph for 3off2 or miic
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the BNLearner.
bool exists(const NodeId id) const
alias for existsNode
gum is the global namespace for all aGrUM entities
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the log-likelihood of vars in the database, conditioned by knowing for the BNLearner.
AprioriType __apriori_type
the a priori selected for the score and parameters
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
the internal apriori for the BDeu score (N' / (r_i * q_i)). BDeu is a BD score with a N'/(r_i * q_i) apr...
The class for generic Hash Tables.
the class for computing Log2-likelihood scores
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <chi2 statistic,pvalue> for a test var1 indep var2 given rhs_ids
CorrectedMutualInformation * __mutual_info
the selected correction for 3off2 and miic
A Dirichlet a priori: computes its N'_ijk from a database.
DAG __initial_dag
an initial DAG given to learners
const Sequence< NodeId > & order() const noexcept
returns the current order
The mechanism to compute the next available graph changes for directed structure learning search algor...
StructuralConstraintMandatoryArcs __constraint_MandatoryArcs
the constraint on mandatory arcs
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Database * __apriori_database
the database used by the Dirichlet a priori
LocalSearchWithTabuList __local_search_with_tabu_list
the local search with tabu list algorithm
DatabaseTable readFile(const std::string &filename)
const ApproximationScheme * __current_algorithm
bool hasMissingValues() const
indicates whether the database contains some missing values
the "meta-programming" class for storing structural constraints. In aGrUM, there are two ways to store ...
ParamEstimator * __createParamEstimator(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
void __createCorrectedMutualInformation()
create the Corrected Mutual Information instance for Miic/3off2
const ArcSet & arcs() const
returns the set of mandatory arcs
Apriori * __apriori
the apriori used
StructuralConstraintTabuList __constraint_TabuList
the constraint for tabu lists
std::string __apriori_dbname
the filename for the Dirichlet a priori, if any
void fillDatabase(DATABASE< ALLOC > &database, const bool retry_insertion=false)
fills the rows of the database table
std::size_t insertTranslator(const Translator< ALLOC > &translator, const std::size_t column, const bool unique_column=true)
inserts a new translator at the end of the translator set
DAG __learnDAG()
returns the DAG learnt
GreedyHillClimbing __greedy_hill_climbing
the greedy hill climbing algorithm
std::size_t nbRows() const noexcept
returns the number of records (rows) in the database
Base class for all aGrUM's exceptions.
genericBNLearner & operator=(const genericBNLearner &)
copy operator
const DatabaseTable & databaseTable() const
returns the internal database table
The basic class for computing the next graph changes possible in a structure learning algorithm...
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
the class for computing AIC scores
the class for computing BIC scores
virtual const Apriori< ALLOC > & internalApriori() const =0
returns the internal apriori of the score
virtual void addArc(const NodeId tail, const NodeId head)
insert a new arc into the directed graph
DAG learnDAG()
learn a structure from a file (must have read the db before)
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
A listener that allows BNLearner to be used as a proxy for its inner algorithms.
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between node ids and their columns in the database
The class representing a tabular database as used by learning tasks.
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
StructuralConstraintIndegree __constraint_Indegree
the constraint for indegrees
ScoreType __score_type
the score selected for learning
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
virtual ~genericBNLearner()
destructor
DBRowGeneratorParser * __parser
the parser used for reading the database
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
A pack of learning algorithms that can easily be used.
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
static DatabaseTable __readFile(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a DatabaseTable
DBRowGeneratorParser & parser()
returns the parser for the database
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
double __apriori_weight
the weight of the apriori
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
void __createApriori()
create the apriori used for learning
The class imposing a N-sized tabu list as a structural constraints for learning algorithms.
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
the smooth a priori: adds a weight w to all the countings
A pack of learning algorithms that can easily be used.
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection assigning to each NodeId its column in the database
Database(const std::string &file, const std::vector< std::string > &missing_symbols)
default constructor
the class for packing together the translators used to preprocess the datasets
The databases' cell translators for labelized variables.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
const DatabaseTable & database() const
returns the database used by the BNLearner
StructuralConstraintForbiddenArcs __constraint_ForbiddenArcs
the constraint on forbidden arcs
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
a helper to easily read databases
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
The base class for estimating parameters of CPTs.
The class for estimating parameters of CPTs using Maximum Likelihood.
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The basic class for computing the next graph changes possible in a structure learning algorithm...
The base class for structural constraints used by learning algorithms that learn a directed graph str...
Size NodeId
Type for node ids.
void reorder(const std::size_t k, const bool k_is_input_col=false)
performs a reordering of the kth translator or of the first translator parsing the kth column of the ...
the no a priori class: corresponds to 0 weight-sample
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
DAG learnStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of a Bayesian network, ie a DAG, by first learning an Essential graph and then ...
#define GUM_ERROR(type, msg)
The base class for structural constraints imposed by DAGs.
Base class for mixed graphs.
the structural constraint imposing a partial order over nodes