55 const std::size_t nb_vars = var_names.size();
58 for (std::size_t i = 0; i < nb_vars; ++i) {
69 const std::string& filename,
70 const std::vector< std::string >& missing_symbols) :
75 const std::string& CSV_filename,
77 const std::vector< std::string >& missing_symbols) {
82 std::size_t apriori_nb_vars = apriori_names.size();
84 for (std::size_t i = std::size_t(0); i < apriori_nb_vars; ++i)
85 apriori_names2col.
insert(apriori_names[i], i);
91 "the a apriori database has fewer variables " 92 "than the observed database");
97 const std::vector< std::string >& score_names =
99 const std::size_t score_nb_vars = score_names.size();
101 for (std::size_t i = std::size_t(0); i < score_nb_vars; ++i) {
103 mapping.
insert(i, apriori_names2col[score_names[i]]);
108 <<
" of the observed database does not belong to the " 109 <<
"apriori database");
114 for (std::size_t i = std::size_t(0); i < score_nb_vars; ++i) {
192 const std::string& filename,
193 const std::vector< std::string >& missing_symbols) :
360 std::move(from.__local_search_with_tabu_list);
362 __ranges = std::move(from.__ranges);
374 Size filename_size =
Size(filename.size());
376 if (filename_size < 4) {
378 "genericBNLearner could not determine the " 379 "file type of the database");
382 std::string extension = filename.substr(filename.size() - 4);
384 extension.begin(), extension.end(), extension.begin(), ::tolower);
386 if (extension !=
".csv") {
388 "genericBNLearner does not support yet this type " 395 const std::size_t nb_vars = var_names.size();
399 for (std::size_t i = 0; i < nb_vars; ++i) {
413 Size filename_size =
Size(filename.size());
415 if (filename_size < 4) {
417 "genericBNLearner could not determine the " 418 "file type of the database");
421 std::string extension = filename.substr(filename.size() - 4);
423 extension.begin(), extension.end(), extension.begin(), ::tolower);
425 if (extension !=
".csv") {
428 "genericBNLearner does not support yet this type of database file");
434 const std::string& filename,
435 const std::vector< std::string >& missing_symbols) {
442 const std::size_t nb_vars = var_names.size();
446 for (std::size_t i = 0; i < nb_vars; ++i) {
499 "The BNLearner does not support yet this apriori");
506 if (old_apriori !=
nullptr)
delete old_apriori;
559 "genericBNLearner does not support yet this score");
563 if (old_score !=
nullptr)
delete old_score;
568 bool take_into_account_score) {
574 if (take_into_account_score && (
__score !=
nullptr)) {
594 "genericBNLearner does not support " 595 <<
"yet this parameter estimator");
601 return param_estimator;
610 for (
Size j = 0; j < i; ++j) {
618 for (
const auto& arc : mandatory_arcs) {
619 initial_marks.
insert({arc.tail(), arc.head()},
'>');
623 for (
const auto& arc : forbidden_arcs) {
624 initial_marks.
insert({arc.tail(), arc.head()},
'-');
642 "For the moment, the BNLearner is unable to learn " 643 <<
"structures with missing values in databases");
684 "The BNLearner's corrected mutual information class does " 696 "For the moment, the BNLearner is unable to cope " 697 "with missing values in databases");
705 for (
const auto& arc : mandatory_arcs) {
710 init_graph.
addArc(arc.tail(), arc.head());
715 for (
const auto& arc : forbidden_arcs) {
737 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint) =
739 static_cast< StructuralConstraintForbiddenArcs&
>(gen_constraint) =
741 static_cast< StructuralConstraintPossibleEdges&
>(gen_constraint) =
752 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
757 selector(*
__score, sel_constraint, op_set);
770 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint) =
772 static_cast< StructuralConstraintForbiddenArcs&
>(gen_constraint) =
774 static_cast< StructuralConstraintPossibleEdges&
>(gen_constraint) =
786 static_cast< StructuralConstraintTabuList&
>(sel_constraint) =
788 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
793 selector(*
__score, sel_constraint, op_set);
806 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint) =
808 static_cast< StructuralConstraintForbiddenArcs&
>(gen_constraint) =
818 const ArcSet& mandatory_arcs =
819 static_cast< StructuralConstraintMandatoryArcs&
>(gen_constraint)
822 bool order_compatible =
true;
824 for (
const auto& arc : mandatory_arcs) {
825 if (order.
pos(arc.tail()) >= order.
pos(arc.head())) {
826 order_compatible =
false;
831 if (order_compatible) {
835 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
840 selector(*
__score, sel_constraint, op_set);
847 static_cast< StructuralConstraintIndegree&
>(sel_constraint) =
852 selector(*
__score, sel_constraint, op_set);
861 "the learnDAG method has not been implemented for this " 862 "learning algorithm");
889 default:
return "genericBNLearner does not support yet this score";
895 std::pair< std::size_t, std::size_t >
897 const std::size_t k_fold) {
902 if (learning_fold >= k_fold) {
904 "In " << k_fold <<
"-fold cross validation, the learning " 905 <<
"fold should be strictly lower than " << k_fold
906 <<
" but, here, it is equal to " << learning_fold);
910 if (k_fold >= db_size) {
912 "In " << k_fold <<
"-fold cross validation, the database's " 913 <<
"size should be strictly greater than " << k_fold
914 <<
" but, here, the database has only " << db_size
919 const std::size_t foldSize = db_size / k_fold;
920 const std::size_t unfold_deb = learning_fold * foldSize;
921 const std::size_t unfold_end = unfold_deb + foldSize;
924 if (learning_fold == std::size_t(0)) {
926 std::pair< std::size_t, std::size_t >(unfold_end, db_size));
929 std::pair< std::size_t, std::size_t >(std::size_t(0), unfold_deb));
931 if (learning_fold != k_fold - 1) {
933 std::pair< std::size_t, std::size_t >(unfold_end, db_size));
937 return std::pair< std::size_t, std::size_t >(unfold_deb, unfold_end);
942 const NodeId id1,
const NodeId id2,
const std::vector< NodeId >& knowing) {
947 return chi2score.
statistics(id1, id2, knowing);
950 std::pair< double, double >
952 const std::string& name2,
953 const std::vector< std::string >& knowing) {
954 std::vector< NodeId > knowingIds;
958 std::back_inserter(knowingIds),
964 const NodeId id1,
const NodeId id2,
const std::vector< NodeId >& knowing) {
971 std::pair< double, double >
973 const std::string& name2,
974 const std::vector< std::string >& knowing) {
975 std::vector< NodeId > knowingIds;
979 std::back_inserter(knowingIds),
985 const std::vector< NodeId >& knowing) {
990 std::vector< NodeId > total(vars);
991 total.insert(total.end(), knowing.begin(), knowing.end());
992 double LLtotal = ll2score.
score(
IdSet<>(total,
false,
true));
993 if (knowing.size() == (
Size)0) {
996 double LLknw = ll2score.score(
IdSet<>(knowing,
false,
true));
997 return LLtotal - LLknw;
1003 const std::vector< std::string >& knowing) {
1004 std::vector< NodeId > ids;
1005 std::vector< NodeId > knowingIds;
1007 auto mapper = [
this](
const std::string& c) ->
NodeId {
1011 std::transform(vars.begin(), vars.end(), std::back_inserter(ids), mapper);
1013 knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
AlgoType __selected_algo
the selected learning algorithm
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const std::vector< std::string, ALLOC< std::string > > & variableNames()
returns the names of the variables in the input dataset
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
the class for computing BDeu scores
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Score * __score
the score used
AprioriNoApriori * __no_apriori
double score(const IdSet< ALLOC > &idset)
returns the score for a given IdSet
Base class for every random variable.
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Database __score_database
the database to be used by the scores and parameter estimators
Idx pos(const Key &key) const
Returns the position of the object passed in argument (if it exists).
virtual void setWeight(const double weight)
sets the weight of the a priori (kind of effective sample size)
the structural constraint for forbidding the creation of some arcs during structure learning ...
CorrectedMutualInformation ::KModeTypes __3off2_kmode
the penalty used in 3off2
const std::string & __getAprioriType() const
returns the type (as a string) of a given apriori
double __EMepsilon
epsilon for EM. If epsilon=0.0: no EM
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
const ArcSet & arcs() const
returns the set of mandatory arcs
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
The base class for all the scores used for learning (BIC, BDeu, etc)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const std::vector< std::string > & missingSymbols() const
returns the set of missing symbols taken into account
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the parameter estimator
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
MixedGraph learnMixedStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of an Essential Graph
void __createScore()
create the score used for learning
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
The class used to pack sets of generators.
the class for computing Bayesian Dirichlet (BD) log2 scores
StructuralConstraintSliceOrder __constraint_SliceOrder
the constraint for 2TBNs
Database & operator=(const Database &from)
copy operator
the structural constraint indicating that some arcs shall never be removed or reversed ...
virtual void eraseArc(const Arc &arc)
removes an arc from the ArcGraphPart
Miic __miic_3off2
the 3off2 algorithm
the class for computing Chi2 independence test scores
virtual void addEdge(const NodeId first, const NodeId second)
insert a new edge into the undirected graph
ParamEstimatorType __param_estimator_type
the type of the parameter estimator
void addConstraints(HashTable< std::pair< NodeId, NodeId >, char > constraints)
Set an ensemble of constraints for the orientation phase.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set...
A class that redirects gum_signal from algorithms to the listeners of BNLearn.
the base class for all a priori
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
DatabaseTable __database
the database itself
the class for computing K2 scores (actually their log2 value)
MixedGraph __prepare_miic_3off2()
prepares the initial graph for 3off2 or miic
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
bool exists(const NodeId id) const
alias for existsNode
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
std::pair< double, double > G2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
AprioriType __apriori_type
the a priori selected for the score and parameters
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
the internal apriori for the BDeu score (N' / (r_i * q_i)). BDeu is a BD score with a N'/(r_i * q_i) apr...
The class for generic Hash Tables.
the class for computing Log2-likelihood scores
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <chi2 statistic,pvalue> for a test var1 indep var2 given rhs_ids
CorrectedMutualInformation * __mutual_info
the selected correction for 3off2 and miic
A dirichlet priori: computes its N'_ijk from a database.
the class for computing G2 independence test scores
DAG __initial_dag
an initial DAG given to learners
const Sequence< NodeId > & order() const noexcept
returns the current order
The mechanism to compute the next available graph changes for directed structure learning search algor...
StructuralConstraintMandatoryArcs __constraint_MandatoryArcs
the constraint on mandatory arcs
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Database * __apriori_database
the database used by the Dirichlet a priori
LocalSearchWithTabuList __local_search_with_tabu_list
the local search with tabu list algorithm
DatabaseTable readFile(const std::string &filename)
const ApproximationScheme * __current_algorithm
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <G2statistic,pvalue> for a test var1 indep var2 given rhs_ids
bool hasMissingValues() const
indicates whether the database contains some missing values
the "meta-programming" class for storing structural constraints. In aGrUM, there are two ways to store ...
ParamEstimator * __createParamEstimator(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
void __createCorrectedMutualInformation()
create the Corrected Mutual Information instance for Miic/3off2
const ArcSet & arcs() const
returns the set of mandatory arcs
Apriori * __apriori
the apriori used
StructuralConstraintTabuList __constraint_TabuList
the constraint for tabu lists
std::string __apriori_dbname
the filename for the Dirichlet a priori, if any
void fillDatabase(DATABASE< ALLOC > &database, const bool retry_insertion=false)
fills the rows of the database table
StructuralConstraintPossibleEdges __constraint_PossibleEdges
the constraint on possible Edges
std::size_t insertTranslator(const Translator< ALLOC > &translator, const std::size_t column, const bool unique_column=true)
inserts a new translator at the end of the translator set
DAG __learnDAG()
returns the DAG learnt
GreedyHillClimbing __greedy_hill_climbing
the greedy hill climbing algorithm
std::size_t nbRows() const noexcept
returns the number of records (rows) in the database
Base class for all aGrUM's exceptions.
genericBNLearner & operator=(const genericBNLearner &)
copy operator
const DatabaseTable & databaseTable() const
returns the internal database table
The basic class for computing the next graph changes possible in a structure learning algorithm...
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
the class for computing AIC scores
the class for computing BIC scores
virtual const Apriori< ALLOC > & internalApriori() const =0
returns the internal apriori of the score
virtual void addArc(const NodeId tail, const NodeId head)
insert a new arc into the directed graph
DAG learnDAG()
learn a structure from a file (must have read the db before)
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between node ids and their columns in the database
The class representing a tabular database as used by learning tasks.
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
StructuralConstraintIndegree __constraint_Indegree
the constraint for indegrees
ScoreType __score_type
the score selected for learning
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
virtual ~genericBNLearner()
destructor
DBRowGeneratorParser * __parser
the parser used for reading the database
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
A pack of learning algorithms that can easily be used.
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
static DatabaseTable __readFile(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a databaseVectInRam
DBRowGeneratorParser & parser()
returns the parser for the database
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
double __apriori_weight
the weight of the apriori
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
void __createApriori()
create the apriori used for learning
The class imposing a N-sized tabu list as a structural constraints for learning algorithms.
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
the smooth a priori: adds a weight w to all the countings
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection assigning to each NodeId its column in the database
Database(const std::string &file, const std::vector< std::string > &missing_symbols)
default constructor
the class for packing together the translators used to preprocess the datasets
The databases' cell translators for labelized variables.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
const DatabaseTable & database() const
returns the database used by the BNLearner
StructuralConstraintForbiddenArcs __constraint_ForbiddenArcs
the constraint on forbidden arcs
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
a helper to easily read databases
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
The base class for estimating parameters of CPTs.
The class for estimating parameters of CPTs using Maximum Likelihood.
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The basic class for computing the next graph changes possible in a structure learning algorithm...
The base class for structural constraints used by learning algorithms that learn a directed graph str...
Size NodeId
Type for node ids.
void reorder(const std::size_t k, const bool k_is_input_col=false)
performs a reordering of the kth translator or of the first translator parsing the kth column of the ...
the no a priori class: corresponds to 0 weight-sample
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
DAG learnStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of a Bayesian network, i.e., a DAG, by first learning an Essential graph and then ...
#define GUM_ERROR(type, msg)
The base class for structural constraints imposed by DAGs.
Base class for mixed graphs.
the structural constraint imposing a partial order over nodes