30 #ifndef GUM_LEARNING_GENERIC_BN_LEARNER_H 31 #define GUM_LEARNING_GENERIC_BN_LEARNER_H 94 class BNLearnerListener;
117 DIRICHLET_FROM_DATABASE,
124 GREEDY_HILL_CLIMBING,
125 LOCAL_SEARCH_WITH_TABU_LIST,
142 explicit Database(
const std::string& file,
143 const std::vector< std::string >& missing_symbols);
161 Database(
const std::string& filename,
163 const std::vector< std::string >& missing_symbols);
172 template <
typename GUM_SCALAR >
173 Database(
const std::string& filename,
175 const std::vector< std::string >& missing_symbols);
210 const std::vector< std::size_t >&
domainSizes()
const;
213 const std::vector< std::string >&
names()
const;
232 const std::vector< std::string >& missingSymbols()
const;
249 #if defined(_OPENMP) && !defined(GUM_DEBUG_MODE) 256 Size __min_nb_rows_per_thread{100};
261 template <
typename GUM_SCALAR >
280 const std::vector< std::string >& missing_symbols);
302 template <
typename GUM_SCALAR >
305 const std::vector< std::string >& missing_symbols);
347 const std::vector< std::string >&
names()
const;
350 const std::vector< std::size_t >&
domainSizes()
const;
376 template <
template <
typename >
class XALLOC >
378 const std::vector< std::pair< std::size_t, std::size_t >,
379 XALLOC< std::pair< std::size_t, std::size_t > > >&
389 const std::vector< std::pair< std::size_t, std::size_t > >&
413 std::pair< std::size_t, std::size_t >
415 const std::size_t k_fold);
425 std::pair< double, double >
chi2(
const NodeId id1,
427 const std::vector< NodeId >& knowing = {});
435 std::pair< double, double >
436 chi2(
const std::string& name1,
437 const std::string& name2,
438 const std::vector< std::string >& knowing = {});
448 const std::vector< NodeId >& knowing = {});
458 const std::vector< std::string >& knowing = {});
557 void useK2(
const std::vector< NodeId >& order);
605 void setSliceOrder(
const std::vector< std::vector< std::string > >& slices);
614 void addForbiddenArc(
const std::string& tail,
const std::string& head);
631 void addMandatoryArc(
const std::string& tail,
const std::string& head);
711 std::vector< std::pair< std::size_t, std::size_t > >
__ranges;
728 const std::vector< std::string >& missing_symbols);
742 bool take_into_account_score =
true);
784 std::string message) {
798 __local_search_with_tabu_list.
setEpsilon(eps);
888 __local_search_with_tabu_list.
setMaxIter(max);
933 __local_search_with_tabu_list.
setMaxTime(timeout);
1044 #ifndef GUM_NO_INLINE AlgoType __selected_algo
the selected learning algorithm
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
INLINE void setCurrentApproximationScheme(const ApproximationScheme *approximationScheme)
distribute signals
A class that, given a structure and a parameter estimator returns a full Bayes net.
void setMaxIndegree(Size max_indegree)
sets the max indegree
void useScoreBIC()
indicate that we wish to use a BIC score
Class representing a Bayesian Network.
double minEpsilonRate() const
Returns the value of the minimal epsilon rate.
void enableMaxTime()
Stopping criterion on timeout. If the criterion was disabled, it will be enabled.
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Score * __score
the score used
AprioriNoApriori * __no_apriori
A DBRowGenerator class that returns incomplete rows as EM would do.
bool hasMissingValues() const
returns true if the learner's database has missing values
Database __score_database
the database to be used by the scores and parameter estimators
the base class for structural constraints imposed by DAGs
ApproximationSchemeSTATE stateApproximationScheme() const
history
void setMaxTime(double timeout)
Stopping criterion on timeout. If the criterion was disabled, it will be enabled.
CorrectedMutualInformation ::KModeTypes __3off2_kmode
the penalty used in 3off2
the structural constraint for forbidding the creation of some arcs during structure learning ...
Signaler3< Size, double, double > onProgress
Progression, error and time.
void setVerbosity(bool v)
verbosity
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
AprioriType
an enumeration to select the apriori
The class representing a tabular database stored in RAM.
const std::string & __getAprioriType() const
returns the type (as a string) of a given apriori
double __EMepsilon
epsilon for EM. If epsilon = 0.0, EM is not used
Header file of gum::Sequence, a class for storing (ordered) sequences of objects. ...
double maxTime() const
Returns the timeout (in seconds).
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
the class imposing a N-sized tabu list as a structural constraints for learning algorithms ...
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
gum::ApproximationSchemeListener header file.
The base class for all the scores used for learning (BIC, BDeu, etc)
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the learning database so that the sum of their weights is equal to...
void clearDatabaseRanges()
reset the ranges to the one range corresponding to the whole database
bool isEnabledEpsilon() const
#define GUM_EMIT1(signal, arg1)
the classes to account for structure changes in a graph
the class for computing Log2-likelihood scores
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|). If the criterion was disabl...
the class for computing BIC scores
ScoreType
an enumeration enabling to select easily the score we wish to use
the no a priori class: corresponds to 0 weight-sample
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
void __createScore()
create the score used for learning
void addMandatoryArc(const Arc &arc)
INLINE void distributeStop(const ApproximationScheme *approximationScheme, std::string message)
distribute signals
StructuralConstraintSliceOrder __constraint_SliceOrder
the constraint for 2TBNs
the structural constraint indicating that some arcs shall never be removed or reversed ...
Miic __miic_3off2
the 3off2 algorithm
ParamEstimatorType
an enumeration to select the type of parameter estimation we shall apply
void setInitialDAG(const DAG &)
sets an initial DAG structure
INLINE void distributeProgress(const ApproximationScheme *approximationScheme, Size pourcent, double error, double time)
distribute signals
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
ParamEstimatorType __param_estimator_type
the type of the parameter estimator
the class for computing BDeu scores
The basic class for computing the set of digraph changes allowed by the user to be executed by the le...
the base class for all a priori
unsigned int getMaxNumberOfThreads()
Returns the maximum number of threads at any time.
void setPeriodSize(Size p)
How many samples between two checks of the stopping criteria.
DatabaseTable __database
the database itself
the structural constraint imposing a partial order over nodes
MixedGraph __prepare_miic_3off2()
prepares the initial graph for 3off2 or miic
Class representing Bayesian networks.
the class for computing K2 scores (actually their log2 value)
void use3off2()
indicate that we wish to use 3off2
void useNoApriori()
use no apriori
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the BNLearner.
double epsilon() const
Get the value of epsilon.
the "meta-programming" class for storing several structural constraints
This file contains the definitions of the getters and setters for ApproximationScheme settings. ...
void useScoreK2()
indicate that we wish to use a K2 score
gum is the global namespace for all aGrUM entities
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|).
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
const std::vector< std::string > & names() const
returns the names of the variables in the database
double minEpsilonRate() const
Get the value of the minimal epsilon rate.
AprioriType __apriori_type
the a priori selected for the score and parameters
void setVerbosity(bool v)
Set the verbosity on (true) or off (false).
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
the structural constraint for forbidding the creation of some arcs during structure learning ...
The class for generic Hash Tables.
Size periodSize() const
Returns the period size.
void useScoreLog2Likelihood()
indicate that we wish to use a Log2Likelihood score
CorrectedMutualInformation * __mutual_info
the selected correction for 3off2 and miic
void setMaxTime(double timeout)
Stopping criterion on timeout.
AlgoType
an enumeration to select easily the learning algorithm to use
const std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
void disableEpsilon()
Disable stopping criterion on epsilon.
DAG __initial_dag
an initial DAG given to learners
void useNML()
indicate that we wish to use the NML correction for 3off2
Size nbrIterations() const
Returns the number of iterations.
void enableMaxIter()
Enable stopping criterion on max iterations.
the smooth a priori: adds a weight w to all the countings
StructuralConstraintMandatoryArcs __constraint_MandatoryArcs
the constraint on mandatory arcs
A dirichlet priori: computes its N'_ijk from a database.
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
void disableMaxTime()
Disable stopping criterion on timeout.
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
void setMandatoryArcs(const ArcSet &set)
assign a set of mandatory arcs
void disableEpsilon()
Disable stopping criterion on epsilon.
void eraseMandatoryArc(const Arc &arc)
Database * __apriori_database
the database used by the Dirichlet a priori
LocalSearchWithTabuList __local_search_with_tabu_list
the local search with tabu list algorithm
const ApproximationScheme * __current_algorithm
bool isEnabledMaxTime() const
the base class for structural constraints used by learning algorithms that learn a directed graph str...
The mechanism to compute the next available graph changes for directed structure learning search algor...
void setPeriodSize(Size p)
how many samples between two checks of the stopping criteria
void enableEpsilon()
Enable stopping criterion on epsilon.
bool isEnabledMinEpsilonRate() const
Returns true if stopping criterion on epsilon rate is enabled, false otherwise.
ParamEstimator * __createParamEstimator(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
Signaler1< std::string > onStop
Criteria messageApproximationScheme.
void __createCorrectedMutualInformation()
create the Corrected Mutual Information instance for Miic/3off2
Size nbrIterations() const
The base class for all directed edges. This class is used as a basis for manipulating all directed edge...
double currentTime() const
get the current running time in second (double)
Apriori * __apriori
the apriori used
void useNoCorr()
indicate that we wish to use the NoCorr correction for 3off2
void disableMaxTime()
Disable stopping criterion on timeout.
StructuralConstraintTabuList __constraint_TabuList
the constraint for tabu lists
std::string __apriori_dbname
the filename for the Dirichlet a priori, if any
void enableMaxIter()
Enable stopping criterion on max iterations.
DAG __learnDAG()
returns the DAG learnt
Size maxIter() const
Returns the criterion on number of iterations.
GreedyHillClimbing __greedy_hill_climbing
the greedy hill climbing algorithm
void useLocalSearchWithTabuList(Size tabu_size=100, Size nb_decrease=2)
indicate that we wish to use a local search with tabu list
genericBNLearner & operator=(const genericBNLearner &)
copy operator
void __setAprioriWeight(double weight)
sets the apriori weight
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
void eraseForbiddenArc(const Arc &arc)
DAG2BNLearner __Dag2BN
the parametric EM
void useGreedyHillClimbing()
indicate that we wish to use a greedy hill climbing algorithm
DAG learnDAG()
learn a structure from a file (must have read the db before)
the structural constraint indicating that some arcs shall never be removed or reversed ...
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
void useAprioriSmoothing(double weight=1)
use the apriori smoothing
The greedy hill climbing learning algorithm (for directed graphs)
const std::vector< double > & history() const
Returns the scheme history.
The class representing a tabular database as used by learning tasks.
double maxTime() const
returns the timeout (in seconds)
void useScoreAIC()
indicate that we wish to use an AIC score
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
void useK2(const Sequence< NodeId > &order)
indicate that we wish to use K2
StructuralConstraintIndegree __constraint_Indegree
the constraint for indegrees
ScoreType __score_type
the score selected for learning
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
ApproximationSchemeSTATE stateApproximationScheme() const
Returns the approximation scheme state.
void setMaxIter(Size max)
Stopping criterion on number of iterations.
void addForbiddenArc(const Arc &arc)
virtual ~genericBNLearner()
destructor
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
The local search with tabu list learning algorithm (for directed graphs)
bool isEnabledMaxIter() const
Returns true if stopping criterion on max iterations is enabled, false otherwise. ...
const std::vector< double > & history() const
The basic class for computing the set of digraph changes allowed by the user to be executed by the le...
the class for computing Bayesian Dirichlet (BD) log2 scores
A pack of learning algorithms that can easily be used.
class for packing sets of generators
void disableMaxIter()
Disable stopping criterion on max iterations.
static DatabaseTable __readFile(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a DatabaseTable
void useEM(const double epsilon)
use the EM algorithm to learn parameters
bool verbosity() const
Returns true if verbosity is enabled.
void useAprioriDirichlet(const std::string &filename, double weight=1)
use the Dirichlet apriori
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
use a new set of database rows' ranges to perform learning
double __apriori_weight
the weight of the apriori
double epsilon() const
Returns the value of epsilon.
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables in the database
Size periodSize() const
how many samples between two checks of the stopping criteria
bool isEnabledEpsilon() const
Returns true if stopping criterion on epsilon is enabled, false otherwise.
void __createApriori()
create the apriori used for learning
bool isEnabledMaxTime() const
Returns true if stopping criterion on timeout is enabled, false otherwise.
The class imposing a N-sized tabu list as a structural constraints for learning algorithms.
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|.
double currentTime() const
Returns the current running time in second.
The miic learning algorithm.
void disableMaxIter()
Disable stopping criterion on max iterations.
A pack of learning algorithms that can easily be used.
the class for computing AIC scores
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
void setForbiddenArcs(const ArcSet &set)
assign a set of forbidden arcs
A class that, given a structure and a parameter estimator returns a full Bayes net.
bool isEnabledMaxIter() const
The class for parsing DatabaseTable rows and generating output rows.
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection between each variable's NodeId and its column index (presumably the column in the database)
bool verbosity() const
verbosity
bool isEnabledMinEpsilonRate() const
void useAprioriBDeu(double weight=1)
use the BDeu apriori
std::size_t Size
In aGrUM, hashed values are unsigned long int.
const DatabaseTable & database() const
returns the database used by the BNLearner
StructuralConstraintForbiddenArcs __constraint_ForbiddenArcs
the constraint on forbidden arcs
void setSliceOrder(const NodeProperty< NodeId > &slice_order)
sets a partial order on the nodes
ApproximationSchemeSTATE
The different state of an approximation scheme.
The databases' cell translators for labelized variables.
a helper to easily read databases
The base class for estimating parameters of CPTs.
#define GUM_EMIT3(signal, arg1, arg2, arg3)
void useMIIC()
indicate that we wish to use MIIC
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
void useScoreBD()
indicate that we wish to use a BD score
void useMDL()
indicate that we wish to use the MDL correction for 3off2
void useScoreBDeu()
indicate that we wish to use a BDeu score
The greedy hill climbing learning algorithm (for directed graphs)
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|. If the criterion was disabled it ...
The local search learning with tabu list algorithm (for directed graphs)
A DBRowGenerator class that returns the rows that are complete (fully observed) w.r.t.
Size NodeId
Type for node ids.
Set of pairs of elements with fast search for both elements.
void setMaxIter(Size max)
Stopping criterion on number of iterations. If the criterion was disabled, it will be enabled ...
the no a priori class: corresponds to 0 weight-sample
Base classes for directed acyclic graphs.
void enableMaxTime()
Enable stopping criterion on timeout.
the class for estimating parameters of CPTs using Maximum Likelihood
#define GUM_ERROR(type, msg)
the internal apriori for the BDeu score (N' / (r_i * q_i))
Base class for mixed graphs.
the structural constraint imposing a partial order over nodes
void enableEpsilon()
Enable stopping criterion on epsilon.