33 #ifndef GUM_LEARNING_GENERIC_BN_LEARNER_H 34 #define GUM_LEARNING_GENERIC_BN_LEARNER_H 98 class BNLearnerListener;
121 DIRICHLET_FROM_DATABASE,
128 GREEDY_HILL_CLIMBING,
129 LOCAL_SEARCH_WITH_TABU_LIST,
146 explicit Database(
const std::string& file,
147 const std::vector< std::string >& missing_symbols);
165 Database(
const std::string& filename,
167 const std::vector< std::string >& missing_symbols);
176 template <
typename GUM_SCALAR >
177 Database(
const std::string& filename,
179 const std::vector< std::string >& missing_symbols);
214 const std::vector< std::size_t >&
domainSizes()
const;
217 const std::vector< std::string >&
names()
const;
236 const std::vector< std::string >& missingSymbols()
const;
239 std::size_t
nbRows()
const;
242 std::size_t size()
const;
248 void setWeight(
const std::size_t i,
const double weight);
253 double weight(
const std::size_t i)
const;
256 double weight()
const;
275 #if defined(_OPENMP) && !defined(GUM_DEBUG_MODE) 282 Size __min_nb_rows_per_thread{100};
287 template <
typename GUM_SCALAR >
306 const std::vector< std::string >& missing_symbols);
328 template <
typename GUM_SCALAR >
331 const std::vector< std::string >& missing_symbols);
373 const std::vector< std::string >&
names()
const;
376 const std::vector< std::size_t >&
domainSizes()
const;
416 template <
template <
typename >
class XALLOC >
418 const std::vector< std::pair< std::size_t, std::size_t >,
419 XALLOC< std::pair< std::size_t, std::size_t > > >&
429 const std::vector< std::pair< std::size_t, std::size_t > >&
453 std::pair< std::size_t, std::size_t >
455 const std::size_t k_fold);
465 std::pair< double, double >
chi2(
const NodeId id1,
467 const std::vector< NodeId >& knowing = {});
475 std::pair< double, double >
476 chi2(
const std::string& name1,
477 const std::string& name2,
478 const std::vector< std::string >& knowing = {});
487 std::pair< double, double >
G2(
const NodeId id1,
489 const std::vector< NodeId >& knowing = {});
497 std::pair< double, double >
498 G2(
const std::string& name1,
499 const std::string& name2,
500 const std::vector< std::string >& knowing = {});
510 const std::vector< NodeId >& knowing = {});
520 const std::vector< std::string >& knowing = {});
619 void useK2(
const std::vector< NodeId >& order);
667 void setSliceOrder(
const std::vector< std::vector< std::string > >& slices);
676 void addForbiddenArc(
const std::string& tail,
const std::string& head);
693 void addMandatoryArc(
const std::string& tail,
const std::string& head);
717 void addPossibleEdge(
const std::string& tail,
const std::string& head);
800 std::vector< std::pair< std::size_t, std::size_t > >
__ranges;
817 const std::vector< std::string >& missing_symbols);
831 bool take_into_account_score =
true);
873 std::string message) {
887 __local_search_with_tabu_list.
setEpsilon(eps);
977 __local_search_with_tabu_list.
setMaxIter(max);
1022 __local_search_with_tabu_list.
setMaxTime(timeout);
1133 #ifndef GUM_NO_INLINE AlgoType __selected_algo
the selected learning algorithm
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
INLINE void setCurrentApproximationScheme(const ApproximationScheme *approximationScheme)
distribute signals
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMaxIndegree(Size max_indegree)
sets the max indegree
void useScoreBIC()
indicate that we wish to use a BIC score
Class representing a Bayesian Network.
double minEpsilonRate() const
Returns the value of the minimal epsilon rate.
void enableMaxTime()
stopping criterion on timeout. If the criterion was disabled, it will be enabled.
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Score * __score
the score used
AprioriNoApriori * __no_apriori
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
bool hasMissingValues() const
returns true if the learner's database has missing values
Database __score_database
the database to be used by the scores and parameter estimators
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
ApproximationSchemeSTATE stateApproximationScheme() const
history
void setMaxTime(double timeout)
stopping criterion on timeout. If the criterion was disabled, it will be enabled.
CorrectedMutualInformation ::KModeTypes __3off2_kmode
the penalty used in 3off2
the structural constraint for forbidding the creation of some arcs during structure learning ...
Signaler3< Size, double, double > onProgress
Progression, error and time.
void setVerbosity(bool v)
verbosity
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
AprioriType
an enumeration to select the apriori
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const std::string & __getAprioriType() const
returns the type (as a string) of a given apriori
double __EMepsilon
epsilon for EM; if epsilon = 0.0, EM is not used
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
double maxTime() const
Returns the timeout (in seconds).
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The base class for all the scores used for learning (BIC, BDeu, etc)
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the learning database so that the sum of their weights is equal to...
void clearDatabaseRanges()
reset the ranges to the one range corresponding to the whole database
bool isEnabledEpsilon() const
void setRecordWeight(const std::size_t i, const double weight)
sets the weight of the ith record of the database
#define GUM_EMIT1(signal, arg1)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|). If the criterion was disabl...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
ScoreType
an enumeration enabling to select easily the score we wish to use
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
void __createScore()
create the score used for learning
void addMandatoryArc(const Arc &arc)
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
INLINE void distributeStop(const ApproximationScheme *approximationScheme, std::string message)
distribute signals
StructuralConstraintSliceOrder __constraint_SliceOrder
the constraint for 2TBNs
the structural constraint indicating that some arcs shall never be removed or reversed ...
Miic __miic_3off2
the 3off2 algorithm
ParamEstimatorType
an enumeration to select the type of parameter estimation we shall apply
void setInitialDAG(const DAG &)
sets an initial DAG structure
INLINE void distributeProgress(const ApproximationScheme *approximationScheme, Size pourcent, double error, double time)
distribute signals
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
ParamEstimatorType __param_estimator_type
the type of the parameter estimator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
the base class for all a priori
unsigned int getMaxNumberOfThreads()
Returns the maximum number of threads at any time.
void setPossibleSkeleton(const UndiGraph &skeleton)
assign a set of possible edges, given as an undirected skeleton
void setPeriodSize(Size p)
How many samples between two checks of the stopping criteria.
DatabaseTable __database
the database itself
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
MixedGraph __prepare_miic_3off2()
prepares the initial graph for 3off2 or miic
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void use3off2()
indicate that we wish to use 3off2
void useNoApriori()
use no apriori
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
double epsilon() const
Get the value of epsilon.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void useScoreK2()
indicate that we wish to use a K2 score
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|).
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
std::pair< double, double > G2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
const std::vector< std::string > & names() const
returns the names of the variables in the database
double minEpsilonRate() const
Get the value of the minimal epsilon rate.
AprioriType __apriori_type
the a priori selected for the score and parameters
void setVerbosity(bool v)
Set the verbosity on (true) or off (false).
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The class for generic Hash Tables.
Size periodSize() const
Returns the period size.
void useScoreLog2Likelihood()
indicate that we wish to use a Log2Likelihood score
CorrectedMutualInformation * __mutual_info
the selected correction for 3off2 and miic
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMaxTime(double timeout)
Stopping criterion on timeout.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
AlgoType
an enumeration to select easily the learning algorithm to use
const std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
void disableEpsilon()
Disable stopping criterion on epsilon.
DAG __initial_dag
an initial DAG given to learners
void useNML()
indicate that we wish to use the NML correction for 3off2
Size nbrIterations() const
Returns the number of iterations.
void enableMaxIter()
Enable stopping criterion on max iterations.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
StructuralConstraintMandatoryArcs __constraint_MandatoryArcs
the constraint on mandatory arcs
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
void disableMaxTime()
Disable stopping criterion on timeout.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMandatoryArcs(const ArcSet &set)
assign a set of mandatory arcs
void disableEpsilon()
Disable stopping criterion on epsilon.
void eraseMandatoryArc(const Arc &arc)
Database * __apriori_database
the database used by the Dirichlet a priori
LocalSearchWithTabuList __local_search_with_tabu_list
the local search with tabu list algorithm
void erasePossibleEdge(const Edge &edge)
const ApproximationScheme * __current_algorithm
bool isEnabledMaxTime() const
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setPeriodSize(Size p)
how many samples between two checks of the stopping criteria
void enableEpsilon()
Enable stopping criterion on epsilon.
bool isEnabledMinEpsilonRate() const
Returns true if stopping criterion on epsilon rate is enabled, false otherwise.
ParamEstimator * __createParamEstimator(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
Signaler1< std::string > onStop
Criteria messageApproximationScheme.
double databaseWeight() const
returns the weight of the whole database
void __createCorrectedMutualInformation()
create the Corrected Mutual Information instance for Miic/3off2
Size nbrIterations() const
The base class for all directed edges. This class is used as a basis for manipulating all directed edge...
double currentTime() const
get the current running time in seconds (double)
Apriori * __apriori
the apriori used
double recordWeight(const std::size_t i) const
returns the weight of the ith record
void useNoCorr()
indicate that we wish to use the NoCorr correction for 3off2
void disableMaxTime()
Disable stopping criterion on timeout.
StructuralConstraintTabuList __constraint_TabuList
the constraint for tabu lists
std::string __apriori_dbname
the filename for the Dirichlet a priori, if any
void enableMaxIter()
Enable stopping criterion on max iterations.
void addPossibleEdge(const Edge &edge)
StructuralConstraintPossibleEdges __constraint_PossibleEdges
the constraint on possible Edges
DAG __learnDAG()
returns the DAG learnt
Size maxIter() const
Returns the criterion on number of iterations.
GreedyHillClimbing __greedy_hill_climbing
the greedy hill climbing algorithm
void useLocalSearchWithTabuList(Size tabu_size=100, Size nb_decrease=2)
indicate that we wish to use a local search with tabu list
genericBNLearner & operator=(const genericBNLearner &)
copy operator
void __setAprioriWeight(double weight)
sets the apriori weight
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
void eraseForbiddenArc(const Arc &arc)
DAG2BNLearner __Dag2BN
the parametric EM
void useGreedyHillClimbing()
indicate that we wish to use a greedy hill climbing algorithm
DAG learnDAG()
learn a structure from a file (must have read the db before)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
void useAprioriSmoothing(double weight=1)
use the apriori smoothing
The greedy hill climbing learning algorithm (for directed graphs)
const std::vector< double > & history() const
Returns the scheme history.
The class representing a tabular database as used by learning tasks.
double maxTime() const
returns the timeout (in seconds)
void useScoreAIC()
indicate that we wish to use an AIC score
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
void useK2(const Sequence< NodeId > &order)
indicate that we wish to use K2
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
StructuralConstraintIndegree __constraint_Indegree
the constraint for indegrees
ScoreType __score_type
the score selected for learning
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
ApproximationSchemeSTATE stateApproximationScheme() const
Returns the approximation scheme state.
void setMaxIter(Size max)
Stopping criterion on number of iterations.
void addForbiddenArc(const Arc &arc)
virtual ~genericBNLearner()
destructor
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
The local search with tabu list learning algorithm (for directed graphs)
void setPossibleEdges(const EdgeSet &set)
assign a set of possible edges
bool isEnabledMaxIter() const
Returns true if stopping criterion on max iterations is enabled, false otherwise. ...
The base class for all undirected edges.
const std::vector< double > & history() const
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
A pack of learning algorithms that can easily be used.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void disableMaxIter()
Disable stopping criterion on max iterations.
static DatabaseTable __readFile(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns the corresponding DatabaseTable
void useEM(const double epsilon)
use the EM algorithm to learn parameters
bool verbosity() const
Returns true if verbosity is enabled.
void useAprioriDirichlet(const std::string &filename, double weight=1)
use the Dirichlet apriori
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
use a new set of database rows' ranges to perform learning
double __apriori_weight
the weight of the apriori
double epsilon() const
Returns the value of epsilon.
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables in the database
Size periodSize() const
how many samples between two checks of the stopping criteria
bool isEnabledEpsilon() const
Returns true if stopping criterion on epsilon is enabled, false otherwise.
void __createApriori()
create the apriori used for learning
Base class for undirected graphs.
bool isEnabledMaxTime() const
Returns true if stopping criterion on timeout is enabled, false otherwise.
The class imposing an N-sized tabu list as a structural constraint for learning algorithms.
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|.
double currentTime() const
Returns the current running time in seconds.
The miic learning algorithm.
void disableMaxIter()
Disable stopping criterion on max iterations.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
void setForbiddenArcs(const ArcSet &set)
assign a set of forbidden arcs
A class that, given a structure and a parameter estimator returns a full Bayes net.
bool isEnabledMaxIter() const
The class for parsing DatabaseTable rows and generating output rows.
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection between the NodeIds and column indices (presumably the columns of the database; to be confirmed)
bool verbosity() const
verbosity
bool isEnabledMinEpsilonRate() const
void useAprioriBDeu(double weight=1)
use the BDeu apriori
std::size_t Size
In aGrUM, hashed values are unsigned long int.
const DatabaseTable & database() const
returns the database used by the BNLearner
StructuralConstraintForbiddenArcs __constraint_ForbiddenArcs
the constraint on forbidden arcs
void setSliceOrder(const NodeProperty< NodeId > &slice_order)
sets a partial order on the nodes
ApproximationSchemeSTATE
The different states of an approximation scheme.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
a helper to easily read databases
The base class for estimating parameters of CPTs.
#define GUM_EMIT3(signal, arg1, arg2, arg3)
void useMIIC()
indicate that we wish to use MIIC
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
void useScoreBD()
indicate that we wish to use a BD score
void useMDL()
indicate that we wish to use the MDL correction for 3off2
void useScoreBDeu()
indicate that we wish to use a BDeu score
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|. If the criterion was disabled it ...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Size NodeId
Type for node ids.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMaxIter(Size max)
stopping criterion on number of iterations. If the criterion was disabled it will be enabled ...
the no a priori class: corresponds to 0 weight-sample
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void enableMaxTime()
Enable stopping criterion on timeout.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
#define GUM_ERROR(type, msg)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Base class for mixed graphs.
the structural constraint imposing a partial order over nodes
void enableEpsilon()
Enable stopping criterion on epsilon.