aGrUM  0.16.0
genericBNLearner.h
Go to the documentation of this file.
1 
33 #ifndef GUM_LEARNING_GENERIC_BN_LEARNER_H
34 #define GUM_LEARNING_GENERIC_BN_LEARNER_H
35 
36 #include <sstream>
37 #include <memory>
38 
39 #include <agrum/BN/BayesNet.h>
40 #include <agrum/agrum.h>
41 #include <agrum/core/bijection.h>
42 #include <agrum/core/sequence.h>
43 #include <agrum/graphs/DAG.h>
44 
53 
60 
65 
75 
80 
83 
86 
87 #include <agrum/learning/K2.h>
88 #include <agrum/learning/Miic.h>
91 
93 
94 namespace gum {
95 
96  namespace learning {
97 
98  class BNLearnerListener;
99 
108  // private:
109  public:
/// an enumeration enabling to select easily the score we wish to use
/// (enumerator order is kept as-is: it fixes the underlying values)
enum class ScoreType {
  AIC,            // AIC score (cf. useScoreAIC)
  BD,             // BD score (cf. useScoreBD)
  BDeu,           // BDeu score (cf. useScoreBDeu)
  BIC,            // BIC score (cf. useScoreBIC)
  K2,             // K2 score (cf. useScoreK2)
  LOG2LIKELIHOOD  // Log2Likelihood score (cf. useScoreLog2Likelihood)
};
112 
/// an enumeration to select the type of parameter estimation we shall apply
enum class ParamEstimatorType {
  ML  // the only estimator available here (presumably maximum likelihood)
};
116 
/// an enumeration to select the apriori
/// (enumerator order is kept as-is: it fixes the underlying values)
enum class AprioriType {
  NO_APRIORI,               // no apriori (cf. useNoApriori)
  SMOOTHING,                // smoothing apriori (cf. useAprioriSmoothing)
  DIRICHLET_FROM_DATABASE,  // Dirichlet apriori (cf. useAprioriDirichlet)
  BDEU                      // BDeu apriori (cf. useAprioriBDeu)
};
124 
/// an enumeration to select easily the learning algorithm to use
/// (enumerator order is kept as-is: it fixes the underlying values)
enum class AlgoType {
  K2,                           // K2 (cf. useK2)
  GREEDY_HILL_CLIMBING,         // greedy hill climbing (cf. useGreedyHillClimbing)
  LOCAL_SEARCH_WITH_TABU_LIST,  // local search with tabu list (cf. useLocalSearchWithTabuList)
  MIIC_THREE_OFF_TWO            // miic / 3off2 (cf. useMIIC, use3off2)
};
132 
133 
135  class Database {
136  public:
137  // ########################################################################
139  // ########################################################################
141 
143 
146  explicit Database(const std::string& file,
147  const std::vector< std::string >& missing_symbols);
148 
150 
152  explicit Database(const DatabaseTable<>& db);
153 
155 
165  Database(const std::string& filename,
166  Database& score_database,
167  const std::vector< std::string >& missing_symbols);
168 
170 
176  template < typename GUM_SCALAR >
177  Database(const std::string& filename,
178  const gum::BayesNet< GUM_SCALAR >& bn,
179  const std::vector< std::string >& missing_symbols);
180 
182  Database(const Database& from);
183 
185  Database(Database&& from);
186 
188  ~Database();
189 
191 
192  // ########################################################################
194  // ########################################################################
196 
198  Database& operator=(const Database& from);
199 
201  Database& operator=(Database&& from);
202 
204 
205  // ########################################################################
207  // ########################################################################
209 
211  DBRowGeneratorParser<>& parser();
212 
214  const std::vector< std::size_t >& domainSizes() const;
215 
217  const std::vector< std::string >& names() const;
218 
220  NodeId idFromName(const std::string& var_name) const;
221 
223  const std::string& nameFromId(NodeId id) const;
224 
226  const DatabaseTable<>& databaseTable() const;
227 
230  void setDatabaseWeight(const double new_weight);
231 
233  const Bijection< NodeId, std::size_t >& nodeId2Columns() const;
234 
236  const std::vector< std::string >& missingSymbols() const;
237 
239  std::size_t nbRows() const;
240 
242  std::size_t size() const;
243 
245 
248  void setWeight(const std::size_t i, const double weight);
249 
251 
253  double weight(const std::size_t i) const;
254 
256  double weight() const;
257 
258 
260 
261  protected:
264 
266  DBRowGeneratorParser<>* __parser{nullptr};
267 
269  std::vector< std::size_t > __domain_sizes;
270 
273 
275 #if defined(_OPENMP) && !defined(GUM_DEBUG_MODE)
276  Size __max_threads_number{getMaxNumberOfThreads()};
277 #else
278  Size __max_threads_number{1};
279 #endif /* GUM_DEBUG_MODE */
280 
282  Size __min_nb_rows_per_thread{100};
283 
284  private:
285  // returns the set of variables as a BN. This is convenient for
286  // the constructors of apriori Databases
287  template < typename GUM_SCALAR >
288  BayesNet< GUM_SCALAR > __BNVars() const;
289  };
290 
292  void __setAprioriWeight(double weight);
293 
294  public:
295  // ##########################################################################
297  // ##########################################################################
299 
301 
305  genericBNLearner(const std::string& filename,
306  const std::vector< std::string >& missing_symbols);
308 
328  template < typename GUM_SCALAR >
329  genericBNLearner(const std::string& filename,
330  const gum::BayesNet< GUM_SCALAR >& src,
331  const std::vector< std::string >& missing_symbols);
332 
335 
338 
340  virtual ~genericBNLearner();
341 
343 
344  // ##########################################################################
346  // ##########################################################################
348 
351 
354 
356 
357  // ##########################################################################
359  // ##########################################################################
361 
363  DAG learnDAG();
364 
368 
370  void setInitialDAG(const DAG&);
371 
373  const std::vector< std::string >& names() const;
374 
376  const std::vector< std::size_t >& domainSizes() const;
377 
379 
383  NodeId idFromName(const std::string& var_name) const;
384 
386  const DatabaseTable<>& database() const;
387 
390  void setDatabaseWeight(const double new_weight);
391 
393 
396  void setRecordWeight(const std::size_t i, const double weight);
397 
399 
401  double recordWeight(const std::size_t i) const;
402 
404  double databaseWeight() const;
405 
407  const std::string& nameFromId(NodeId id) const;
408 
410 
416  template < template < typename > class XALLOC >
417  void useDatabaseRanges(
418  const std::vector< std::pair< std::size_t, std::size_t >,
419  XALLOC< std::pair< std::size_t, std::size_t > > >&
420  new_ranges);
421 
423  void clearDatabaseRanges();
424 
426 
429  const std::vector< std::pair< std::size_t, std::size_t > >&
430  databaseRanges() const;
431 
433 
453  std::pair< std::size_t, std::size_t >
454  useCrossValidationFold(const std::size_t learning_fold,
455  const std::size_t k_fold);
456 
457 
465  std::pair< double, double > chi2(const NodeId id1,
466  const NodeId id2,
467  const std::vector< NodeId >& knowing = {});
475  std::pair< double, double >
476  chi2(const std::string& name1,
477  const std::string& name2,
478  const std::vector< std::string >& knowing = {});
479 
487  std::pair< double, double > G2(const NodeId id1,
488  const NodeId id2,
489  const std::vector< NodeId >& knowing = {});
497  std::pair< double, double >
498  G2(const std::string& name1,
499  const std::string& name2,
500  const std::vector< std::string >& knowing = {});
501 
509  double logLikelihood(const std::vector< NodeId >& vars,
510  const std::vector< NodeId >& knowing = {});
511 
519  double logLikelihood(const std::vector< std::string >& vars,
520  const std::vector< std::string >& knowing = {});
521 
522 
527  Size nbCols() const;
528 
533  Size nbRows() const;
534 
539  void useEM(const double epsilon);
540 
542  bool hasMissingValues() const;
543 
545 
546  // ##########################################################################
548  // ##########################################################################
550 
552  void useScoreAIC();
553 
555  void useScoreBD();
556 
558  void useScoreBDeu();
559 
561  void useScoreBIC();
562 
564  void useScoreK2();
565 
567  void useScoreLog2Likelihood();
568 
570 
571  // ##########################################################################
573  // ##########################################################################
575 
577  void useNoApriori();
578 
580 
583  void useAprioriBDeu(double weight = 1);
584 
586 
589  void useAprioriSmoothing(double weight = 1);
590 
592  void useAprioriDirichlet(const std::string& filename, double weight = 1);
593 
594 
596 
598  std::string checkScoreAprioriCompatibility();
600 
601  // ##########################################################################
603  // ##########################################################################
605 
607  void useGreedyHillClimbing();
608 
610 
613  void useLocalSearchWithTabuList(Size tabu_size = 100, Size nb_decrease = 2);
614 
616  void useK2(const Sequence< NodeId >& order);
617 
619  void useK2(const std::vector< NodeId >& order);
620 
622  void use3off2();
623 
625  void useMIIC();
626 
628 
629  // ##########################################################################
631  // ##########################################################################
635  void useNML();
638  void useMDL();
641  void useNoCorr();
642 
645  const std::vector< Arc > latentVariables() const;
646 
648  // ##########################################################################
650  // ##########################################################################
652 
654  void setMaxIndegree(Size max_indegree);
655 
661  void setSliceOrder(const NodeProperty< NodeId >& slice_order);
662 
667  void setSliceOrder(const std::vector< std::vector< std::string > >& slices);
668 
670  void setForbiddenArcs(const ArcSet& set);
671 
674  void addForbiddenArc(const Arc& arc);
675  void addForbiddenArc(const NodeId tail, const NodeId head);
676  void addForbiddenArc(const std::string& tail, const std::string& head);
678 
681  void eraseForbiddenArc(const Arc& arc);
682  void eraseForbiddenArc(const NodeId tail, const NodeId head);
683  void eraseForbiddenArc(const std::string& tail, const std::string& head);
685 
687  void setMandatoryArcs(const ArcSet& set);
688 
691  void addMandatoryArc(const Arc& arc);
692  void addMandatoryArc(const NodeId tail, const NodeId head);
693  void addMandatoryArc(const std::string& tail, const std::string& head);
695 
698  void eraseMandatoryArc(const Arc& arc);
699  void eraseMandatoryArc(const NodeId tail, const NodeId head);
700  void eraseMandatoryArc(const std::string& tail, const std::string& head);
702 
707  void setPossibleEdges(const EdgeSet& set);
708  void setPossibleSkeleton(const UndiGraph& skeleton);
710 
715  void addPossibleEdge(const Edge& edge);
716  void addPossibleEdge(const NodeId tail, const NodeId head);
717  void addPossibleEdge(const std::string& tail, const std::string& head);
719 
722  void erasePossibleEdge(const Edge& edge);
723  void erasePossibleEdge(const NodeId tail, const NodeId head);
724  void erasePossibleEdge(const std::string& tail, const std::string& head);
726 
728 
729  protected:
732 
734  Score<>* __score{nullptr};
735 
738 
740  double __EMepsilon{0.0};
741 
744 
747 
749  Apriori<>* __apriori{nullptr};
750 
752 
/// the weight of the apriori
// initialised with a double literal (the member is a double, like __EMepsilon)
double __apriori_weight{1.0};
755 
758 
761 
764 
767 
770 
773 
776 
779 
782 
786 
789 
792 
795 
798 
800  std::vector< std::pair< std::size_t, std::size_t > > __ranges;
801 
804 
806  std::string __apriori_dbname;
807 
810 
811  // the current algorithm as an approximationScheme
813 
815  static DatabaseTable<>
816  __readFile(const std::string& filename,
817  const std::vector< std::string >& missing_symbols);
818 
820  static void __checkFileName(const std::string& filename);
821 
823  void __createApriori();
824 
826  void __createScore();
827 
831  bool take_into_account_score = true);
832 
834  DAG __learnDAG();
835 
838 
840  const std::string& __getAprioriType() const;
841 
844 
845 
846  public:
847  // ##########################################################################
850  // ##########################################################################
851  // in order to not pollute the proper code of genericBNLearner, we
852  // directly
853  // implement those
854  // very simples methods here.
857  const ApproximationScheme* approximationScheme) {
858  __current_algorithm = approximationScheme;
859  }
860 
861  INLINE void
862  distributeProgress(const ApproximationScheme* approximationScheme,
863  Size pourcent,
864  double error,
865  double time) {
866  setCurrentApproximationScheme(approximationScheme);
867 
868  if (onProgress.hasListener()) GUM_EMIT3(onProgress, pourcent, error, time);
869  };
870 
872  INLINE void distributeStop(const ApproximationScheme* approximationScheme,
873  std::string message) {
874  setCurrentApproximationScheme(approximationScheme);
875 
876  if (onStop.hasListener()) GUM_EMIT1(onStop, message);
877  };
879 
884  void setEpsilon(double eps) {
885  __K2.approximationScheme().setEpsilon(eps);
886  __greedy_hill_climbing.setEpsilon(eps);
887  __local_search_with_tabu_list.setEpsilon(eps);
888  __Dag2BN.setEpsilon(eps);
889  };
890 
892  double epsilon() const {
893  if (__current_algorithm != nullptr)
894  return __current_algorithm->epsilon();
895  else
896  GUM_ERROR(FatalError, "No chosen algorithm for learning");
897  };
898 
900  void disableEpsilon() {
902  __greedy_hill_climbing.disableEpsilon();
903  __local_search_with_tabu_list.disableEpsilon();
904  __Dag2BN.disableEpsilon();
905  };
906 
908  void enableEpsilon() {
910  __greedy_hill_climbing.enableEpsilon();
911  __local_search_with_tabu_list.enableEpsilon();
912  __Dag2BN.enableEpsilon();
913  };
914 
917  bool isEnabledEpsilon() const {
918  if (__current_algorithm != nullptr)
920  else
921  GUM_ERROR(FatalError, "No chosen algorithm for learning");
922  };
924 
930  void setMinEpsilonRate(double rate) {
932  __greedy_hill_climbing.setMinEpsilonRate(rate);
933  __local_search_with_tabu_list.setMinEpsilonRate(rate);
934  __Dag2BN.setMinEpsilonRate(rate);
935  };
936 
938  double minEpsilonRate() const {
939  if (__current_algorithm != nullptr)
941  else
942  GUM_ERROR(FatalError, "No chosen algorithm for learning");
943  };
944 
948  __greedy_hill_climbing.disableMinEpsilonRate();
949  __local_search_with_tabu_list.disableMinEpsilonRate();
950  __Dag2BN.disableMinEpsilonRate();
951  };
955  __greedy_hill_climbing.enableMinEpsilonRate();
956  __local_search_with_tabu_list.enableMinEpsilonRate();
957  __Dag2BN.enableMinEpsilonRate();
958  };
961  bool isEnabledMinEpsilonRate() const {
962  if (__current_algorithm != nullptr)
964  else
965  GUM_ERROR(FatalError, "No chosen algorithm for learning");
966  };
968 
974  void setMaxIter(Size max) {
975  __K2.approximationScheme().setMaxIter(max);
976  __greedy_hill_climbing.setMaxIter(max);
977  __local_search_with_tabu_list.setMaxIter(max);
978  __Dag2BN.setMaxIter(max);
979  };
980 
982  Size maxIter() const {
983  if (__current_algorithm != nullptr)
984  return __current_algorithm->maxIter();
985  else
986  GUM_ERROR(FatalError, "No chosen algorithm for learning");
987  };
988 
990  void disableMaxIter() {
992  __greedy_hill_climbing.disableMaxIter();
993  __local_search_with_tabu_list.disableMaxIter();
994  __Dag2BN.disableMaxIter();
995  };
997  void enableMaxIter() {
999  __greedy_hill_climbing.enableMaxIter();
1000  __local_search_with_tabu_list.enableMaxIter();
1001  __Dag2BN.enableMaxIter();
1002  };
1005  bool isEnabledMaxIter() const {
1006  if (__current_algorithm != nullptr)
1008  else
1009  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1010  };
1012 
1017 
1019  void setMaxTime(double timeout) {
1020  __K2.approximationScheme().setMaxTime(timeout);
1021  __greedy_hill_climbing.setMaxTime(timeout);
1022  __local_search_with_tabu_list.setMaxTime(timeout);
1023  __Dag2BN.setMaxTime(timeout);
1024  }
1025 
1027  double maxTime() const {
1028  if (__current_algorithm != nullptr)
1029  return __current_algorithm->maxTime();
1030  else
1031  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1032  };
1033 
1035  double currentTime() const {
1036  if (__current_algorithm != nullptr)
1037  return __current_algorithm->currentTime();
1038  else
1039  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1040  };
1041 
1045  __greedy_hill_climbing.disableMaxTime();
1046  __local_search_with_tabu_list.disableMaxTime();
1047  __Dag2BN.disableMaxTime();
1048  };
1049  void enableMaxTime() {
1051  __greedy_hill_climbing.enableMaxTime();
1052  __local_search_with_tabu_list.enableMaxTime();
1053  __Dag2BN.enableMaxTime();
1054  };
1057  bool isEnabledMaxTime() const {
1058  if (__current_algorithm != nullptr)
1060  else
1061  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1062  };
1064 
1070  __greedy_hill_climbing.setPeriodSize(p);
1071  __local_search_with_tabu_list.setPeriodSize(p);
1072  __Dag2BN.setPeriodSize(p);
1073  };
1074 
1075  Size periodSize() const {
1076  if (__current_algorithm != nullptr)
1077  return __current_algorithm->periodSize();
1078  else
1079  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1080  };
1082 
1085  void setVerbosity(bool v) {
1087  __greedy_hill_climbing.setVerbosity(v);
1088  __local_search_with_tabu_list.setVerbosity(v);
1089  __Dag2BN.setVerbosity(v);
1090  };
1091 
1092  bool verbosity() const {
1093  if (__current_algorithm != nullptr)
1094  return __current_algorithm->verbosity();
1095  else
1096  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1097  };
1099 
1102 
1104  if (__current_algorithm != nullptr)
1106  else
1107  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1108  };
1109 
1112  if (__current_algorithm != nullptr)
1114  else
1115  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1116  };
1117 
1119  const std::vector< double >& history() const {
1120  if (__current_algorithm != nullptr)
1121  return __current_algorithm->history();
1122  else
1123  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1124  };
1126  };
1127 
1128  } /* namespace learning */
1129 
1130 } /* namespace gum */
1131 
1133 #ifndef GUM_NO_INLINE
1135 #endif /* GUM_NO_INLINE */
1136 
1138 
1139 #endif /* GUM_LEARNING_GENERIC_BN_LEARNER_H */
AlgoType __selected_algo
the selected learning algorithm
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
INLINE void setCurrentApproximationScheme(const ApproximationScheme *approximationScheme)
distribute signals
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMaxIndegree(Size max_indegree)
sets the max indegree
void useScoreBIC()
indicate that we wish to use a BIC score
Class representing a Bayesian Network.
Definition: BayesNet.h:78
double minEpsilonRate() const
Returns the value of the minimal epsilon rate.
void enableMaxTime()
Stopping criterion on timeout. If the criterion was disabled, it will be enabled.
KModeTypes
the description type for the complexity correction
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Score * __score
the score used
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
bool hasMissingValues() const
returns true if the learner's database has missing values
Database __score_database
the database to be used by the scores and parameter estimators
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
ApproximationSchemeSTATE stateApproximationScheme() const
history
void setMaxTime(double timeout)
Stopping criterion on timeout. If the criterion was disabled, it will be enabled.
CorrectedMutualInformation ::KModeTypes __3off2_kmode
the penalty used in 3off2
the structural constraint for forbidding the creation of some arcs during structure learning ...
Signaler3< Size, double, double > onProgress
Progression, error and time.
void setVerbosity(bool v)
verbosity
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
AprioriType
an enumeration to select the apriori
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const std::string & __getAprioriType() const
returns the type (as a string) of a given apriori
double __EMepsilon
epsilon for EM; if epsilon = 0.0, EM is not used
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
double maxTime() const
Returns the timeout (in seconds).
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
The class computing n times the corrected mutual information, as used in the 3off2 algorithm...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The base class for all the scores used for learning (BIC, BDeu, etc)
Definition: score.h:52
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the learning database so that the sum of their weights is equal to...
void clearDatabaseRanges()
reset the ranges to the one range corresponding to the whole database
void setRecordWeight(const std::size_t i, const double weight)
sets the weight of the ith record of the database
#define GUM_EMIT1(signal, arg1)
Definition: signaler1.h:42
Approximation Scheme.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|) If the criterion was disabl...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
ScoreType
an enumeration enabling to select easily the score we wish to use
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
void __createScore()
create the score used for learning
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
INLINE void distributeStop(const ApproximationScheme *approximationScheme, std::string message)
distribute signals
StructuralConstraintSliceOrder __constraint_SliceOrder
the constraint for 2TBNs
the structural constraint indicating that some arcs shall never be removed or reversed ...
Miic __miic_3off2
the 3off2 algorithm
ParamEstimatorType
an enumeration to select the type of parameter estimation we shall apply
void setInitialDAG(const DAG &)
sets an initial DAG structure
INLINE void distributeProgress(const ApproximationScheme *approximationScheme, Size pourcent, double error, double time)
distribute signals
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
ParamEstimatorType __param_estimator_type
the type of the parameter estimator
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
the base class for all a priori
Definition: apriori.h:50
unsigned int getMaxNumberOfThreads()
Returns the maximum number of threads at any time.
void setPossibleSkeleton(const UndiGraph &skeleton)
assign a set of possible edges (given as an undirected skeleton)
void setPeriodSize(Size p)
Number of samples between two checks of the stopping criteria.
DatabaseTable __database
the database itself
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
MixedGraph __prepare_miic_3off2()
prepares the initial graph for 3off2 or miic
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void use3off2()
indicate that we wish to use 3off2
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
double epsilon() const
Get the value of epsilon.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void useScoreK2()
indicate that we wish to use a K2 score
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|).
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
std::pair< double, double > G2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
const std::vector< std::string > & names() const
returns the names of the variables in the database
double minEpsilonRate() const
Get the value of the minimal epsilon rate.
AprioriType __apriori_type
the a priori selected for the score and parameters
void setVerbosity(bool v)
Set the verbosity on (true) or off (false).
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
The class for generic Hash Tables.
Definition: hashTable.h:679
Size periodSize() const
Returns the period size.
void useScoreLog2Likelihood()
indicate that we wish to use a Log2Likelihood score
CorrectedMutualInformation * __mutual_info
the selected correction for 3off2 and miic
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMaxTime(double timeout)
Stopping criterion on timeout.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
AlgoType
an enumeration to select easily the learning algorithm to use
const std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
void disableEpsilon()
Disable stopping criterion on epsilon.
DAG __initial_dag
an initial DAG given to learners
void useNML()
indicate that we wish to use the NML correction for 3off2
Size nbrIterations() const
Returns the number of iterations.
void enableMaxIter()
Enable stopping criterion on max iterations.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
StructuralConstraintMandatoryArcs __constraint_MandatoryArcs
the constraint on mandatory arcs
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
void disableMaxTime()
Disable stopping criterion on timeout.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMandatoryArcs(const ArcSet &set)
assign a set of mandatory arcs
void disableEpsilon()
Disable stopping criterion on epsilon.
Database * __apriori_database
the database used by the Dirichlet a priori
LocalSearchWithTabuList __local_search_with_tabu_list
the local search with tabu list algorithm
void erasePossibleEdge(const Edge &edge)
const ApproximationScheme * __current_algorithm
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setPeriodSize(Size p)
number of samples between two checks of the stopping criteria
void enableEpsilon()
Enable stopping criterion on epsilon.
bool isEnabledMinEpsilonRate() const
Returns true if stopping criterion on epsilon rate is enabled, false otherwise.
ParamEstimator * __createParamEstimator(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
Signaler1< std::string > onStop
Criteria messageApproximationScheme.
double databaseWeight() const
returns the weight of the whole database
void __createCorrectedMutualInformation()
create the Corrected Mutual Information instance for Miic/3off2
The base class for all directed edges. This class is used as a basis for manipulating all directed edge...
double currentTime() const
get the current running time in second (double)
Apriori * __apriori
the apriori used
double recordWeight(const std::size_t i) const
returns the weight of the ith record
void useNoCorr()
indicate that we wish to use the NoCorr correction for 3off2
void disableMaxTime()
Disable stopping criterion on timeout.
StructuralConstraintTabuList __constraint_TabuList
the constraint for tabu lists
std::string __apriori_dbname
the filename for the Dirichlet a priori, if any
void enableMaxIter()
Enable stopping criterion on max iterations.
void addPossibleEdge(const Edge &edge)
StructuralConstraintPossibleEdges __constraint_PossibleEdges
the constraint on possible Edges
DAG __learnDAG()
returns the DAG learnt
Size maxIter() const
Returns the criterion on number of iterations.
GreedyHillClimbing __greedy_hill_climbing
the greedy hill climbing algorithm
void useLocalSearchWithTabuList(Size tabu_size=100, Size nb_decrease=2)
indicate that we wish to use a local search with tabu list
genericBNLearner & operator=(const genericBNLearner &)
copy operator
void __setAprioriWeight(double weight)
sets the apriori weight
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
DAG2BNLearner __Dag2BN
the parametric EM
void useGreedyHillClimbing()
indicate that we wish to use a greedy hill climbing algorithm
DAG learnDAG()
learn a structure from a file (must have read the db before)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
void useAprioriSmoothing(double weight=1)
use the apriori smoothing
The greedy hill climbing learning algorithm (for directed graphs)
const std::vector< double > & history() const
Returns the scheme history.
The class representing a tabular database as used by learning tasks.
double maxTime() const
returns the timeout (in seconds)
void useScoreAIC()
indicate that we wish to use an AIC score
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
void useK2(const Sequence< NodeId > &order)
indicate that we wish to use K2
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
StructuralConstraintIndegree __constraint_Indegree
the constraint for indegrees
ScoreType __score_type
the score selected for learning
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
ApproximationSchemeSTATE stateApproximationScheme() const
Returns the approximation scheme state.
void setMaxIter(Size max)
Stopping criterion on number of iterations.
virtual ~genericBNLearner()
destructor
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
The local search with tabu list learning algorithm (for directed graphs)
void setPossibleEdges(const EdgeSet &set)
assign a set of possible edges
bool isEnabledMaxIter() const
Returns true if stopping criterion on max iterations is enabled, false otherwise. ...
The base class for all undirected edges.
const std::vector< double > & history() const
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
A pack of learning algorithms that can easily be used.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void disableMaxIter()
Disable stopping criterion on max iterations.
static DatabaseTable __readFile(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a DatabaseTable
void useEM(const double epsilon)
use the EM algorithm to learn parameters
bool verbosity() const
Returns true if verbosity is enabled.
void useAprioriDirichlet(const std::string &filename, double weight=1)
use the Dirichlet apriori
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
use a new set of database rows' ranges to perform learning
double __apriori_weight
the weight of the apriori
double epsilon() const
Returns the value of epsilon.
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables in the database
Size periodSize() const
how many samples between two checks of the stopping criteria
bool isEnabledEpsilon() const
Returns true if stopping criterion on epsilon is enabled, false otherwise.
void __createApriori()
create the apriori used for learning
Base class for undirected graphs.
Definition: undiGraph.h:109
bool isEnabledMaxTime() const
Returns true if stopping criterion on timeout is enabled, false otherwise.
The class imposing a N-sized tabu list as a structural constraints for learning algorithms.
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|.
double currentTime() const
Returns the current running time in seconds.
The K2 algorithm.
Definition: K2.h:47
The miic learning algorithm.
Definition: Miic.h:106
void disableMaxIter()
Disable stopping criterion on max iterations.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
void setForbiddenArcs(const ArcSet &set)
assign a set of forbidden arcs
A class that, given a structure and a parameter estimator returns a full Bayes net.
Definition: DAG2BNLearner.h:52
The class for parsing DatabaseTable rows and generating output rows.
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection assigning to each variable name its NodeId
bool verbosity() const
verbosity
void useAprioriBDeu(double weight=1)
use the BDeu apriori
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
const DatabaseTable & database() const
returns the database used by the BNLearner
StructuralConstraintForbiddenArcs __constraint_ForbiddenArcs
the constraint on forbidden arcs
void setSliceOrder(const NodeProperty< NodeId > &slice_order)
sets a partial order on the nodes
ApproximationSchemeSTATE
The different states of an approximation scheme.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
a helper to easily read databases
The base class for estimating parameters of CPTs.
#define GUM_EMIT3(signal, arg1, arg2, arg3)
Definition: signaler3.h:42
void useMIIC()
indicate that we wish to use MIIC
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
void useScoreBD()
indicate that we wish to use a BD score
void useMDL()
indicate that we wish to use the MDL correction for 3off2
void useScoreBDeu()
indicate that we wish to use a BDeu score
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Base class for dag.
Definition: DAG.h:102
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|. If the criterion was disabled it ...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Size NodeId
Type for node ids.
Definition: graphElements.h:98
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void setMaxIter(Size max)
stopping criterion on number of iterations. If the criterion was disabled it will be enabled ...
the no a priori class: corresponds to 0 weight-sample
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
void enableMaxTime()
Enable stopping criterion on timeout.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Base class for mixed graphs.
Definition: mixedGraph.h:127
the structural constraint imposing a partial order over nodes
void enableEpsilon()
Enable stopping criterion on epsilon.