aGrUM  0.18.1
a C++ library for (probabilistic) graphical models
genericBNLearner.h
Go to the documentation of this file.
1 
33 #ifndef GUM_LEARNING_GENERIC_BN_LEARNER_H
34 #define GUM_LEARNING_GENERIC_BN_LEARNER_H
35 
36 #include <sstream>
37 #include <memory>
38 
39 #include <agrum/BN/BayesNet.h>
40 #include <agrum/agrum.h>
43 #include <agrum/tools/graphs/DAG.h>
44 
53 
60 
65 
75 
80 
83 
86 
87 #include <agrum/BN/learning/K2.h>
88 #include <agrum/BN/learning/Miic.h>
91 
93 
94 namespace gum {
95 
96  namespace learning {
97 
98  class BNLearnerListener;
99 
108  // private:
109  public:
/// Enumeration of the scores that can be selected for learning.
111  enum class ScoreType { AIC, BD, BDeu, BIC, K2, LOG2LIKELIHOOD };
112 
/// Enumeration of the parameter estimation types that can be applied.
/// Only one enumerator (ML) is declared here.
115  enum class ParamEstimatorType { ML };
116 
/// Enumeration used to select the apriori.
118  enum class AprioriType {
119  NO_APRIORI,
120  SMOOTHING,
121  DIRICHLET_FROM_DATABASE,
122  BDEU
123  };
124 
/// Enumeration used to select the learning algorithm.
126  enum class AlgoType {
127  K2,
128  GREEDY_HILL_CLIMBING,
129  LOCAL_SEARCH_WITH_TABU_LIST,
130  MIIC_THREE_OFF_TWO
131  };
132 
133 
135  class Database {
136  public:
137  // ########################################################################
139  // ########################################################################
141 
143 
146  explicit Database(const std::string& file,
147  const std::vector< std::string >& missing_symbols);
148 
150 
152  explicit Database(const DatabaseTable<>& db);
153 
155 
165  Database(const std::string& filename,
166  Database& score_database,
167  const std::vector< std::string >& missing_symbols);
168 
170 
176  template < typename GUM_SCALAR >
177  Database(const std::string& filename,
178  const gum::BayesNet< GUM_SCALAR >& bn,
179  const std::vector< std::string >& missing_symbols);
180 
182  Database(const Database& from);
183 
185  Database(Database&& from);
186 
188  ~Database();
189 
191 
192  // ########################################################################
194  // ########################################################################
196 
198  Database& operator=(const Database& from);
199 
201  Database& operator=(Database&& from);
202 
204 
205  // ########################################################################
207  // ########################################################################
209 
211  DBRowGeneratorParser<>& parser();
212 
214  const std::vector< std::size_t >& domainSizes() const;
215 
217  const std::vector< std::string >& names() const;
218 
220  NodeId idFromName(const std::string& var_name) const;
221 
223  const std::string& nameFromId(NodeId id) const;
224 
226  const DatabaseTable<>& databaseTable() const;
227 
230  void setDatabaseWeight(const double new_weight);
231 
233  const Bijection< NodeId, std::size_t >& nodeId2Columns() const;
234 
236  const std::vector< std::string >& missingSymbols() const;
237 
239  std::size_t nbRows() const;
240 
242  std::size_t size() const;
243 
245 
248  void setWeight(const std::size_t i, const double weight);
249 
251 
253  double weight(const std::size_t i) const;
254 
256  double weight() const;
257 
258 
260 
261  protected:
264 
266  DBRowGeneratorParser<>* parser__{nullptr};
267 
269  std::vector< std::size_t > domain_sizes__;
270 
273 
275 #if defined(_OPENMP) && !defined(GUM_DEBUG_MODE)
276  Size max_threads_number__{getMaxNumberOfThreads()};
277 #else
278  Size max_threads_number__{1};
279 #endif /* GUM_DEBUG_MODE */
280 
282  Size min_nb_rows_per_thread__{100};
283 
284  private:
285  // returns the set of variables as a BN. This is convenient for
286  // the constructors of apriori Databases
287  template < typename GUM_SCALAR >
288  BayesNet< GUM_SCALAR > BNVars__() const;
289  };
290 
292  void setAprioriWeight__(double weight);
293 
294  public:
295  // ##########################################################################
297  // ##########################################################################
299 
301 
305  genericBNLearner(const std::string& filename,
306  const std::vector< std::string >& missing_symbols);
308 
328  template < typename GUM_SCALAR >
329  genericBNLearner(const std::string& filename,
330  const gum::BayesNet< GUM_SCALAR >& src,
331  const std::vector< std::string >& missing_symbols);
332 
335 
338 
340  virtual ~genericBNLearner();
341 
343 
344  // ##########################################################################
346  // ##########################################################################
348 
351 
354 
356 
357  // ##########################################################################
359  // ##########################################################################
361 
363  DAG learnDAG();
364 
368 
370  void setInitialDAG(const DAG&);
371 
373  const std::vector< std::string >& names() const;
374 
376  const std::vector< std::size_t >& domainSizes() const;
377  Size domainSize(NodeId var) const;
378  Size domainSize(const std::string& var) const;
379 
381 
385  NodeId idFromName(const std::string& var_name) const;
386 
388  const DatabaseTable<>& database() const;
389 
392  void setDatabaseWeight(const double new_weight);
393 
395 
398  void setRecordWeight(const std::size_t i, const double weight);
399 
401 
403  double recordWeight(const std::size_t i) const;
404 
406  double databaseWeight() const;
407 
409  const std::string& nameFromId(NodeId id) const;
410 
412 
418  template < template < typename > class XALLOC >
419  void useDatabaseRanges(
420  const std::vector< std::pair< std::size_t, std::size_t >,
421  XALLOC< std::pair< std::size_t, std::size_t > > >&
422  new_ranges);
423 
425  void clearDatabaseRanges();
426 
428 
431  const std::vector< std::pair< std::size_t, std::size_t > >&
432  databaseRanges() const;
433 
435 
455  std::pair< std::size_t, std::size_t >
456  useCrossValidationFold(const std::size_t learning_fold,
457  const std::size_t k_fold);
458 
459 
467  std::pair< double, double > chi2(const NodeId id1,
468  const NodeId id2,
469  const std::vector< NodeId >& knowing = {});
477  std::pair< double, double >
478  chi2(const std::string& name1,
479  const std::string& name2,
480  const std::vector< std::string >& knowing = {});
481 
489  std::pair< double, double > G2(const NodeId id1,
490  const NodeId id2,
491  const std::vector< NodeId >& knowing = {});
499  std::pair< double, double >
500  G2(const std::string& name1,
501  const std::string& name2,
502  const std::vector< std::string >& knowing = {});
503 
511  double logLikelihood(const std::vector< NodeId >& vars,
512  const std::vector< NodeId >& knowing = {});
513 
521  double logLikelihood(const std::vector< std::string >& vars,
522  const std::vector< std::string >& knowing = {});
523 
529  std::vector< double > rawPseudoCount(const std::vector< NodeId >& vars);
530 
536  std::vector< double > rawPseudoCount(const std::vector< std::string >& vars);
541  Size nbCols() const;
542 
547  Size nbRows() const;
548 
553  void useEM(const double epsilon);
554 
556  bool hasMissingValues() const;
557 
559 
560  // ##########################################################################
562  // ##########################################################################
564 
566  void useScoreAIC();
567 
569  void useScoreBD();
570 
572  void useScoreBDeu();
573 
575  void useScoreBIC();
576 
578  void useScoreK2();
579 
581  void useScoreLog2Likelihood();
582 
584 
585  // ##########################################################################
587  // ##########################################################################
589 
591  void useNoApriori();
592 
594 
597  void useAprioriBDeu(double weight = 1);
598 
600 
603  void useAprioriSmoothing(double weight = 1);
604 
606  void useAprioriDirichlet(const std::string& filename, double weight = 1);
607 
608 
610 
612  std::string checkScoreAprioriCompatibility();
614 
615  // ##########################################################################
617  // ##########################################################################
619 
621  void useGreedyHillClimbing();
622 
624 
627  void useLocalSearchWithTabuList(Size tabu_size = 100, Size nb_decrease = 2);
628 
630  void useK2(const Sequence< NodeId >& order);
631 
633  void useK2(const std::vector< NodeId >& order);
634 
636  void use3off2();
637 
639  void useMIIC();
640 
642 
643  // ##########################################################################
645  // ##########################################################################
649  void useNML();
652  void useMDL();
655  void useNoCorr();
656 
659  const std::vector< Arc > latentVariables() const;
660 
662  // ##########################################################################
664  // ##########################################################################
666 
668  void setMaxIndegree(Size max_indegree);
669 
675  void setSliceOrder(const NodeProperty< NodeId >& slice_order);
676 
681  void setSliceOrder(const std::vector< std::vector< std::string > >& slices);
682 
684  void setForbiddenArcs(const ArcSet& set);
685 
688  void addForbiddenArc(const Arc& arc);
689  void addForbiddenArc(const NodeId tail, const NodeId head);
690  void addForbiddenArc(const std::string& tail, const std::string& head);
692 
695  void eraseForbiddenArc(const Arc& arc);
696  void eraseForbiddenArc(const NodeId tail, const NodeId head);
697  void eraseForbiddenArc(const std::string& tail, const std::string& head);
699 
701  void setMandatoryArcs(const ArcSet& set);
702 
705  void addMandatoryArc(const Arc& arc);
706  void addMandatoryArc(const NodeId tail, const NodeId head);
707  void addMandatoryArc(const std::string& tail, const std::string& head);
709 
712  void eraseMandatoryArc(const Arc& arc);
713  void eraseMandatoryArc(const NodeId tail, const NodeId head);
714  void eraseMandatoryArc(const std::string& tail, const std::string& head);
716 
721  void setPossibleEdges(const EdgeSet& set);
722  void setPossibleSkeleton(const UndiGraph& skeleton);
724 
729  void addPossibleEdge(const Edge& edge);
730  void addPossibleEdge(const NodeId tail, const NodeId head);
731  void addPossibleEdge(const std::string& tail, const std::string& head);
733 
736  void erasePossibleEdge(const Edge& edge);
737  void erasePossibleEdge(const NodeId tail, const NodeId head);
738  void erasePossibleEdge(const std::string& tail, const std::string& head);
740 
742 
743  protected:
746 
748  Score<>* score__{nullptr};
749 
752 
754  double EMepsilon__{0.0};
755 
758 
761 
763  Apriori<>* apriori__{nullptr};
764 
766 
768  double apriori_weight__{1.0f};
769 
772 
775 
778 
781 
784 
787 
790 
793 
796 
800 
803 
806 
809 
812 
814  std::vector< std::pair< std::size_t, std::size_t > > ranges__;
815 
818 
820  std::string apriori_dbname__;
821 
824 
825  // the current algorithm as an approximationScheme
827 
829  static DatabaseTable<>
830  readFile__(const std::string& filename,
831  const std::vector< std::string >& missing_symbols);
832 
834  static void checkFileName__(const std::string& filename);
835 
837  void createApriori__();
838 
840  void createScore__();
841 
845  bool take_into_account_score = true);
846 
848  DAG learnDAG__();
849 
852 
854  const std::string& getAprioriType__() const;
855 
858 
859 
860  public:
861  // ##########################################################################
864  // ##########################################################################
865  // in order to not pollute the proper code of genericBNLearner, we
866  // directly
867  // implement those
868  // very simple methods here.
871  const ApproximationScheme* approximationScheme) {
872  current_algorithm__ = approximationScheme;
873  }
874 
/// Distributes a progress signal.
/// Passes approximationScheme to setCurrentApproximationScheme(), then emits
/// (pourcent, error, time) on the onProgress signal, but only when onProgress
/// reports having a listener.
875  INLINE void
876  distributeProgress(const ApproximationScheme* approximationScheme,
877  Size pourcent,
878  double error,
879  double time) {
880  setCurrentApproximationScheme(approximationScheme);
881 
882  if (onProgress.hasListener()) GUM_EMIT3(onProgress, pourcent, error, time);
883  };
884 
/// Distributes a stop signal.
/// Passes approximationScheme to setCurrentApproximationScheme(), then emits
/// message on the onStop signal, but only when onStop reports having a
/// listener.
886  INLINE void distributeStop(const ApproximationScheme* approximationScheme,
887  std::string message) {
888  setCurrentApproximationScheme(approximationScheme);
889 
890  if (onStop.hasListener()) GUM_EMIT1(onStop, message);
891  };
893 
/// Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|.
/// The same eps is forwarded to each of the four members below (the
/// approximation scheme of K2__, greedy_hill_climbing__,
/// local_search_with_tabu_list__ and Dag2BN__).
898  void setEpsilon(double eps) {
899  K2__.approximationScheme().setEpsilon(eps);
900  greedy_hill_climbing__.setEpsilon(eps);
901  local_search_with_tabu_list__.setEpsilon(eps);
902  Dag2BN__.setEpsilon(eps);
903  };
904 
/// Returns the value of epsilon held by the current approximation scheme.
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
906  double epsilon() const {
907  if (current_algorithm__ != nullptr)
908  return current_algorithm__->epsilon();
909  else
910  GUM_ERROR(FatalError, "No chosen algorithm for learning");
911  };
912 
914  void disableEpsilon() {
916  greedy_hill_climbing__.disableEpsilon();
917  local_search_with_tabu_list__.disableEpsilon();
918  Dag2BN__.disableEpsilon();
919  };
920 
922  void enableEpsilon() {
924  greedy_hill_climbing__.enableEpsilon();
925  local_search_with_tabu_list__.enableEpsilon();
926  Dag2BN__.enableEpsilon();
927  };
928 
931  bool isEnabledEpsilon() const {
932  if (current_algorithm__ != nullptr)
934  else
935  GUM_ERROR(FatalError, "No chosen algorithm for learning");
936  };
938 
944  void setMinEpsilonRate(double rate) {
946  greedy_hill_climbing__.setMinEpsilonRate(rate);
947  local_search_with_tabu_list__.setMinEpsilonRate(rate);
948  Dag2BN__.setMinEpsilonRate(rate);
949  };
950 
952  double minEpsilonRate() const {
953  if (current_algorithm__ != nullptr)
955  else
956  GUM_ERROR(FatalError, "No chosen algorithm for learning");
957  };
958 
962  greedy_hill_climbing__.disableMinEpsilonRate();
963  local_search_with_tabu_list__.disableMinEpsilonRate();
964  Dag2BN__.disableMinEpsilonRate();
965  };
969  greedy_hill_climbing__.enableMinEpsilonRate();
970  local_search_with_tabu_list__.enableMinEpsilonRate();
971  Dag2BN__.enableMinEpsilonRate();
972  };
975  bool isEnabledMinEpsilonRate() const {
976  if (current_algorithm__ != nullptr)
978  else
979  GUM_ERROR(FatalError, "No chosen algorithm for learning");
980  };
982 
/// Stopping criterion on the number of iterations.
/// The same max is forwarded to each of the four members below (the
/// approximation scheme of K2__, greedy_hill_climbing__,
/// local_search_with_tabu_list__ and Dag2BN__).
988  void setMaxIter(Size max) {
989  K2__.approximationScheme().setMaxIter(max);
990  greedy_hill_climbing__.setMaxIter(max);
991  local_search_with_tabu_list__.setMaxIter(max);
992  Dag2BN__.setMaxIter(max);
993  };
994 
/// Returns the criterion on the number of iterations held by the current
/// approximation scheme.
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
996  Size maxIter() const {
997  if (current_algorithm__ != nullptr)
998  return current_algorithm__->maxIter();
999  else
1000  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1001  };
1002 
1006  greedy_hill_climbing__.disableMaxIter();
1007  local_search_with_tabu_list__.disableMaxIter();
1008  Dag2BN__.disableMaxIter();
1009  };
1011  void enableMaxIter() {
1013  greedy_hill_climbing__.enableMaxIter();
1014  local_search_with_tabu_list__.enableMaxIter();
1015  Dag2BN__.enableMaxIter();
1016  };
1019  bool isEnabledMaxIter() const {
1020  if (current_algorithm__ != nullptr)
1022  else
1023  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1024  };
1026 
1031 
/// Stopping criterion on timeout.
/// The same timeout is forwarded to each of the four members below (the
/// approximation scheme of K2__, greedy_hill_climbing__,
/// local_search_with_tabu_list__ and Dag2BN__).
1033  void setMaxTime(double timeout) {
1034  K2__.approximationScheme().setMaxTime(timeout);
1035  greedy_hill_climbing__.setMaxTime(timeout);
1036  local_search_with_tabu_list__.setMaxTime(timeout);
1037  Dag2BN__.setMaxTime(timeout);
1038  }
1039 
/// Returns the timeout (in seconds) held by the current approximation scheme.
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
1041  double maxTime() const {
1042  if (current_algorithm__ != nullptr)
1043  return current_algorithm__->maxTime();
1044  else
1045  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1046  };
1047 
/// Returns the current running time (in seconds) of the current
/// approximation scheme.
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
1049  double currentTime() const {
1050  if (current_algorithm__ != nullptr)
1051  return current_algorithm__->currentTime();
1052  else
1053  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1054  };
1055 
1059  greedy_hill_climbing__.disableMaxTime();
1060  local_search_with_tabu_list__.disableMaxTime();
1061  Dag2BN__.disableMaxTime();
1062  };
1063  void enableMaxTime() {
1065  greedy_hill_climbing__.enableMaxTime();
1066  local_search_with_tabu_list__.enableMaxTime();
1067  Dag2BN__.enableMaxTime();
1068  };
1071  bool isEnabledMaxTime() const {
1072  if (current_algorithm__ != nullptr)
1074  else
1075  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1076  };
1078 
1084  greedy_hill_climbing__.setPeriodSize(p);
1085  local_search_with_tabu_list__.setPeriodSize(p);
1086  Dag2BN__.setPeriodSize(p);
1087  };
1088 
/// Returns the period size held by the current approximation scheme.
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
1089  Size periodSize() const {
1090  if (current_algorithm__ != nullptr)
1091  return current_algorithm__->periodSize();
1092  else
1093  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1094  };
1096 
1099  void setVerbosity(bool v) {
1101  greedy_hill_climbing__.setVerbosity(v);
1102  local_search_with_tabu_list__.setVerbosity(v);
1103  Dag2BN__.setVerbosity(v);
1104  };
1105 
/// Returns the verbosity flag of the current approximation scheme
/// (true if verbosity is enabled).
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
1106  bool verbosity() const {
1107  if (current_algorithm__ != nullptr)
1108  return current_algorithm__->verbosity();
1109  else
1110  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1111  };
1113 
1116 
1118  if (current_algorithm__ != nullptr)
1120  else
1121  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1122  };
1123 
1126  if (current_algorithm__ != nullptr)
1128  else
1129  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1130  };
1131 
/// Returns the scheme history, as a reference to the vector held by the
/// current approximation scheme.
/// When current_algorithm__ is null, a FatalError is reported through
/// GUM_ERROR instead.
1133  const std::vector< double >& history() const {
1134  if (current_algorithm__ != nullptr)
1135  return current_algorithm__->history();
1136  else
1137  GUM_ERROR(FatalError, "No chosen algorithm for learning");
1138  };
1140  };
1141 
1142  } /* namespace learning */
1143 
1144 } /* namespace gum */
1145 
1147 #ifndef GUM_NO_INLINE
1149 #endif /* GUM_NO_INLINE */
1150 
1152 
1153 #endif /* GUM_LEARNING_GENERIC_BN_LEARNER_H */
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
INLINE void setCurrentApproximationScheme(const ApproximationScheme *approximationScheme)
distribute signals
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
StructuralConstraintPossibleEdges constraint_PossibleEdges__
the constraint on possible Edges
void setMaxIndegree(Size max_indegree)
sets the max indegree
void useScoreBIC()
indicate that we wish to use a BIC score
Class representing a Bayesian Network.
Definition: BayesNet.h:78
double minEpsilonRate() const
Returns the value of the minimal epsilon rate.
void enableMaxTime()
stopping criterion on timeout If the criterion was disabled it will be enabled
KModeTypes
the description type for the complexity correction
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
double EMepsilon__
epsilon for EM; if epsilon = 0.0, EM is not used
bool hasMissingValues() const
returns true if the learner's database has missing values
StructuralConstraintIndegree constraint_Indegree__
the constraint for indegrees
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
ApproximationSchemeSTATE stateApproximationScheme() const
history
void setMaxTime(double timeout)
stopping criterion on timeout If the criterion was disabled it will be enabled
the structural constraint for forbidding the creation of some arcs during structure learning ...
Signaler3< Size, double, double > onProgress
Progression, error and time.
void setVerbosity(bool v)
verbosity
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
AprioriType
an enumeration to select the apriori
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Database score_database__
the database to be used by the scores and parameter estimators
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
double maxTime() const
Returns the timeout (in seconds).
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
The class computing n times the corrected mutual information, as used in the 3off2 algorithm...
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
The base class for all the scores used for learning (BIC, BDeu, etc)
Definition: score.h:52
void setDatabaseWeight(const double new_weight)
assign a weight to all the rows of the learning database so that the sum of their weights is equal to...
void clearDatabaseRanges()
reset the ranges to the one range corresponding to the whole database
void setRecordWeight(const std::size_t i, const double weight)
sets the weight of the ith record of the database
#define GUM_EMIT1(signal, arg1)
Definition: signaler1.h:42
Approximation Scheme.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|) If the criterion was disabl...
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
ScoreType
an enumeration enabling to select easily the score we wish to use
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
INLINE void distributeStop(const ApproximationScheme *approximationScheme, std::string message)
distribute signals
the structural constraint indicating that some arcs shall never be removed or reversed ...
ParamEstimatorType
an enumeration to select the type of parameter estimation we shall apply
void setInitialDAG(const DAG &)
sets an initial DAG structure
INLINE void distributeProgress(const ApproximationScheme *approximationScheme, Size pourcent, double error, double time)
distribute signals
const std::string & nameFromId(NodeId id) const
returns the variable name corresponding to a given node id
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
StructuralConstraintSliceOrder constraint_SliceOrder__
the constraint for 2TBNs
void createCorrectedMutualInformation__()
create the Corrected Mutual Information instance for Miic/3off2
the base class for all a priori
Definition: apriori.h:50
unsigned int getMaxNumberOfThreads()
Returns the maximum number of threads at any time.
void setPossibleSkeleton(const UndiGraph &skeleton)
assign a set of possible edges, given as an undirected skeleton
void setPeriodSize(Size p)
How many samples between two checks of the stopping criteria.
AlgoType selected_algo__
the selected learning algorithm
void setAprioriWeight__(double weight)
sets the apriori weight
double apriori_weight__
the weight of the apriori
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void use3off2()
indicate that we wish to use 3off2
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
double epsilon() const
Get the value of epsilon.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void useScoreK2()
indicate that we wish to use a K2 score
static void checkFileName__(const std::string &filename)
checks whether the extension of a CSV filename is correct
std::vector< std::pair< std::size_t, std::size_t > > ranges__
the set of rows' ranges within the database in which learning is done
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Definition: agrum.h:25
void setMinEpsilonRate(double rate)
Given that we approximate f(t), stopping criterion on d/dt(|f(t+1)-f(t)|).
const ApproximationScheme * current_algorithm__
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Score * score__
the score used
void enableMinEpsilonRate()
Enable stopping criterion on epsilon rate.
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
std::pair< double, double > G2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
const std::vector< std::string > & names() const
returns the names of the variables in the database
double minEpsilonRate() const
Get the value of the minimal epsilon rate.
Bijection< NodeId, std::size_t > nodeId2cols__
a bijection assigning to each NodeId its column in the database
void setVerbosity(bool v)
Set the verbosity on (true) or off (false).
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
The class for generic Hash Tables.
Definition: hashTable.h:679
Size periodSize() const
Returns the period size.
void useScoreLog2Likelihood()
indicate that we wish to use a Log2Likelihood score
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void setMaxTime(double timeout)
Stopping criterion on timeout.
std::vector< double > rawPseudoCount(const std::vector< NodeId > &vars)
Return the pseudo-counts of the NodeIds vars in the database, as a raw array.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
AlgoType
an enumeration to select easily the learning algorithm to use
const std::vector< Arc > latentVariables() const
get the list of arcs hiding latent variables
void disableEpsilon()
Disable stopping criterion on epsilon.
ScoreType score_type__
the score selected for learning
void useNML()
indicate that we wish to use the NML correction for 3off2
Size nbrIterations() const
Returns the number of iterations.
void enableMaxIter()
Enable stopping criterion on max iterations.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
void disableMaxTime()
Disable stopping criterion on timeout.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void setMandatoryArcs(const ArcSet &set)
assign a set of mandatory arcs
Size domainSize(NodeId var) const
returns the domain size of the variable with the given node id
void disableEpsilon()
Disable stopping criterion on epsilon.
void erasePossibleEdge(const Edge &edge)
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void createApriori__()
create the apriori used for learning
void setPeriodSize(Size p)
how many samples between two checks of the stopping criteria
void enableEpsilon()
Enable stopping criterion on epsilon.
bool isEnabledMinEpsilonRate() const
Returns true if stopping criterion on epsilon rate is enabled, false otherwise.
StructuralConstraintMandatoryArcs constraint_MandatoryArcs__
the constraint on mandatory arcs
Signaler1< std::string > onStop
Criteria messageApproximationScheme.
double databaseWeight() const
returns the weight of the whole database
The base class for all directed edges. This class is used as a basis for manipulating all directed edge...
double currentTime() const
get the current running time in second (double)
double recordWeight(const std::size_t i) const
returns the weight of the ith record
void useNoCorr()
indicate that we wish to use the NoCorr correction for 3off2
void disableMaxTime()
Disable stopping criterion on timeout.
ParamEstimator * createParamEstimator__(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
DAG2BNLearner Dag2BN__
the parametric EM
void enableMaxIter()
Enable stopping criterion on max iterations.
void addPossibleEdge(const Edge &edge)
Size maxIter() const
Returns the criterion on number of iterations.
void useLocalSearchWithTabuList(Size tabu_size=100, Size nb_decrease=2)
indicate that we wish to use a local search with tabu list
genericBNLearner & operator=(const genericBNLearner &)
copy operator
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
const std::string & getAprioriType__() const
returns the type (as a string) of a given apriori
Database * apriori_database__
the database used by the Dirichlet a priori
Apriori * apriori__
the apriori used
Miic miic_3off2__
the 3off2 algorithm
void useGreedyHillClimbing()
indicate that we wish to use a greedy hill climbing algorithm
DAG learnDAG()
learn a structure from a file (must have read the db before)
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
DatabaseTable database__
the database itself
void useAprioriSmoothing(double weight=1)
use the apriori smoothing
The greedy hill climbing learning algorithm (for directed graphs)
const std::vector< double > & history() const
Returns the scheme history.
DAG initial_dag__
an initial DAG given to learners
std::string apriori_dbname__
the filename for the Dirichlet a priori, if any
DAG learnDAG__()
returns the DAG learnt
The class representing a tabular database as used by learning tasks.
double maxTime() const
returns the timeout (in seconds)
void useScoreAIC()
indicate that we wish to use an AIC score
std::vector< std::size_t > domain_sizes__
the domain sizes of the variables (useful to speed-up computations)
CorrectedMutualInformation ::KModeTypes kmode_3off2__
the penalty used in 3off2
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
void useK2(const Sequence< NodeId > &order)
indicate that we wish to use K2
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
ApproximationSchemeSTATE stateApproximationScheme() const
Returns the approximation scheme state.
void setMaxIter(Size max)
Stopping criterion on number of iterations.
virtual ~genericBNLearner()
destructor
void disableMinEpsilonRate()
Disable stopping criterion on epsilon rate.
The local search with tabu list learning algorithm (for directed graphs)
void setPossibleEdges(const EdgeSet &set)
assign a set of possible edges
ParamEstimatorType param_estimator_type__
the type of the parameter estimator
bool isEnabledMaxIter() const
Returns true if stopping criterion on max iterations is enabled, false otherwise. ...
The base class for all undirected edges.
const std::vector< double > & history() const
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
CorrectedMutualInformation * mutual_info__
the selected correction for 3off2 and miic
A pack of learning algorithms that can easily be used.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void disableMaxIter()
Disable stopping criterion on max iterations.
void useEM(const double epsilon)
use the EM algorithm to learn parameters
bool verbosity() const
Returns true if verbosity is enabled.
StructuralConstraintForbiddenArcs constraint_ForbiddenArcs__
the constraint on forbidden arcs
void useAprioriDirichlet(const std::string &filename, double weight=1)
use the Dirichlet apriori
void useDatabaseRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
use a new set of database rows' ranges to perform learning
double epsilon() const
Returns the value of epsilon.
const std::vector< std::size_t > & domainSizes() const
returns the domain sizes of the variables in the database
Size periodSize() const
how many samples between two checks of the stopping criteria
bool isEnabledEpsilon() const
Returns true if stopping criterion on epsilon is enabled, false otherwise.
Base class for undirected graphs.
Definition: undiGraph.h:109
bool isEnabledMaxTime() const
Returns true if stopping criterion on timeout is enabled, false otherwise.
The class imposing an N-sized tabu list as a structural constraint for learning algorithms.
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|.
double currentTime() const
Returns the current running time in seconds.
The K2 algorithm.
Definition: K2.h:47
The miic learning algorithm.
Definition: Miic.h:106
LocalSearchWithTabuList local_search_with_tabu_list__
the local search with tabu list algorithm
void disableMaxIter()
Disable stopping criterion on max iterations.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void createScore__()
create the score used for learning
void setForbiddenArcs(const ArcSet &set)
assign a set of forbidden arcs
A class that, given a structure and a parameter estimator, returns a full Bayes net.
Definition: DAG2BNLearner.h:52
static DatabaseTable readFile__(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a DatabaseTable
GreedyHillClimbing greedy_hill_climbing__
the greedy hill climbing algorithm
The class for parsing DatabaseTable rows and generating output rows.
bool verbosity() const
verbosity
void useAprioriBDeu(double weight=1)
use the BDeu apriori
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
const DatabaseTable & database() const
returns the database used by the BNLearner
AprioriType apriori_type__
the a priori selected for the score and parameters
StructuralConstraintTabuList constraint_TabuList__
the constraint for tabu lists
void setSliceOrder(const NodeProperty< NodeId > &slice_order)
sets a partial order on the nodes
ApproximationSchemeSTATE
The different states of an approximation scheme.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
a helper to easily read databases
The base class for estimating parameters of CPTs.
#define GUM_EMIT3(signal, arg1, arg2, arg3)
Definition: signaler3.h:42
void useMIIC()
indicate that we wish to use MIIC
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
void useScoreBD()
indicate that we wish to use a BD score
MixedGraph prepare_miic_3off2__()
prepares the initial graph for 3off2 or miic
void useMDL()
indicate that we wish to use the MDL correction for 3off2
void useScoreBDeu()
indicate that we wish to use a BDeu score
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Base class for dag.
Definition: DAG.h:102
void setEpsilon(double eps)
Given that we approximate f(t), stopping criterion on |f(t+1)-f(t)|. If the criterion was disabled it ...
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Size NodeId
Type for node ids.
Definition: graphElements.h:98
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void setMaxIter(Size max)
Stopping criterion on number of iterations. If the criterion was disabled it will be enabled ...
the no a priori class: corresponds to 0 weight-sample
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
void enableMaxTime()
Enable stopping criterion on timeout.
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
Copyright 2005-2020 Pierre-Henri WUILLEMIN() & Christophe GONZALES() info_at_agrum_dot_org.
Base class for mixed graphs.
Definition: mixedGraph.h:127
the structural constraint imposing a partial order over nodes
void enableEpsilon()
Enable stopping criterion on epsilon.