aGrUM  0.15.1
genericBNLearner.cpp
Go to the documentation of this file.
1 
32 #include <algorithm>
33 
34 #include <agrum/agrum.h>
40 
41 // include the inlined functions if necessary
42 #ifdef GUM_NO_INLINE
44 #endif /* GUM_NO_INLINE */
45 
46 namespace gum {
47 
48  namespace learning {
49 
50 
52  __database(db) {
53  // get the variables names
54  const auto& var_names = __database.variableNames();
55  const std::size_t nb_vars = var_names.size();
56  for (auto dom : __database.domainSizes())
57  __domain_sizes.push_back(dom);
58  for (std::size_t i = 0; i < nb_vars; ++i) {
60  }
61 
62  // create the parser
63  __parser =
65  }
66 
67 
69  const std::string& filename,
70  const std::vector< std::string >& missing_symbols) :
71  Database(genericBNLearner::__readFile(filename, missing_symbols)) {}
72 
73 
75  const std::string& CSV_filename,
76  Database& score_database,
77  const std::vector< std::string >& missing_symbols) {
78  // assign to each column name in the CSV file its column
80  DBInitializerFromCSV<> initializer(CSV_filename);
81  const auto& apriori_names = initializer.variableNames();
82  std::size_t apriori_nb_vars = apriori_names.size();
83  HashTable< std::string, std::size_t > apriori_names2col(apriori_nb_vars);
84  for (std::size_t i = std::size_t(0); i < apriori_nb_vars; ++i)
85  apriori_names2col.insert(apriori_names[i], i);
86 
87  // check that there are at least as many variables in the a priori
88  // database as those in the score_database
89  if (apriori_nb_vars < score_database.__database.nbVariables()) {
91  "the a apriori database has fewer variables "
92  "than the observed database");
93  }
94 
95  // get the mapping from the columns of score_database to those of
96  // the CSV file
97  const std::vector< std::string >& score_names =
98  score_database.databaseTable().variableNames();
99  const std::size_t score_nb_vars = score_names.size();
100  HashTable< std::size_t, std::size_t > mapping(score_nb_vars);
101  for (std::size_t i = std::size_t(0); i < score_nb_vars; ++i) {
102  try {
103  mapping.insert(i, apriori_names2col[score_names[i]]);
104  } catch (Exception&) {
106  "Variable "
107  << score_names[i]
108  << " of the observed database does not belong to the "
109  << "apriori database");
110  }
111  }
112 
113  // create the translators for CSV database
114  for (std::size_t i = std::size_t(0); i < score_nb_vars; ++i) {
115  const Variable& var = score_database.databaseTable().variable(i);
116  __database.insertTranslator(var, mapping[i], missing_symbols);
117  }
118 
119  // fill the database
120  initializer.fillDatabase(__database);
121 
122  // get the domain sizes of the variables
123  for (auto dom : __database.domainSizes())
124  __domain_sizes.push_back(dom);
125 
126  // compute the mapping from node ids to column indices
127  __nodeId2cols = score_database.nodeId2Columns();
128 
129  // create the parser
130  __parser =
132  }
133 
134 
138  // create the parser
139  __parser =
141  }
142 
143 
145  __database(std::move(from.__database)),
146  __domain_sizes(std::move(from.__domain_sizes)),
147  __nodeId2cols(std::move(from.__nodeId2cols)) {
148  // create the parser
149  __parser =
151  }
152 
153 
155 
157  operator=(const Database& from) {
158  if (this != &from) {
159  delete __parser;
160  __database = from.__database;
163 
164  // create the parser
165  __parser =
167  }
168 
169  return *this;
170  }
171 
174  if (this != &from) {
175  delete __parser;
176  __database = std::move(from.__database);
177  __domain_sizes = std::move(from.__domain_sizes);
178  __nodeId2cols = std::move(from.__nodeId2cols);
179 
180  // create the parser
181  __parser =
183  }
184 
185  return *this;
186  }
187 
188 
189  // ===========================================================================
190 
192  const std::string& filename,
193  const std::vector< std::string >& missing_symbols) :
194  __score_database(filename, missing_symbols) {
196 
197  // for debugging purposes
198  GUM_CONSTRUCTOR(genericBNLearner);
199  }
200 
201 
203  __score_database(db) {
205 
206  // for debugging purposes
207  GUM_CONSTRUCTOR(genericBNLearner);
208  }
209 
210 
229 
230  // for debugging purposes
231  GUM_CONS_CPY(genericBNLearner);
232  }
233 
244  __selected_algo(from.__selected_algo), __K2(std::move(from.__K2)),
245  __miic_3off2(std::move(from.__miic_3off2)),
249  std::move(from.__local_search_with_tabu_list)),
251  __ranges(std::move(from.__ranges)),
253  __initial_dag(std::move(from.__initial_dag)) {
255 
256  // for debugging purposes
257  GUM_CONS_MOV(genericBNLearner);
258  }
259 
261  if (__score) delete __score;
262 
263  if (__apriori) delete __apriori;
264 
265  if (__no_apriori) delete __no_apriori;
266 
268 
269  if (__mutual_info) delete __mutual_info;
270 
271  GUM_DESTRUCTOR(genericBNLearner);
272  }
273 
275  if (this != &from) {
276  if (__score) {
277  delete __score;
278  __score = nullptr;
279  }
280 
281  if (__apriori) {
282  delete __apriori;
283  __apriori = nullptr;
284  }
285 
286  if (__apriori_database) {
287  delete __apriori_database;
288  __apriori_database = nullptr;
289  }
290 
291  if (__mutual_info) {
292  delete __mutual_info;
293  __mutual_info = nullptr;
294  }
295 
296  __score_type = from.__score_type;
298  __EMepsilon = from.__EMepsilon;
307  __K2 = from.__K2;
308  __miic_3off2 = from.__miic_3off2;
313  __ranges = from.__ranges;
316  __current_algorithm = nullptr;
317  }
318 
319  return *this;
320  }
321 
323  if (this != &from) {
324  if (__score) {
325  delete __score;
326  __score = nullptr;
327  }
328 
329  if (__apriori) {
330  delete __apriori;
331  __apriori = nullptr;
332  }
333 
334  if (__apriori_database) {
335  delete __apriori_database;
336  __apriori_database = nullptr;
337  }
338 
339  if (__mutual_info) {
340  delete __mutual_info;
341  __mutual_info = nullptr;
342  }
343 
344  __score_type = from.__score_type;
345  __param_estimator_type = from.__param_estimator_type;
346  __EMepsilon = from.__EMepsilon;
347  __apriori_type = from.__apriori_type;
348  __apriori_weight = from.__apriori_weight;
349  __constraint_SliceOrder = std::move(from.__constraint_SliceOrder);
350  __constraint_Indegree = std::move(from.__constraint_Indegree);
351  __constraint_TabuList = std::move(from.__constraint_TabuList);
352  __constraint_ForbiddenArcs = std::move(from.__constraint_ForbiddenArcs);
353  __constraint_MandatoryArcs = std::move(from.__constraint_MandatoryArcs);
354  __selected_algo = from.__selected_algo;
355  __K2 = from.__K2;
356  __miic_3off2 = std::move(from.__miic_3off2);
357  __3off2_kmode = from.__3off2_kmode;
358  __greedy_hill_climbing = std::move(from.__greedy_hill_climbing);
360  std::move(from.__local_search_with_tabu_list);
361  __score_database = std::move(from.__score_database);
362  __ranges = std::move(from.__ranges);
363  __apriori_dbname = std::move(from.__apriori_dbname);
364  __initial_dag = std::move(from.__initial_dag);
365  __current_algorithm = nullptr;
366  }
367 
368  return *this;
369  }
370 
371 
// Loads the CSV file `filename` into a freshly built DatabaseTable<> and
// returns it.
//
// The file type is decided purely from the last four characters of the name,
// compared case-insensitively against ".csv". The statements that report the
// two error cases start on listing lines 377 and 387, which are not visible
// here; only their message strings are.
// NOTE(review): listing line 398 is also missing; it presumably declares the
// `translator` object inserted for every column below -- confirm.
372  DatabaseTable<> readFile(const std::string& filename) {
373  // get the extension of the file
374  Size filename_size = Size(filename.size());
375 
// a name shorter than 4 characters cannot end with ".csv"
376  if (filename_size < 4) {
378  "genericBNLearner could not determine the "
379  "file type of the database");
380  }
381 
// lower-case the 4-character suffix so that ".CSV", ".Csv", ... are accepted
382  std::string extension = filename.substr(filename.size() - 4);
383  std::transform(
384  extension.begin(), extension.end(), extension.begin(), ::tolower);
385 
386  if (extension != ".csv") {
388  "genericBNLearner does not support yet this type "
389  "of database file");
390  }
391 
// the initializer provides the variable names found in the CSV file
392  DBInitializerFromCSV<> initializer(filename);
393 
394  const auto& var_names = initializer.variableNames();
395  const std::size_t nb_vars = var_names.size();
396 
// register the same translator object once per column: column i -> translator i
397  DBTranslatorSet<> translator_set;
399  for (std::size_t i = 0; i < nb_vars; ++i) {
400  translator_set.insertTranslator(translator, i);
401  }
402 
// build the table from the translators, name its variables, then load the rows
403  DatabaseTable<> database(translator_set);
404  database.setVariableNames(initializer.variableNames());
405  initializer.fillDatabase(database);
406 
407  return database;
408  }
409 
410 
// Checks that `filename` looks like a CSV file name.
//
// Only the name is inspected: it must be at least 4 characters long and its
// last 4 characters, lower-cased, must equal ".csv". Both failure cases are
// reported through GUM_ERROR-style statements whose exception types sit on
// listing lines 416 and 427, which are not visible here.
411  void genericBNLearner::__checkFileName(const std::string& filename) {
412  // get the extension of the file
413  Size filename_size = Size(filename.size());
414 
// a name shorter than 4 characters cannot end with ".csv"
415  if (filename_size < 4) {
417  "genericBNLearner could not determine the "
418  "file type of the database");
419  }
420 
// compare the suffix case-insensitively by lower-casing it first
421  std::string extension = filename.substr(filename.size() - 4);
422  std::transform(
423  extension.begin(), extension.end(), extension.begin(), ::tolower);
424 
425  if (extension != ".csv") {
426  GUM_ERROR(
428  "genericBNLearner does not support yet this type of database file");
429  }
430  }
431 
432 
434  const std::string& filename,
435  const std::vector< std::string >& missing_symbols) {
436  // get the extension of the file
437  __checkFileName(filename);
438 
439  DBInitializerFromCSV<> initializer(filename);
440 
441  const auto& var_names = initializer.variableNames();
442  const std::size_t nb_vars = var_names.size();
443 
444  DBTranslatorSet<> translator_set;
445  DBTranslator4LabelizedVariable<> translator(missing_symbols);
446  for (std::size_t i = 0; i < nb_vars; ++i) {
447  translator_set.insertTranslator(translator, i);
448  }
449 
450  DatabaseTable<> database(missing_symbols, translator_set);
451  database.setVariableNames(initializer.variableNames());
452  initializer.fillDatabase(database);
453 
454  database.reorder();
455 
456  return database;
457  }
458 
459 
461  // first, save the old apriori, to be deleted if everything is ok
462  Apriori<>* old_apriori = __apriori;
463 
464  // create the new apriori
465  switch (__apriori_type) {
469  break;
470 
474  break;
475 
477  if (__apriori_database != nullptr) {
478  delete __apriori_database;
479  __apriori_database = nullptr;
480  }
481 
485 
490  break;
491 
492  case AprioriType::BDEU:
493  __apriori = new AprioriBDeu<>(__score_database.databaseTable(),
495  break;
496 
497  default:
499  "The BNLearner does not support yet this apriori");
500  }
501 
502  // do not forget to assign a weight to the apriori
504 
505  // remove the old apriori, if any
506  if (old_apriori != nullptr) delete old_apriori;
507  }
508 
510  // first, save the old score, to be deleted if everything is ok
511  Score<>* old_score = __score;
512 
513  // create the new scoring function
514  switch (__score_type) {
515  case ScoreType::AIC:
517  *__apriori,
518  __ranges,
520  break;
521 
522  case ScoreType::BD:
524  *__apriori,
525  __ranges,
527  break;
528 
529  case ScoreType::BDeu:
531  *__apriori,
532  __ranges,
534  break;
535 
536  case ScoreType::BIC:
538  *__apriori,
539  __ranges,
541  break;
542 
543  case ScoreType::K2:
545  *__apriori,
546  __ranges,
548  break;
549 
552  *__apriori,
553  __ranges,
555  break;
556 
557  default:
559  "genericBNLearner does not support yet this score");
560  }
561 
562  // remove the old score, if any
563  if (old_score != nullptr) delete old_score;
564  }
565 
568  bool take_into_account_score) {
569  ParamEstimator<>* param_estimator = nullptr;
570 
571  // create the new estimator
572  switch (__param_estimator_type) {
574  if (take_into_account_score && (__score != nullptr)) {
575  param_estimator =
576  new ParamEstimatorML<>(parser,
577  *__apriori,
579  __ranges,
581  } else {
582  param_estimator =
583  new ParamEstimatorML<>(parser,
584  *__apriori,
585  *__no_apriori,
586  __ranges,
588  }
589 
590  break;
591 
592  default:
594  "genericBNLearner does not support "
595  << "yet this parameter estimator");
596  }
597 
598  // assign the set of ranges
599  param_estimator->setRanges(__ranges);
600 
601  return param_estimator;
602  }
603 
606  // Initialize the mixed graph to the fully connected graph
607  MixedGraph mgraph;
608  for (Size i = 0; i < __score_database.databaseTable().nbVariables(); ++i) {
609  mgraph.addNodeWithId(i);
610  for (Size j = 0; j < i; ++j) {
611  mgraph.addEdge(j, i);
612  }
613  }
614 
615  // translating the constraints for 3off2 or miic
616  HashTable< std::pair< NodeId, NodeId >, char > initial_marks;
617  const ArcSet& mandatory_arcs = __constraint_MandatoryArcs.arcs();
618  for (const auto& arc : mandatory_arcs) {
619  initial_marks.insert({arc.tail(), arc.head()}, '>');
620  }
621 
622  const ArcSet& forbidden_arcs = __constraint_ForbiddenArcs.arcs();
623  for (const auto& arc : forbidden_arcs) {
624  initial_marks.insert({arc.tail(), arc.head()}, '-');
625  }
626  __miic_3off2.addConstraints(initial_marks);
627 
628  // create the mutual entropy object
629  // if (__mutual_info == nullptr) { this->useNML(); }
631 
632  return mgraph;
633  }
634 
637  GUM_ERROR(OperationNotAllowed, "Must be using the miic/3off2 algorithm");
638  }
639  // check that the database does not contain any missing value
642  "For the moment, the BNLearner is unable to learn "
643  << "structures with missing values in databases");
644  }
645  BNLearnerListener listener(this, __miic_3off2);
646 
647  // create the mixed graph
648  MixedGraph mgraph = this->__prepare_miic_3off2();
649 
651  }
652 
654  // create the score and the apriori
655  __createApriori();
656  __createScore();
657 
658  return __learnDAG();
659  }
660 
662  if (__mutual_info != nullptr) delete __mutual_info;
663 
664  __mutual_info =
666  *__no_apriori,
667  __ranges,
669  switch (__3off2_kmode) {
672  break;
673 
676  break;
677 
680  break;
681 
682  default:
684  "The BNLearner's corrected mutual information class does "
685  << "not support yet penalty mode " << int(__3off2_kmode));
686  }
687  }
688 
690  // check that the database does not contain any missing value
692  || ((__apriori_database != nullptr)
696  "For the moment, the BNLearner is unable to cope "
697  "with missing values in databases");
698  }
699  // add the mandatory arcs to the initial dag and remove the forbidden ones
700  // from the initial graph
701  DAG init_graph = __initial_dag;
702 
703  const ArcSet& mandatory_arcs = __constraint_MandatoryArcs.arcs();
704 
705  for (const auto& arc : mandatory_arcs) {
706  if (!init_graph.exists(arc.tail())) init_graph.addNodeWithId(arc.tail());
707 
708  if (!init_graph.exists(arc.head())) init_graph.addNodeWithId(arc.head());
709 
710  init_graph.addArc(arc.tail(), arc.head());
711  }
712 
713  const ArcSet& forbidden_arcs = __constraint_ForbiddenArcs.arcs();
714 
715  for (const auto& arc : forbidden_arcs) {
716  init_graph.eraseArc(arc);
717  }
718 
719  switch (__selected_algo) {
720  // ========================================================================
722  BNLearnerListener listener(this, __miic_3off2);
723  // create the mixedGraph and the corrected mutual information
724  MixedGraph mgraph = this->__prepare_miic_3off2();
725 
726  return __miic_3off2.learnStructure(*__mutual_info, mgraph);
727  }
728 
729  // ========================================================================
736  gen_constraint;
737  static_cast< StructuralConstraintMandatoryArcs& >(gen_constraint) =
739  static_cast< StructuralConstraintForbiddenArcs& >(gen_constraint) =
741  static_cast< StructuralConstraintPossibleEdges& >(gen_constraint) =
743  static_cast< StructuralConstraintSliceOrder& >(gen_constraint) =
745 
747  gen_constraint);
748 
751  sel_constraint;
752  static_cast< StructuralConstraintIndegree& >(sel_constraint) =
754 
755  GraphChangesSelector4DiGraph< decltype(sel_constraint),
756  decltype(op_set) >
757  selector(*__score, sel_constraint, op_set);
758 
759  return __greedy_hill_climbing.learnStructure(selector, init_graph);
760  }
761 
762  // ========================================================================
769  gen_constraint;
770  static_cast< StructuralConstraintMandatoryArcs& >(gen_constraint) =
772  static_cast< StructuralConstraintForbiddenArcs& >(gen_constraint) =
774  static_cast< StructuralConstraintPossibleEdges& >(gen_constraint) =
776  static_cast< StructuralConstraintSliceOrder& >(gen_constraint) =
778 
780  gen_constraint);
781 
785  sel_constraint;
786  static_cast< StructuralConstraintTabuList& >(sel_constraint) =
788  static_cast< StructuralConstraintIndegree& >(sel_constraint) =
790 
791  GraphChangesSelector4DiGraph< decltype(sel_constraint),
792  decltype(op_set) >
793  selector(*__score, sel_constraint, op_set);
794 
796  init_graph);
797  }
798 
799  // ========================================================================
800  case AlgoType::K2: {
801  BNLearnerListener listener(this, __K2.approximationScheme());
805  gen_constraint;
806  static_cast< StructuralConstraintMandatoryArcs& >(gen_constraint) =
808  static_cast< StructuralConstraintForbiddenArcs& >(gen_constraint) =
810  static_cast< StructuralConstraintPossibleEdges& >(gen_constraint) =
812 
814  gen_constraint);
815 
816  // if some mandatory arcs are incompatible with the order, use a DAG
817  // constraint instead of a DiGraph constraint to avoid cycles
818  const ArcSet& mandatory_arcs =
819  static_cast< StructuralConstraintMandatoryArcs& >(gen_constraint)
820  .arcs();
821  const Sequence< NodeId >& order = __K2.order();
822  bool order_compatible = true;
823 
824  for (const auto& arc : mandatory_arcs) {
825  if (order.pos(arc.tail()) >= order.pos(arc.head())) {
826  order_compatible = false;
827  break;
828  }
829  }
830 
831  if (order_compatible) {
834  sel_constraint;
835  static_cast< StructuralConstraintIndegree& >(sel_constraint) =
837 
838  GraphChangesSelector4DiGraph< decltype(sel_constraint),
839  decltype(op_set) >
840  selector(*__score, sel_constraint, op_set);
841 
842  return __K2.learnStructure(selector, init_graph);
843  } else {
846  sel_constraint;
847  static_cast< StructuralConstraintIndegree& >(sel_constraint) =
849 
850  GraphChangesSelector4DiGraph< decltype(sel_constraint),
851  decltype(op_set) >
852  selector(*__score, sel_constraint, op_set);
853 
854  return __K2.learnStructure(selector, init_graph);
855  }
856  }
857 
858  // ========================================================================
859  default:
861  "the learnDAG method has not been implemented for this "
862  "learning algorithm");
863  }
864  }
865 
867  const std::string& apriori = __getAprioriType();
868 
869  switch (__score_type) {
870  case ScoreType::AIC:
872 
873  case ScoreType::BD:
875 
876  case ScoreType::BDeu:
878 
879  case ScoreType::BIC:
881 
882  case ScoreType::K2:
884 
888 
889  default: return "genericBNLearner does not support yet this score";
890  }
891  }
892 
893 
// Restricts learning to the rows outside one fold of a k-fold split and
// returns the held-out fold.
//
// The rows of the score database are cut into `k_fold` consecutive folds of
// db_size / k_fold rows each (integer division). Fold number `learning_fold`
// (0-based) is held out: __ranges is overwritten with the row ranges lying
// before and after it, and the pair (first row of the fold, end of the fold)
// is returned.
//
// Errors are raised when k_fold == 0, when learning_fold >= k_fold and when
// k_fold >= number of rows. The first one is an OutOfBounds; the exception
// types of the other two sit on listing lines 903 and 911, which are not
// visible here.
//
// NOTE(review): when learning_fold == k_fold - 1 (and k_fold > 1) no range is
// pushed after the fold, so the db_size % k_fold trailing rows end up neither
// in __ranges nor in the returned pair.
// NOTE(review): the last error message streams db_size directly followed by
// "rows", i.e. without a separating space.
895  std::pair< std::size_t, std::size_t >
896  genericBNLearner::useCrossValidationFold(const std::size_t learning_fold,
897  const std::size_t k_fold) {
898  if (k_fold == 0) {
899  GUM_ERROR(OutOfBounds, "K-fold cross validation with k=0 is forbidden");
900  }
901 
902  if (learning_fold >= k_fold) {
904  "In " << k_fold << "-fold cross validation, the learning "
905  << "fold should be strictly lower than " << k_fold
906  << " but, here, it is equal to " << learning_fold);
907  }
908 
909  const std::size_t db_size = __score_database.databaseTable().nbRows();
910  if (k_fold >= db_size) {
912  "In " << k_fold << "-fold cross validation, the database's "
913  << "size should be strictly greater than " << k_fold
914  << " but, here, the database has only " << db_size
915  << "rows");
916  }
917 
918  // create the ranges of rows of the test database
919  const std::size_t foldSize = db_size / k_fold;
920  const std::size_t unfold_deb = learning_fold * foldSize;
921  const std::size_t unfold_end = unfold_deb + foldSize;
922 
// rebuild __ranges from scratch: everything except [unfold_deb, unfold_end)
923  __ranges.clear();
924  if (learning_fold == std::size_t(0)) {
// first fold held out: only the rows after it remain
925  __ranges.push_back(
926  std::pair< std::size_t, std::size_t >(unfold_end, db_size));
927  } else {
// rows located before the held-out fold
928  __ranges.push_back(
929  std::pair< std::size_t, std::size_t >(std::size_t(0), unfold_deb));
930 
// rows located after the held-out fold, unless it is the last fold
931  if (learning_fold != k_fold - 1) {
932  __ranges.push_back(
933  std::pair< std::size_t, std::size_t >(unfold_end, db_size));
934  }
935  }
936 
937  return std::pair< std::size_t, std::size_t >(unfold_deb, unfold_end);
938  }
939 
940 
941  std::pair< double, double > genericBNLearner::chi2(
942  const NodeId id1, const NodeId id2, const std::vector< NodeId >& knowing) {
943  __createApriori();
947  parser, *__apriori, databaseRanges());
948 
949  return chi2score.statistics(id1, id2, knowing);
950  }
951 
952  std::pair< double, double >
953  genericBNLearner::chi2(const std::string& name1,
954  const std::string& name2,
955  const std::vector< std::string >& knowing) {
956  std::vector< NodeId > knowingIds;
957  std::transform(
958  knowing.begin(),
959  knowing.end(),
960  std::back_inserter(knowingIds),
961  [this](const std::string& c) -> NodeId { return this->idFromName(c); });
962  return chi2(idFromName(name1), idFromName(name2), knowingIds);
963  }
964 
965  std::pair< double, double > genericBNLearner::G2(
966  const NodeId id1, const NodeId id2, const std::vector< NodeId >& knowing) {
967  __createApriori();
971 
972  return g2score.statistics(id1, id2, knowing);
973  }
974 
975  std::pair< double, double >
976  genericBNLearner::G2(const std::string& name1,
977  const std::string& name2,
978  const std::vector< std::string >& knowing) {
979  std::vector< NodeId > knowingIds;
980  std::transform(
981  knowing.begin(),
982  knowing.end(),
983  std::back_inserter(knowingIds),
984  [this](const std::string& c) -> NodeId { return this->idFromName(c); });
985  return G2(idFromName(name1), idFromName(name2), knowingIds);
986  }
987 
988  double genericBNLearner::logLikelihood(const std::vector< NodeId >& vars,
989  const std::vector< NodeId >& knowing) {
990  __createApriori();
994  parser, *__apriori, databaseRanges());
995 
996  std::vector< NodeId > total(vars);
997  total.insert(total.end(), knowing.begin(), knowing.end());
998  double LLtotal = ll2score.score(IdSet<>(total, false, true));
999  if (knowing.size() == (Size)0) {
1000  return LLtotal;
1001  } else {
1002  double LLknw = ll2score.score(IdSet<>(knowing, false, true));
1003  return LLtotal - LLknw;
1004  }
1005  }
1006 
1007  double
1008  genericBNLearner::logLikelihood(const std::vector< std::string >& vars,
1009  const std::vector< std::string >& knowing) {
1010  std::vector< NodeId > ids;
1011  std::vector< NodeId > knowingIds;
1012 
1013  auto mapper = [this](const std::string& c) -> NodeId {
1014  return this->idFromName(c);
1015  };
1016 
1017  std::transform(vars.begin(), vars.end(), std::back_inserter(ids), mapper);
1018  std::transform(
1019  knowing.begin(), knowing.end(), std::back_inserter(knowingIds), mapper);
1020 
1021  return logLikelihood(ids, knowingIds);
1022  }
1023 
1024 
1025  } /* namespace learning */
1026 
1027 } /* namespace gum */
void useNML()
use the kNML penalty function
AlgoType __selected_algo
the selected learning algorithm
the class for structural constraints limiting the number of parents of nodes in a directed graph ...
void insert(const T1 &first, const T2 &second)
Inserts a new association in the gum::Bijection.
const std::vector< std::string, ALLOC< std::string > > & variableNames()
returns the names of the variables in the input dataset
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
the class for computing BDeu scores
Definition: scoreBDeu.h:59
ApproximationScheme & approximationScheme()
returns the approximation policy of the learning algorithm
Score * __score
the score used
double score(const IdSet< ALLOC > &idset)
returns the score for a given IdSet
Base class for every random variable.
Definition: variable.h:66
virtual void addNodeWithId(const NodeId id)
try to insert a node with the given id
Database __score_database
the database to be used by the scores and parameter estimators
Idx pos(const Key &key) const
Returns the position of the object passed in argument (if it exists).
Definition: sequence_tpl.h:518
virtual void setWeight(const double weight)
sets the weight of the a priori (kind of effective sample size)
the structural constraint for forbidding the creation of some arcs during structure learning ...
CorrectedMutualInformation ::KModeTypes __3off2_kmode
the penalty used in 3off2
const std::string & __getAprioriType() const
returns the type (as a string) of a given apriori
double __EMepsilon
epsilon for EM; if epsilon = 0.0, EM is not used
std::pair< std::size_t, std::size_t > useCrossValidationFold(const std::size_t learning_fold, const std::size_t k_fold)
sets the ranges of rows to be used for cross-validation learning
The class computing n times the corrected mutual information, as used in the 3off2 algorithm...
const ArcSet & arcs() const
returns the set of mandatory arcs
static void __checkFileName(const std::string &filename)
checks whether the extension of a CSV filename is correct
The base class for all the scores used for learning (BIC, BDeu, etc)
Definition: score.h:52
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const std::vector< std::string > & missingSymbols() const
returns the set of missing symbols taken into account
void setRanges(const std::vector< std::pair< std::size_t, std::size_t >, XALLOC< std::pair< std::size_t, std::size_t > > > &new_ranges)
sets new ranges to perform the countings used by the parameter estimator
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
MixedGraph learnMixedStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of an Essential Graph
Definition: Miic.cpp:113
void __createScore()
create the score used for learning
the structural constraint for forbidding the creation of some arcs except those defined in the class ...
The class used to pack sets of generators.
the class for computing Bayesian Dirichlet (BD) log2 scores
Definition: scoreBD.h:65
StructuralConstraintSliceOrder __constraint_SliceOrder
the constraint for 2TBNs
Database & operator=(const Database &from)
copy operator
the structural constraint indicating that some arcs shall never be removed or reversed ...
virtual void eraseArc(const Arc &arc)
removes an arc from the ArcGraphPart
Miic __miic_3off2
the 3off2 algorithm
the class for computing Chi2 independence test scores
Definition: indepTestChi2.h:48
virtual void addEdge(const NodeId first, const NodeId second)
insert a new edge into the undirected graph
Definition: undiGraph_inl.h:35
ParamEstimatorType __param_estimator_type
the type of the parameter estimator
void addConstraints(HashTable< std::pair< NodeId, NodeId >, char > constraints)
Sets a collection of constraints for the orientation phase.
Definition: Miic.cpp:1067
STL namespace.
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set...
Definition: idSet.h:48
A class that redirects gum_signal from algorithms to the listeners of BNLearn.
the base class for all a priori
Definition: apriori.h:50
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
DatabaseTable __database
the database itself
the class for computing K2 scores (actually their log2 value)
Definition: scoreK2.h:61
MixedGraph __prepare_miic_3off2()
prepares the initial graph for 3off2 or miic
std::pair< double, double > chi2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for chi2 test in the database.
bool exists(const NodeId id) const
alias for existsNode
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
double logLikelihood(const std::vector< NodeId > &vars, const std::vector< NodeId > &knowing={})
Return the loglikelihood of vars in the base, conditioned by knowing for the BNLearner.
std::pair< double, double > G2(const NodeId id1, const NodeId id2, const std::vector< NodeId > &knowing={})
Return the <statistic,pvalue> pair for the G2 test in the database.
AprioriType __apriori_type
the a priori selected for the score and parameters
NodeId idFromName(const std::string &var_name) const
returns the node id corresponding to a variable name
the internal apriori for the BDeu score (N' / (r_i * q_i)); BDeu is a BD score with a N'/(r_i * q_i) apr...
Definition: aprioriBDeu.h:54
The class for generic Hash Tables.
Definition: hashTable.h:679
the class for computing Log2-likelihood scores
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <chi2 statistic,pvalue> for a test var1 indep var2 given rhs_ids
CorrectedMutualInformation * __mutual_info
the selected correction for 3off2 and miic
A Dirichlet a priori: computes its N'_ijk from a database.
the class for computing G2 independence test scores
Definition: indepTestG2.h:48
DAG __initial_dag
an initial DAG given to learners
const Sequence< NodeId > & order() const noexcept
returns the current order
The mechanism to compute the next available graph changes for directed structure learning search algor...
StructuralConstraintMandatoryArcs __constraint_MandatoryArcs
the constraint on mandatory arcs
std::size_t nbVariables() const noexcept
returns the number of variables (columns) of the database
genericBNLearner(const std::string &filename, const std::vector< std::string > &missing_symbols)
default constructor
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Database * __apriori_database
the database used by the Dirichlet a priori
LocalSearchWithTabuList __local_search_with_tabu_list
the local search with tabu list algorithm
DatabaseTable readFile(const std::string &filename)
const ApproximationScheme * __current_algorithm
std::pair< double, double > statistics(NodeId var1, NodeId var2, const std::vector< NodeId, ALLOC< NodeId > > &rhs_ids={})
get the pair <G2statistic,pvalue> for a test var1 indep var2 given rhs_ids
bool hasMissingValues() const
indicates whether the database contains some missing values
the "meta-programming" class for storing structural constraints. In aGrUM, there are two ways to store ...
ParamEstimator * __createParamEstimator(DBRowGeneratorParser<> &parser, bool take_into_account_score=true)
create the parameter estimator used for learning
void __createCorrectedMutualInformation()
create the Corrected Mutual Information instance for Miic/3off2
const ArcSet & arcs() const
returns the set of mandatory arcs
Apriori * __apriori
the apriori used
StructuralConstraintTabuList __constraint_TabuList
the constraint for tabu lists
std::string __apriori_dbname
the filename for the Dirichlet a priori, if any
void fillDatabase(DATABASE< ALLOC > &database, const bool retry_insertion=false)
fills the rows of the database table
StructuralConstraintPossibleEdges __constraint_PossibleEdges
the constraint on possible Edges
std::size_t insertTranslator(const Translator< ALLOC > &translator, const std::size_t column, const bool unique_column=true)
inserts a new translator at the end of the translator set
DAG __learnDAG()
returns the DAG learnt
GreedyHillClimbing __greedy_hill_climbing
the greedy hill climbing algorithm
std::size_t nbRows() const noexcept
returns the number of records (rows) in the database
Base class for all aGrUM's exceptions.
Definition: exceptions.h:106
genericBNLearner & operator=(const genericBNLearner &)
copy operator
const DatabaseTable & databaseTable() const
returns the internal database table
The basic class for computing the next graph changes possible in a structure learning algorithm...
std::string checkScoreAprioriCompatibility()
checks whether the current score and apriori are compatible
the class for computing AIC scores
Definition: scoreAIC.h:52
the class for computing BIC scores
Definition: scoreBIC.h:52
virtual const Apriori< ALLOC > & internalApriori() const =0
returns the internal apriori of the score
virtual void addArc(const NodeId tail, const NodeId head)
insert a new arc into the directed graph
Definition: DAG_inl.h:43
DAG learnDAG()
learn a structure from a file (must have read the db before)
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
std::vector< std::pair< std::size_t, std::size_t > > __ranges
the set of rows' ranges within the database in which learning is done
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping between node ids and their columns in the database
The class representing a tabular database as used by learning tasks.
MixedGraph learnMixedStructure()
learn a partial structure from a file (must have read the db before and must have selected miic or 3o...
StructuralConstraintIndegree __constraint_Indegree
the constraint for indegrees
ScoreType __score_type
the score selected for learning
const std::vector< std::pair< std::size_t, std::size_t > > & databaseRanges() const
returns the current database rows' ranges used for learning
std::size_t insertTranslator(const DBTranslator< ALLOC > &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
virtual ~genericBNLearner()
destructor
DBRowGeneratorParser * __parser
the parser used for reading the database
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
A pack of learning algorithms that can easily be used.
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Definition: K2_tpl.h:41
static DatabaseTable __readFile(const std::string &filename, const std::vector< std::string > &missing_symbols)
reads a file and returns a databaseVectInRam
DBRowGeneratorParser & parser()
returns the parser for the database
virtual void setVariableNames(const std::vector< std::string, ALLOC< std::string > > &names, const bool from_external_object=true) final
sets the names of the variables
double __apriori_weight
the weight of the apriori
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
void __createApriori()
create the apriori used for learning
The class imposing an N-sized tabu list as a structural constraint for learning algorithms.
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
the smooth a priori: adds a weight w to all the counts
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
std::vector< std::size_t > __domain_sizes
the domain sizes of the variables (useful to speed-up computations)
DAG learnStructure(GRAPH_CHANGES_SELECTOR &selector, DAG initial_dag=DAG())
learns the structure of a Bayes net
Bijection< NodeId, std::size_t > __nodeId2cols
a bijection assigning to each variable name its NodeId
Database(const std::string &file, const std::vector< std::string > &missing_symbols)
default constructor
the class for packing together the translators used to preprocess the datasets
The databases' cell translators for labelized variables.
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition: types.h:48
const DatabaseTable & database() const
returns the database used by the BNLearner
StructuralConstraintForbiddenArcs __constraint_ForbiddenArcs
the constraint on forbidden arcs
void useMDL()
use the MDL penalty function
virtual std::string isAprioriCompatible() const final
indicates whether the apriori is compatible (meaningful) with the score
a helper to easily read databases
value_type & insert(const Key &key, const Val &val)
Adds a new element (actually a copy of this element) into the hash table.
The base class for estimating parameters of CPTs.
The class for estimating parameters of CPTs using Maximum Likelihood.
const DBVector< std::string > & variableNames() const noexcept
returns the variable names for all the columns of the database
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The basic class for computing the next graph changes possible in a structure learning algorithm...
Base class for dag.
Definition: DAG.h:102
The base class for structural constraints used by learning algorithms that learn a directed graph str...
Size NodeId
Type for node ids.
Definition: graphElements.h:98
void reorder(const std::size_t k, const bool k_is_input_col=false)
performs a reordering of the kth translator or of the first translator parsing the kth column of the ...
the no a priori class: corresponds to 0 weight-sample
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
DAG learnStructure(CorrectedMutualInformation<> &I, MixedGraph graph)
learns the structure of a Bayesian network, i.e. a DAG, by first learning an Essential graph and then ...
Definition: Miic.cpp:987
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55
void useNoCorr()
use no correction/penalty function
The base class for structural constraints imposed by DAGs.
Base class for mixed graphs.
Definition: mixedGraph.h:127
the structural constraint imposing a partial order over nodes