aGrUM  0.16.0
paramEstimatorML_tpl.h
Go to the documentation of this file.
1 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 
29 // if this define is active, the estimator will not accept to assess numbers for a
30 // distribution with no case at all (N_ij=0). if this define is not active, the
31 // estimator will use an uniform distribution for this case.
32 //# define GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
33 
34 namespace gum {
35 
36  namespace learning {
37 
39  template < template < typename > class ALLOC >
41  const DBRowGeneratorParser< ALLOC >& parser,
42  const Apriori< ALLOC >& external_apriori,
43  const Apriori< ALLOC >& score_internal_apriori,
44  const std::vector< std::pair< std::size_t, std::size_t >,
45  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
46  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
47  nodeId2columns,
48  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
49  ParamEstimator< ALLOC >(parser,
50  external_apriori,
51  score_internal_apriori,
52  ranges,
53  nodeId2columns,
54  alloc) {
55  GUM_CONSTRUCTOR(ParamEstimatorML);
56  }
57 
58 
60  template < template < typename > class ALLOC >
62  const DBRowGeneratorParser< ALLOC >& parser,
63  const Apriori< ALLOC >& external_apriori,
64  const Apriori< ALLOC >& score_internal_apriori,
65  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
66  nodeId2columns,
67  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
68  ParamEstimator< ALLOC >(parser,
69  external_apriori,
70  score_internal_apriori,
71  nodeId2columns,
72  alloc) {
73  GUM_CONSTRUCTOR(ParamEstimatorML);
74  }
75 
76 
78  template < template < typename > class ALLOC >
80  const ParamEstimatorML< ALLOC >& from,
81  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
82  ParamEstimator< ALLOC >(from, alloc) {
83  GUM_CONS_CPY(ParamEstimatorML);
84  }
85 
86 
88  template < template < typename > class ALLOC >
90  const ParamEstimatorML< ALLOC >& from) :
91  ParamEstimatorML< ALLOC >(from, this->getAllocator()) {}
92 
93 
95  template < template < typename > class ALLOC >
97  ParamEstimatorML< ALLOC >&& from,
98  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
99  ParamEstimator< ALLOC >(std::move(from), alloc) {
100  GUM_CONS_MOV(ParamEstimatorML);
101  }
102 
103 
105  template < template < typename > class ALLOC >
107  ParamEstimatorML< ALLOC >&& from) :
108  ParamEstimatorML< ALLOC >(std::move(from), this->getAllocator()) {}
109 
110 
112  template < template < typename > class ALLOC >
113  ParamEstimatorML< ALLOC >* ParamEstimatorML< ALLOC >::clone(
114  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) const {
115  ALLOC< ParamEstimatorML< ALLOC > > allocator(alloc);
116  ParamEstimatorML< ALLOC >* new_score = allocator.allocate(1);
117  try {
118  allocator.construct(new_score, *this, alloc);
119  } catch (...) {
120  allocator.deallocate(new_score, 1);
121  throw;
122  }
123 
124  return new_score;
125  }
126 
127 
129  template < template < typename > class ALLOC >
130  ParamEstimatorML< ALLOC >* ParamEstimatorML< ALLOC >::clone() const {
131  return clone(this->getAllocator());
132  }
133 
134 
136  template < template < typename > class ALLOC >
138  GUM_DESTRUCTOR(ParamEstimatorML);
139  }
140 
141 
143  template < template < typename > class ALLOC >
144  ParamEstimatorML< ALLOC >& ParamEstimatorML< ALLOC >::
145  operator=(const ParamEstimatorML< ALLOC >& from) {
147  return *this;
148  }
149 
150 
152  template < template < typename > class ALLOC >
153  ParamEstimatorML< ALLOC >& ParamEstimatorML< ALLOC >::
154  operator=(ParamEstimatorML< ALLOC >&& from) {
155  ParamEstimator< ALLOC >::operator=(std::move(from));
156  return *this;
157  }
158 
159 
161  template < template < typename > class ALLOC >
162  std::vector< double, ALLOC< double > > ParamEstimatorML< ALLOC >::parameters(
163  const NodeId target_node,
164  const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes) {
165  // create an idset that contains all the nodes in the following order:
166  // first, the target node, then all the conditioning nodes
167  IdSet< ALLOC > idset(target_node, conditioning_nodes, true);
168 
169  // get the counts for all the nodes in the idset and add the external and
170  // score internal aprioris
171  std::vector< double, ALLOC< double > > N_ijk(
172  this->_counter.counts(idset, true));
173  const bool informative_external_apriori =
174  this->_external_apriori->isInformative();
175  const bool informative_score_internal_apriori =
176  this->_score_internal_apriori->isInformative();
177  if (informative_external_apriori)
178  this->_external_apriori->addAllApriori(idset, N_ijk);
179  if (informative_score_internal_apriori)
180  this->_score_internal_apriori->addAllApriori(idset, N_ijk);
181 
182 
183  // now, normalize N_ijk
184 
185  // here, we distinguish nodesets with conditioning nodes from those
186  // without conditioning nodes
187  if (!conditioning_nodes.empty()) {
188  // get the counts for all the conditioning nodes, and add them the
189  // external and score internal aprioris
190  std::vector< double, ALLOC< double > > N_ij(
191  this->_counter.counts(idset.conditionalIdSet(), false));
192  if (informative_external_apriori)
193  this->_external_apriori->addConditioningApriori(idset, N_ij);
194  if (informative_score_internal_apriori)
195  this->_score_internal_apriori->addConditioningApriori(idset, N_ij);
196 
197  const std::size_t conditioning_domsize = N_ij.size();
198  const std::size_t target_domsize = N_ijk.size() / conditioning_domsize;
199 
200 # ifdef GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
201  // check that all conditioning nodes have strictly positive counts
202  for (std::size_t j = std::size_t(0); j < conditioning_domsize; ++j) {
203  if (N_ij[j] == 0) {
204  // get the domain sizes of the conditioning nodes
205  const std::size_t cond_nb = conditioning_nodes.size();
206  std::vector< Idx > cond_domsize(cond_nb);
207 
208  const auto& node2cols = this->_counter.nodeId2Columns();
209  const auto& database = this->_counter.database();
210  if (node2cols.empty()) {
211  for (std::size_t i = std::size_t(0); i < cond_nb; ++i) {
212  cond_domsize[i] = database.domainSize(conditioning_nodes[i]);
213  }
214  } else {
215  for (std::size_t i = std::size_t(0); i < cond_nb; ++i) {
216  cond_domsize[i] =
217  database.domainSize(node2cols.second(conditioning_nodes[i]));
218  }
219  }
220 
221  // determine the value of each conditioning variable in N_ij[j]
222  std::vector< Idx > offsets(cond_nb);
223  Idx offset = 1;
224  std::size_t i;
225  for (i = std::size_t(0); i < cond_nb; ++i) {
226  offsets[i] = offset;
227  offset *= cond_domsize[i];
228  }
229  std::vector< Idx > values(cond_nb);
230  i = 0;
231  offset = j;
232  for (Idx jj = cond_nb - 1; i < cond_nb; ++i, --jj) {
233  values[jj] = offset / offsets[jj];
234  offset %= offsets[jj];
235  }
236 
237  // create the error message
238  std::stringstream str;
239  str << "The conditioning set <";
240  bool deja = true;
241  for (i = std::size_t(0); i < cond_nb; ++i) {
242  if (deja)
243  str << ", ";
244  else
245  deja = true;
246  std::size_t col = node2cols.empty()
247  ? conditioning_nodes[i]
248  : node2cols.second(conditioning_nodes[i]);
249  const DiscreteVariable& var =
250  dynamic_cast< const DiscreteVariable& >(database.variable(col));
251  str << var.name() << "=" << var.labels()[values[i]];
252  }
253  auto target_col =
254  node2cols.empty() ? target_node : node2cols.second(target_node);
255  const Variable& var = database.variable(target_col);
256  str << "> for target node " << var.name()
257  << " never appears in the database. Please consider using "
258  << "priors such as smoothing.";
259 
260  GUM_ERROR(DatabaseError, str.str());
261  }
262  }
263 # endif // GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
264 
265  // normalize the counts
266  for (std::size_t j = std::size_t(0), k = std::size_t(0);
267  j < conditioning_domsize;
268  ++j) {
269  for (std::size_t i = std::size_t(0); i < target_domsize; ++i, ++k) {
270 # ifdef GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
271  N_ijk[k] /= N_ij[j];
272 # else // GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
273  if (N_ij[j] != 0) {
274  N_ijk[k] /= N_ij[j];
275  } else {
276  N_ijk[k] = 1.0 / target_domsize;
277  }
278 # endif // GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
279  }
280  }
281  } else {
282  // here, there are no conditioning nodes. Hence N_ijk is the marginal
283  // probability distribution over the target node. To normalize it, it
284  // is sufficient to divide each cell by the sum over all the cells
285  double sum = 0;
286  for (const double n_ijk : N_ijk)
287  sum += n_ijk;
288 
289  if (sum != 0) {
290  for (double& n_ijk : N_ijk)
291  n_ijk /= sum;
292  } else {
293 # ifdef GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
294  std::stringstream str;
295 
296  const auto& node2cols = this->_counter.nodeId2Columns();
297  const auto& database = this->_counter.database();
298  auto target_col =
299  node2cols.empty() ? target_node : node2cols.second(target_node);
300  const Variable& var = database.variable(target_col);
301  str << "No data for target node " << var.name()
302  << ". It is impossible to estimate the parameters by maximum "
303  "likelihood";
304  GUM_ERROR(DatabaseError, str.str());
305 # else // GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
306  for (double& n_ijk : N_ijk)
307  n_ijk = 1.0 / N_ijk.size();
308 # endif // GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
309  }
310  }
311 
312  return N_ijk;
313  }
314 
315  } /* namespace learning */
316 
317 } /* namespace gum */
318 
319 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
ParamEstimator< ALLOC > & operator=(const ParamEstimator< ALLOC > &from)
copy operator
ParamEstimator(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Apriori< ALLOC > &score_internal__apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
STL namespace.
virtual ~ParamEstimatorML()
destructor
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
Definition: agrum.h:25
Apriori< ALLOC > * _score_internal_apriori
if a score was used for learning the structure of the PGM, this is the a priori internal to the score...
Apriori< ALLOC > * _external_apriori
an external a priori
allocator_type getAllocator() const
returns the allocator used by the score
ParamEstimatorML(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Apriori< ALLOC > &score_internal__apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
ParamEstimatorML< ALLOC > & operator=(const ParamEstimatorML< ALLOC > &from)
copy operator
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
virtual ParamEstimatorML< ALLOC > * clone() const
virtual copy constructor
const DatabaseTable< ALLOC > & database() const
returns the database on which we perform the counts
virtual std::vector< double, ALLOC< double > > parameters(const NodeId target_node, const std::vector< NodeId, ALLOC< NodeId > > &conditioning_nodes)
returns the CPT&#39;s parameters corresponding to a given nodeset
Size NodeId
Type for node ids.
Definition: graphElements.h:98
RecordCounter< ALLOC > _counter
the record counter used to parse the database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:55