aGrUM  0.14.2
paramEstimatorML_tpl.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Christophe GONZALES and Pierre-Henri WUILLEMIN *
3  * {prenom.nom}_at_lip6.fr *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 
27 namespace gum {
28 
29  namespace learning {
30 
32  template < template < typename > class ALLOC >
34  const DBRowGeneratorParser< ALLOC >& parser,
35  const Apriori< ALLOC >& external_apriori,
36  const Apriori< ALLOC >& score_internal_apriori,
37  const std::vector< std::pair< std::size_t, std::size_t >,
38  ALLOC< std::pair< std::size_t, std::size_t > > >& ranges,
39  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
40  nodeId2columns,
41  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
42  ParamEstimator< ALLOC >(parser,
43  external_apriori,
44  score_internal_apriori,
45  ranges,
46  nodeId2columns,
47  alloc) {
48  GUM_CONSTRUCTOR(ParamEstimatorML);
49  }
50 
51 
53  template < template < typename > class ALLOC >
55  const DBRowGeneratorParser< ALLOC >& parser,
56  const Apriori< ALLOC >& external_apriori,
57  const Apriori< ALLOC >& score_internal_apriori,
58  const Bijection< NodeId, std::size_t, ALLOC< std::size_t > >&
59  nodeId2columns,
60  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
61  ParamEstimator< ALLOC >(parser,
62  external_apriori,
63  score_internal_apriori,
64  nodeId2columns,
65  alloc) {
66  GUM_CONSTRUCTOR(ParamEstimatorML);
67  }
68 
69 
71  template < template < typename > class ALLOC >
73  const ParamEstimatorML< ALLOC >& from,
74  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
75  ParamEstimator< ALLOC >(from, alloc) {
76  GUM_CONS_CPY(ParamEstimatorML);
77  }
78 
79 
81  template < template < typename > class ALLOC >
83  const ParamEstimatorML< ALLOC >& from) :
84  ParamEstimatorML< ALLOC >(from, this->getAllocator()) {}
85 
86 
88  template < template < typename > class ALLOC >
90  ParamEstimatorML< ALLOC >&& from,
91  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) :
92  ParamEstimator< ALLOC >(std::move(from), alloc) {
93  GUM_CONS_MOV(ParamEstimatorML);
94  }
95 
96 
98  template < template < typename > class ALLOC >
100  ParamEstimatorML< ALLOC >&& from) :
101  ParamEstimatorML< ALLOC >(std::move(from), this->getAllocator()) {}
102 
103 
105  template < template < typename > class ALLOC >
106  ParamEstimatorML< ALLOC >* ParamEstimatorML< ALLOC >::clone(
107  const typename ParamEstimatorML< ALLOC >::allocator_type& alloc) const {
108  ALLOC< ParamEstimatorML< ALLOC > > allocator(alloc);
109  ParamEstimatorML< ALLOC >* new_score = allocator.allocate(1);
110  try {
111  allocator.construct(new_score, *this, alloc);
112  } catch (...) {
113  allocator.deallocate(new_score, 1);
114  throw;
115  }
116 
117  return new_score;
118  }
119 
120 
122  template < template < typename > class ALLOC >
123  ParamEstimatorML< ALLOC >* ParamEstimatorML< ALLOC >::clone() const {
124  return clone(this->getAllocator());
125  }
126 
127 
129  template < template < typename > class ALLOC >
131  GUM_DESTRUCTOR(ParamEstimatorML);
132  }
133 
134 
136  template < template < typename > class ALLOC >
137  ParamEstimatorML< ALLOC >& ParamEstimatorML< ALLOC >::
138  operator=(const ParamEstimatorML< ALLOC >& from) {
140  return *this;
141  }
142 
143 
145  template < template < typename > class ALLOC >
146  ParamEstimatorML< ALLOC >& ParamEstimatorML< ALLOC >::
147  operator=(ParamEstimatorML< ALLOC >&& from) {
148  ParamEstimator< ALLOC >::operator=(std::move(from));
149  return *this;
150  }
151 
152 
154  template < template < typename > class ALLOC >
155  std::vector< double, ALLOC< double > > ParamEstimatorML< ALLOC >::parameters(
156  const NodeId target_node,
157  const std::vector< NodeId, ALLOC< NodeId > >& conditioning_nodes) {
158  // create an idset that contains all the nodes in the following order:
159  // first, the target node, then all the conditioning nodes
160  IdSet< ALLOC > idset(target_node, conditioning_nodes, true);
161 
162  // get the counts for all the nodes in the idset and add the external and
163  // score internal aprioris
164  std::vector< double, ALLOC< double > > N_ijk(
165  this->_counter.counts(idset, true));
166  const bool informative_external_apriori =
167  this->_external_apriori->isInformative();
168  const bool informative_score_internal_apriori =
169  this->_score_internal_apriori->isInformative();
170  if (informative_external_apriori)
171  this->_external_apriori->addAllApriori(idset, N_ijk);
172  if (informative_score_internal_apriori)
173  this->_score_internal_apriori->addAllApriori(idset, N_ijk);
174 
175 
176  // now, normalize N_ijk
177 
178  // here, we distinguish nodesets with conditioning nodes from those
179  // without conditioning nodes
180  if (!conditioning_nodes.empty()) {
181  // get the counts for all the conditioning nodes, and add them the
182  // external and score internal aprioris
183  std::vector< double, ALLOC< double > > N_ij(
184  this->_counter.counts(idset.conditionalIdSet(), false));
185  if (informative_external_apriori)
186  this->_external_apriori->addConditioningApriori(idset, N_ij);
187  if (informative_score_internal_apriori)
188  this->_score_internal_apriori->addConditioningApriori(idset, N_ij);
189 
190  const std::size_t conditioning_domsize = N_ij.size();
191  const std::size_t target_domsize = N_ijk.size() / conditioning_domsize;
192 
193  // check that all conditioning nodes have strictly positive counts
194  for (std::size_t j = std::size_t(0); j < conditioning_domsize; ++j) {
195  if (!N_ij[j]) {
196  // get the domain sizes of the conditioning nodes
197  const std::size_t cond_nb = conditioning_nodes.size();
198  std::vector< Idx > cond_domsize(cond_nb);
199 
200  const auto& node2cols = this->_counter.nodeId2Columns();
201  const auto& database = this->_counter.database();
202  if (node2cols.empty()) {
203  for (std::size_t i = std::size_t(0); i < cond_nb; ++i) {
204  cond_domsize[i] = database.domainSize(conditioning_nodes[i]);
205  }
206  } else {
207  for (std::size_t i = std::size_t(0); i < cond_nb; ++i) {
208  cond_domsize[i] =
209  database.domainSize(node2cols.second(conditioning_nodes[i]));
210  }
211  }
212 
213  // determine the value of each conditioning variable in N_ij[j]
214  std::vector< Idx > offsets(cond_nb);
215  Idx offset = 1;
216  std::size_t i;
217  for (i = std::size_t(0); i < cond_nb; ++i) {
218  offsets[i] = offset;
219  offset *= cond_domsize[i];
220  }
221  std::vector< Idx > values(cond_nb);
222  i = 0;
223  offset = j;
224  for (Idx jj = cond_nb - 1; i < cond_nb; ++i, --jj) {
225  values[jj] = offset / offsets[jj];
226  offset %= offsets[jj];
227  }
228 
229  // create the error message
230  std::stringstream str;
231  str << "The conditioning set <";
232  bool deja = true;
233  for (i = std::size_t(0); i < cond_nb; ++i) {
234  if (deja)
235  str << ", ";
236  else
237  deja = true;
238  std::size_t col = node2cols.empty()
239  ? conditioning_nodes[i]
240  : node2cols.second(conditioning_nodes[i]);
241  const DiscreteVariable& var =
242  dynamic_cast< const DiscreteVariable& >(database.variable(col));
243  str << var.name() << "=" << var.labels()[values[i]];
244  }
245  auto target_col =
246  node2cols.empty() ? target_node : node2cols.second(target_node);
247  const Variable& var = database.variable(target_col);
248  str << "> for target node " << var.name()
249  << " never appears in the database. Please consider using "
250  << "priors such as smoothing.";
251 
252  GUM_ERROR(DatabaseError, str.str());
253  }
254  }
255 
256  // normalize the counts
257  for (std::size_t j = std::size_t(0), k = std::size_t(0);
258  j < conditioning_domsize;
259  ++j) {
260  for (std::size_t i = std::size_t(0); i < target_domsize; ++i, ++k) {
261  N_ijk[k] /= N_ij[j];
262  }
263  }
264  } else {
265  // here, there are no conditioning nodes. Hence N_ijk is the marginal
266  // probability distribution over the target node. To normalize it, it
267  // is sufficient to divide each cell by the sum over all the cells
268  double sum = 0;
269  for (const double n_ijk : N_ijk)
270  sum += n_ijk;
271 
272  if (sum) {
273  for (double& n_ijk : N_ijk)
274  n_ijk /= sum;
275  } else {
276  GUM_ERROR(DatabaseError,
277  "The database being empty, it is impossible "
278  "to estimate the parameters by maximum likelihood");
279  }
280  }
281 
282  return N_ijk;
283  }
284 
285  } /* namespace learning */
286 
287 } /* namespace gum */
288 
289 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
ParamEstimator< ALLOC > & operator=(const ParamEstimator< ALLOC > &from)
copy operator
ParamEstimator(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Apriori< ALLOC > &score_internal__apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
STL namespace.
virtual ~ParamEstimatorML()
destructor
gum is the global namespace for all aGrUM entities
Definition: agrum.h:25
Apriori< ALLOC > * _score_internal_apriori
if a score was used for learning the structure of the PGM, this is the a priori internal to the score...
Apriori< ALLOC > * _external_apriori
an external a priori
allocator_type getAllocator() const
returns the allocator used by the score
ParamEstimatorML(const DBRowGeneratorParser< ALLOC > &parser, const Apriori< ALLOC > &external_apriori, const Apriori< ALLOC > &score_internal__apriori, const std::vector< std::pair< std::size_t, std::size_t >, ALLOC< std::pair< std::size_t, std::size_t > > > &ranges, const Bijection< NodeId, std::size_t, ALLOC< std::size_t > > &nodeId2columns=Bijection< NodeId, std::size_t, ALLOC< std::size_t > >(), const allocator_type &alloc=allocator_type())
default constructor
ParamEstimatorML< ALLOC > & operator=(const ParamEstimatorML< ALLOC > &from)
copy operator
ALLOC< NodeId > allocator_type
type for the allocators passed in arguments of methods
virtual ParamEstimatorML< ALLOC > * clone() const
virtual copy constructor
const DatabaseTable< ALLOC > & database() const
returns the database on which we perform the counts
virtual std::vector< double, ALLOC< double > > parameters(const NodeId target_node, const std::vector< NodeId, ALLOC< NodeId > > &conditioning_nodes)
returns the CPT's parameters corresponding to a given nodeset
Size NodeId
Type for node ids.
Definition: graphElements.h:97
RecordCounter< ALLOC > _counter
the record counter used to parse the database
#define GUM_ERROR(type, msg)
Definition: exceptions.h:52