aGrUM
0.20.2
a C++ library for (probabilistic) graphical models
paramEstimatorML_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright 2005-2020 Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/** @file
23
* @brief the class for estimating parameters of CPTs using Maximum Likelihood
24
*
25
* @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26
*/
27
#
ifndef
DOXYGEN_SHOULD_SKIP_THIS
28
29
// if this define is active, the estimator will not accept to assess numbers for a
30
// distribution with no case at all (N_ij=0). if this define is not active, the
31
// estimator will use an uniform distribution for this case.
32
//# define GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
33
34
namespace
gum
{
35
36
namespace
learning
{
37
38
/// default constructor
39
template
<
template
<
typename
>
class
ALLOC >
40
INLINE ParamEstimatorML<
ALLOC
>::
ParamEstimatorML
(
41
const
DBRowGeneratorParser
<
ALLOC
>&
parser
,
42
const
Apriori
<
ALLOC
>&
external_apriori
,
43
const
Apriori
<
ALLOC
>&
score_internal_apriori
,
44
const
std
::
vector
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
>,
45
ALLOC
<
std
::
pair
<
std
::
size_t
,
std
::
size_t
> > >&
ranges
,
46
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
47
nodeId2columns
,
48
const
typename
ParamEstimatorML
<
ALLOC
>::
allocator_type
&
alloc
) :
49
ParamEstimator
<
ALLOC
>(
parser
,
50
external_apriori
,
51
score_internal_apriori
,
52
ranges
,
53
nodeId2columns
,
54
alloc
) {
55
GUM_CONSTRUCTOR
(
ParamEstimatorML
);
56
}
57
58
59
/// default constructor
60
template
<
template
<
typename
>
class
ALLOC
>
61
INLINE
ParamEstimatorML
<
ALLOC
>::
ParamEstimatorML
(
62
const
DBRowGeneratorParser
<
ALLOC
>&
parser
,
63
const
Apriori
<
ALLOC
>&
external_apriori
,
64
const
Apriori
<
ALLOC
>&
score_internal_apriori
,
65
const
Bijection
<
NodeId
,
std
::
size_t
,
ALLOC
<
std
::
size_t
> >&
66
nodeId2columns
,
67
const
typename
ParamEstimatorML
<
ALLOC
>::
allocator_type
&
alloc
) :
68
ParamEstimator
<
ALLOC
>(
parser
,
69
external_apriori
,
70
score_internal_apriori
,
71
nodeId2columns
,
72
alloc
) {
73
GUM_CONSTRUCTOR
(
ParamEstimatorML
);
74
}
75
76
77
/// copy constructor with a given allocator
78
template
<
template
<
typename
>
class
ALLOC
>
79
INLINE
ParamEstimatorML
<
ALLOC
>::
ParamEstimatorML
(
80
const
ParamEstimatorML
<
ALLOC
>&
from
,
81
const
typename
ParamEstimatorML
<
ALLOC
>::
allocator_type
&
alloc
) :
82
ParamEstimator
<
ALLOC
>(
from
,
alloc
) {
83
GUM_CONS_CPY
(
ParamEstimatorML
);
84
}
85
86
87
/// copy constructor
88
template
<
template
<
typename
>
class
ALLOC
>
89
INLINE
ParamEstimatorML
<
ALLOC
>::
ParamEstimatorML
(
90
const
ParamEstimatorML
<
ALLOC
>&
from
) :
91
ParamEstimatorML
<
ALLOC
>(
from
,
this
->
getAllocator
()) {}
92
93
94
/// move constructor with a given allocator
95
template
<
template
<
typename
>
class
ALLOC
>
96
INLINE
ParamEstimatorML
<
ALLOC
>::
ParamEstimatorML
(
97
ParamEstimatorML
<
ALLOC
>&&
from
,
98
const
typename
ParamEstimatorML
<
ALLOC
>::
allocator_type
&
alloc
) :
99
ParamEstimator
<
ALLOC
>(
std
::
move
(
from
),
alloc
) {
100
GUM_CONS_MOV
(
ParamEstimatorML
);
101
}
102
103
104
/// move constructor
105
template
<
template
<
typename
>
class
ALLOC
>
106
INLINE
ParamEstimatorML
<
ALLOC
>::
ParamEstimatorML
(
107
ParamEstimatorML
<
ALLOC
>&&
from
) :
108
ParamEstimatorML
<
ALLOC
>(
std
::
move
(
from
),
this
->
getAllocator
()) {}
109
110
111
/// virtual copy constructor with a given allocator
112
template
<
template
<
typename
>
class
ALLOC
>
113
ParamEstimatorML
<
ALLOC
>*
ParamEstimatorML
<
ALLOC
>::
clone
(
114
const
typename
ParamEstimatorML
<
ALLOC
>::
allocator_type
&
alloc
)
const
{
115
ALLOC
<
ParamEstimatorML
<
ALLOC
> >
allocator
(
alloc
);
116
ParamEstimatorML
<
ALLOC
>*
new_score
=
allocator
.
allocate
(1);
117
try
{
118
allocator
.
construct
(
new_score
, *
this
,
alloc
);
119
}
catch
(...) {
120
allocator
.
deallocate
(
new_score
, 1);
121
throw
;
122
}
123
124
return
new_score
;
125
}
126
127
128
/// virtual copy constructor
129
template
<
template
<
typename
>
class
ALLOC
>
130
ParamEstimatorML
<
ALLOC
>*
ParamEstimatorML
<
ALLOC
>::
clone
()
const
{
131
return
clone
(
this
->
getAllocator
());
132
}
133
134
135
/// destructor
136
template
<
template
<
typename
>
class
ALLOC
>
137
ParamEstimatorML
<
ALLOC
>::~
ParamEstimatorML
() {
138
GUM_DESTRUCTOR
(
ParamEstimatorML
);
139
}
140
141
142
/// copy operator
143
template
<
template
<
typename
>
class
ALLOC
>
144
ParamEstimatorML
<
ALLOC
>&
ParamEstimatorML
<
ALLOC
>::
operator
=(
145
const
ParamEstimatorML
<
ALLOC
>&
from
) {
146
ParamEstimator
<
ALLOC
>::
operator
=(
from
);
147
return
*
this
;
148
}
149
150
151
/// move operator
152
template
<
template
<
typename
>
class
ALLOC
>
153
ParamEstimatorML
<
ALLOC
>&
154
ParamEstimatorML
<
ALLOC
>::
operator
=(
ParamEstimatorML
<
ALLOC
>&&
from
) {
155
ParamEstimator
<
ALLOC
>::
operator
=(
std
::
move
(
from
));
156
return
*
this
;
157
}
158
159
160
/// returns the CPT's parameters corresponding to a given set of nodes
161
template
<
template
<
typename
>
class
ALLOC
>
162
std
::
vector
<
double
,
ALLOC
<
double
> >
ParamEstimatorML
<
ALLOC
>::
parameters
(
163
const
NodeId
target_node
,
164
const
std
::
vector
<
NodeId
,
ALLOC
<
NodeId
> >&
conditioning_nodes
) {
165
// create an idset that contains all the nodes in the following order:
166
// first, the target node, then all the conditioning nodes
167
IdCondSet
<
ALLOC
>
idset
(
target_node
,
conditioning_nodes
,
true
);
168
169
// get the counts for all the nodes in the idset and add the external and
170
// score internal aprioris
171
std
::
vector
<
double
,
ALLOC
<
double
> >
N_ijk
(
172
this
->
counter_
.
counts
(
idset
,
true
));
173
const
bool
informative_external_apriori
174
=
this
->
external_apriori_
->
isInformative
();
175
const
bool
informative_score_internal_apriori
176
=
this
->
score_internal_apriori_
->
isInformative
();
177
if
(
informative_external_apriori
)
178
this
->
external_apriori_
->
addAllApriori
(
idset
,
N_ijk
);
179
if
(
informative_score_internal_apriori
)
180
this
->
score_internal_apriori_
->
addAllApriori
(
idset
,
N_ijk
);
181
182
183
// now, normalize N_ijk
184
185
// here, we distinguish nodesets with conditioning nodes from those
186
// without conditioning nodes
187
if
(!
conditioning_nodes
.
empty
()) {
188
// get the counts for all the conditioning nodes, and add them the
189
// external and score internal aprioris
190
std
::
vector
<
double
,
ALLOC
<
double
> >
N_ij
(
191
this
->
counter_
.
counts
(
idset
.
conditionalIdCondSet
(),
false
));
192
if
(
informative_external_apriori
)
193
this
->
external_apriori_
->
addConditioningApriori
(
idset
,
N_ij
);
194
if
(
informative_score_internal_apriori
)
195
this
->
score_internal_apriori_
->
addConditioningApriori
(
idset
,
N_ij
);
196
197
const
std
::
size_t
conditioning_domsize
=
N_ij
.
size
();
198
const
std
::
size_t
target_domsize
=
N_ijk
.
size
() /
conditioning_domsize
;
199
200
#
ifdef
GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
201
// check that all conditioning nodes have strictly positive counts
202
for
(
std
::
size_t
j
=
std
::
size_t
(0);
j
<
conditioning_domsize
; ++
j
) {
203
if
(
N_ij
[
j
] == 0) {
204
// get the domain sizes of the conditioning nodes
205
const
std
::
size_t
cond_nb
=
conditioning_nodes
.
size
();
206
std
::
vector
<
Idx
>
cond_domsize
(
cond_nb
);
207
208
const
auto
&
node2cols
=
this
->
counter_
.
nodeId2Columns
();
209
const
auto
&
database
=
this
->
counter_
.
database
();
210
if
(
node2cols
.
empty
()) {
211
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
cond_nb
; ++
i
) {
212
cond_domsize
[
i
] =
database
.
domainSize
(
conditioning_nodes
[
i
]);
213
}
214
}
else
{
215
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
cond_nb
; ++
i
) {
216
cond_domsize
[
i
]
217
=
database
.
domainSize
(
node2cols
.
second
(
conditioning_nodes
[
i
]));
218
}
219
}
220
221
// determine the value of each conditioning variable in N_ij[j]
222
std
::
vector
<
Idx
>
offsets
(
cond_nb
);
223
Idx
offset
= 1;
224
std
::
size_t
i
;
225
for
(
i
=
std
::
size_t
(0);
i
<
cond_nb
; ++
i
) {
226
offsets
[
i
] =
offset
;
227
offset
*=
cond_domsize
[
i
];
228
}
229
std
::
vector
<
Idx
>
values
(
cond_nb
);
230
i
= 0;
231
offset
=
j
;
232
for
(
Idx
jj
=
cond_nb
- 1;
i
<
cond_nb
; ++
i
, --
jj
) {
233
values
[
jj
] =
offset
/
offsets
[
jj
];
234
offset
%=
offsets
[
jj
];
235
}
236
237
// create the error message
238
std
::
stringstream
str
;
239
str
<<
"The conditioning set <"
;
240
bool
deja
=
true
;
241
for
(
i
=
std
::
size_t
(0);
i
<
cond_nb
; ++
i
) {
242
if
(
deja
)
243
str
<<
", "
;
244
else
245
deja
=
true
;
246
std
::
size_t
col
=
node2cols
.
empty
()
247
?
conditioning_nodes
[
i
]
248
:
node2cols
.
second
(
conditioning_nodes
[
i
]);
249
const
DiscreteVariable
&
var
250
=
dynamic_cast
<
const
DiscreteVariable
& >(
database
.
variable
(
col
));
251
str
<<
var
.
name
() <<
"="
<<
var
.
labels
()[
values
[
i
]];
252
}
253
auto
target_col
254
=
node2cols
.
empty
() ?
target_node
:
node2cols
.
second
(
target_node
);
255
const
Variable
&
var
=
database
.
variable
(
target_col
);
256
str
<<
"> for target node "
<<
var
.
name
()
257
<<
" never appears in the database. Please consider using "
258
<<
"priors such as smoothing."
;
259
260
GUM_ERROR
(
DatabaseError
,
str
.
str
());
261
}
262
}
263
#
endif
// GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
264
265
// normalize the counts
266
for
(
std
::
size_t
j
=
std
::
size_t
(0),
k
=
std
::
size_t
(0);
267
j
<
conditioning_domsize
;
268
++
j
) {
269
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
target_domsize
; ++
i
, ++
k
) {
270
#
ifdef
GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
271
N_ijk
[
k
] /=
N_ij
[
j
];
272
#
else
// GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
273
if
(
N_ij
[
j
] != 0) {
274
N_ijk
[
k
] /=
N_ij
[
j
];
275
}
else
{
276
N_ijk
[
k
] = 1.0 /
target_domsize
;
277
}
278
#
endif
// GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
279
}
280
}
281
}
else
{
282
// here, there are no conditioning nodes. Hence N_ijk is the marginal
283
// probability distribution over the target node. To normalize it, it
284
// is sufficient to divide each cell by the sum over all the cells
285
double
sum
= 0;
286
for
(
const
double
n_ijk
:
N_ijk
)
287
sum
+=
n_ijk
;
288
289
if
(
sum
!= 0) {
290
for
(
double
&
n_ijk
:
N_ijk
)
291
n_ijk
/=
sum
;
292
}
else
{
293
#
ifdef
GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
294
std
::
stringstream
str
;
295
296
const
auto
&
node2cols
=
this
->
counter_
.
nodeId2Columns
();
297
const
auto
&
database
=
this
->
counter_
.
database
();
298
auto
target_col
299
=
node2cols
.
empty
() ?
target_node
:
node2cols
.
second
(
target_node
);
300
const
Variable
&
var
=
database
.
variable
(
target_col
);
301
str
<<
"No data for target node "
<<
var
.
name
()
302
<<
". It is impossible to estimate the parameters by maximum "
303
"likelihood"
;
304
GUM_ERROR
(
DatabaseError
,
str
.
str
());
305
#
else
// GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
306
for
(
double
&
n_ijk
:
N_ijk
)
307
n_ijk
= 1.0 /
N_ijk
.
size
();
308
#
endif
// GUM_PARAMESTIMATOR_ERROR_WHEN_NIJ_IS_NULL
309
}
310
}
311
312
return
N_ijk
;
313
}
314
315
}
/* namespace learning */
316
317
}
/* namespace gum */
318
319
#
endif
/* DOXYGEN_SHOULD_SKIP_THIS */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:669
gum::learning::genericBNLearner::Database::Database
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
Definition:
genericBNLearner_tpl.h:31