aGrUM
0.20.3
a C++ library for (probabilistic) graphical models
rawDatabaseTable_tpl.h
Go to the documentation of this file.
1
/**
2
*
3
* Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
4
* info_at_agrum_dot_org
5
*
6
* This library is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Lesser General Public License as published by
8
* the Free Software Foundation, either version 3 of the License, or
9
* (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public License
17
* along with this library. If not, see <http://www.gnu.org/licenses/>.
18
*
19
*/
20
21
22
/** @file
23
* @brief The implementation of raw tabular databases stored in memory (RAM)
24
*
25
* @author Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6)
26
*/
27
#
include
<
agrum
/
tools
/
database
/
rawDatabaseTable
.
h
>
28
29
#
ifndef
DOXYGEN_SHOULD_SKIP_THIS
30
31
namespace
gum
{
32
33
namespace
learning
{
34
35
36
// default constructor
37
template
<
template
<
typename
>
class
ALLOC
>
38
template
<
template
<
typename
>
class
VARALLOC,
template
<
typename
>
class
MISSALLOC >
39
INLINE RawDatabaseTable<
ALLOC
>::
RawDatabaseTable
(
40
const
typename
RawDatabaseTable
<
ALLOC
>::
template
MissingValType
<
MISSALLOC
>&
41
missing_symbols
,
42
const
std
::
vector
<
std
::
string
,
VARALLOC
<
std
::
string
> >&
var_names
,
43
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
44
IDatabaseTable
<
DBCell
,
ALLOC
>(
missing_symbols
,
var_names
,
alloc
),
45
_ignored_cols_
(
alloc
) {
46
GUM_CONSTRUCTOR
(
RawDatabaseTable
);
47
}
48
49
50
// default constructor
51
template
<
template
<
typename
>
class
ALLOC
>
52
template
<
template
<
typename
>
class
MISSALLOC
>
53
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
54
const
typename
RawDatabaseTable
<
ALLOC
>::
template
MissingValType
<
MISSALLOC
>&
55
missing_symbols
,
56
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
57
IDatabaseTable
<
DBCell
,
ALLOC
>(
missing_symbols
,
58
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >(),
59
alloc
),
60
_ignored_cols_
(
alloc
) {
61
GUM_CONSTRUCTOR
(
RawDatabaseTable
);
62
}
63
64
65
// default constructor
66
template
<
template
<
typename
>
class
ALLOC
>
67
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
68
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
69
IDatabaseTable
<
DBCell
,
ALLOC
>(
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >(),
70
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >(),
71
alloc
),
72
_ignored_cols_
(
alloc
) {
73
GUM_CONSTRUCTOR
(
RawDatabaseTable
);
74
}
75
76
77
// copy constructor with a given allocator
78
template
<
template
<
typename
>
class
ALLOC
>
79
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
80
const
RawDatabaseTable
<
ALLOC
>&
from
,
81
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
82
IDatabaseTable
<
DBCell
,
ALLOC
>(
from
,
alloc
),
83
_ignored_cols_
(
from
.
_ignored_cols_
,
alloc
) {
84
GUM_CONS_CPY
(
RawDatabaseTable
);
85
}
86
87
// copy constructor
88
template
<
template
<
typename
>
class
ALLOC
>
89
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
const
RawDatabaseTable
<
ALLOC
>&
from
) :
90
RawDatabaseTable
<
ALLOC
>(
from
,
from
.
getAllocator
()) {}
91
92
93
// move constructor with a given allocator
94
template
<
template
<
typename
>
class
ALLOC
>
95
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
96
RawDatabaseTable
<
ALLOC
>&&
from
,
97
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
) :
98
IDatabaseTable
<
DBCell
,
ALLOC
>(
std
::
move
(
from
),
alloc
),
99
_ignored_cols_
(
std
::
move
(
from
.
_ignored_cols_
)) {
100
GUM_CONS_MOV
(
RawDatabaseTable
);
101
}
102
103
104
// move constructor
105
template
<
template
<
typename
>
class
ALLOC
>
106
INLINE
RawDatabaseTable
<
ALLOC
>::
RawDatabaseTable
(
RawDatabaseTable
<
ALLOC
>&&
from
) :
107
RawDatabaseTable
<
ALLOC
>(
std
::
move
(
from
),
from
.
getAllocator
()) {}
108
109
110
// virtual copy constructor
111
template
<
template
<
typename
>
class
ALLOC
>
112
RawDatabaseTable
<
ALLOC
>*
RawDatabaseTable
<
ALLOC
>::
clone
(
113
const
typename
RawDatabaseTable
<
ALLOC
>::
allocator_type
&
alloc
)
const
{
114
ALLOC
<
RawDatabaseTable
<
ALLOC
> >
allocator
(
alloc
);
115
RawDatabaseTable
<
ALLOC
>*
new_db
=
allocator
.
allocate
(1);
116
try
{
117
allocator
.
construct
(
new_db
, *
this
,
alloc
);
118
}
catch
(...) {
119
allocator
.
deallocate
(
new_db
, 1);
120
throw
;
121
}
122
123
return
new_db
;
124
}
125
126
127
// virtual copy constructor
128
template
<
template
<
typename
>
class
ALLOC
>
129
RawDatabaseTable
<
ALLOC
>*
RawDatabaseTable
<
ALLOC
>::
clone
()
const
{
130
return
clone
(
this
->
getAllocator
());
131
}
132
133
134
// destructor
135
template
<
template
<
typename
>
class
ALLOC
>
136
INLINE
RawDatabaseTable
<
ALLOC
>::~
RawDatabaseTable
() {
137
GUM_DESTRUCTOR
(
RawDatabaseTable
);
138
}
139
140
141
// copy operator
142
template
<
template
<
typename
>
class
ALLOC
>
143
RawDatabaseTable
<
ALLOC
>&
144
RawDatabaseTable
<
ALLOC
>::
operator
=(
const
RawDatabaseTable
<
ALLOC
>&
from
) {
145
if
(
this
!= &
from
) {
146
IDatabaseTable
<
DBCell
,
ALLOC
>::
operator
=(
from
);
147
_ignored_cols_
=
from
.
_ignored_cols_
;
148
}
149
return
*
this
;
150
}
151
152
153
// move constructor
154
template
<
template
<
typename
>
class
ALLOC
>
155
RawDatabaseTable
<
ALLOC
>&
156
RawDatabaseTable
<
ALLOC
>::
operator
=(
RawDatabaseTable
<
ALLOC
>&&
from
) {
157
if
(
this
!= &
from
) {
158
IDatabaseTable
<
DBCell
,
ALLOC
>::
operator
=(
std
::
move
(
from
));
159
_ignored_cols_
=
std
::
move
(
from
.
_ignored_cols_
);
160
}
161
return
*
this
;
162
}
163
164
165
// sets the names of the variables
166
template
<
template
<
typename
>
class
ALLOC
>
167
void
RawDatabaseTable
<
ALLOC
>::
setVariableNames
(
168
const
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >&
names
,
169
const
bool
from_external_object
) {
170
const
std
::
size_t
size
=
names
.
size
();
171
const
std
::
size_t
ignored_cols_size
=
_ignored_cols_
.
size
();
172
173
if
(!
from_external_object
|| !
ignored_cols_size
) {
174
if
(
this
->
rows_
.
empty
() || (
size
==
this
->
rows_
[0].
size
())) {
175
this
->
variable_names_
=
names
;
176
}
else
{
177
GUM_ERROR
(
SizeError
,
178
"the number of variable's names (i.e., "
179
<<
size
<<
") does not correspond to the number of columns of the "
180
<<
"raw database table (i.e.,"
<<
this
->
rows_
[0].
size
() <<
")"
);
181
}
182
}
else
{
183
// check that the size of the names vector (after removing the ignored
184
// columns) is the same as the rest of the database
185
std
::
size_t
ignored_size
=
std
::
size_t
(0);
186
187
// find the number of ignored cols
188
for
(
auto
iter
=
_ignored_cols_
.
rbegin
(),
rend
=
_ignored_cols_
.
rend
();
iter
!=
rend
;
189
++
iter
, ++
ignored_size
) {
190
if
(*
iter
<
size
) {
break
; }
191
}
192
ignored_size
=
ignored_cols_size
-
ignored_size
;
193
194
if
(
this
->
rows_
.
empty
() || (
size
==
this
->
rows_
[0].
size
() +
ignored_size
)) {
195
DBVector
<
std
::
string
>
new_names
;
196
for
(
std
::
size_t
i
=
std
::
size_t
(0),
j
=
std
::
size_t
(0);
i
<
size
; ++
i
) {
197
if
(
i
!=
_ignored_cols_
[
j
]) {
198
new_names
.
push_back
(
names
[
i
]);
199
}
else
{
200
if
(++
j
==
ignored_cols_size
) {
201
for
(++
i
;
i
<
size
; ++
i
) {
202
new_names
.
push_back
(
names
[
i
]);
203
}
204
}
205
}
206
}
207
this
->
variable_names_
=
std
::
move
(
new_names
);
208
return
;
209
}
else
{
210
GUM_ERROR
(
SizeError
,
211
"the number of variable's names excluding the ignored "
212
<<
"columns (i.e., "
<< (
size
-
ignored_size
)
213
<<
") does not correspond to the number of columns of the "
214
<<
"raw database table (i.e.,"
<<
this
->
rows_
[0].
size
() <<
")"
);
215
}
216
}
217
}
218
219
220
/// makes the database table ignore from now on the kth column
221
template
<
template
<
typename
>
class
ALLOC
>
222
void
RawDatabaseTable
<
ALLOC
>::
ignoreColumn
(
const
std
::
size_t
k
,
223
const
bool
from_external_object
) {
224
// first, compute the value that k would have in an external database
225
// and compute where the new value should be inserted
226
std
::
size_t
i
;
// where to insert the new k into the ignored colums
227
std
::
size_t
kk
=
k
;
// kk = k value for an external database
228
const
std
::
size_t
size
=
_ignored_cols_
.
size
();
229
230
if
(
from_external_object
) {
231
for
(
i
=
std
::
size_t
(0);
i
<
size
; ++
i
) {
232
if
(
k
<=
_ignored_cols_
[
i
]) {
233
if
(
k
==
_ignored_cols_
[
i
])
return
;
234
break
;
235
}
236
}
237
}
else
{
238
for
(
i
=
std
::
size_t
(0);
i
<
size
; ++
i
, ++
kk
) {
239
if
(
kk
<=
_ignored_cols_
[
i
]) {
240
if
(
kk
==
_ignored_cols_
[
i
])
return
;
241
break
;
242
}
243
}
244
}
245
246
// the column of _rows_ and variable_names_ impacted by the ignoreColumn
247
// operation is therefore equal to kk-i. So, we should check that such
248
// a column exists and, if so, we should remove the column from _rows_
249
// and from variable_names_. Note that if there is no more variable,
250
// _rows_ should become empty
251
const
std
::
size_t
col
=
kk
-
i
;
252
if
(
col
<
this
->
variable_names_
.
size
()) {
253
this
->
variable_names_
.
erase
(
this
->
variable_names_
.
begin
() +
col
);
254
if
(
this
->
variable_names_
.
empty
()) {
255
IDatabaseTable
<
DBCell
,
ALLOC
>::
eraseAllRows
();
256
}
else
{
257
const
std
::
size_t
nb_rows
=
this
->
rows_
.
size
();
258
if
(
nb_rows
!=
std
::
size_t
(0)) {
259
const
std
::
size_t
nb_cols
=
this
->
rows_
[0].
size
();
260
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
nb_rows
; ++
i
) {
261
auto
&
row
=
this
->
rows_
[
i
].
row
();
262
if
(
this
->
has_row_missing_val_
[
i
] ==
IsMissing
::
True
) {
263
bool
has_missing_val
=
false
;
264
for
(
std
::
size_t
j
=
std
::
size_t
(0);
j
<
nb_cols
; ++
j
) {
265
if
((
j
!=
col
) &&
row
[
j
].
isMissing
()) {
266
has_missing_val
=
true
;
267
break
;
268
}
269
}
270
if
(!
has_missing_val
)
this
->
has_row_missing_val_
[
i
] =
IsMissing
::
False
;
271
}
272
row
.
erase
(
row
.
begin
() +
col
);
273
}
274
}
275
}
276
}
277
278
// here, we know that we should insert kk at the ith index of _ignored_cols_
279
_ignored_cols_
.
push_back
(
std
::
size_t
(0));
280
for
(
std
::
size_t
j
=
size
;
j
>
i
; --
j
)
281
_ignored_cols_
[
j
] =
_ignored_cols_
[
j
- 1];
282
_ignored_cols_
[
i
] =
kk
;
283
}
284
285
286
/// returns the set of ignored columns
287
template
<
template
<
typename
>
class
ALLOC
>
288
INLINE
const
typename
RawDatabaseTable
<
ALLOC
>::
template
DBVector
<
std
::
size_t
>
289
RawDatabaseTable
<
ALLOC
>::
ignoredColumns
()
const
{
290
return
_ignored_cols_
;
291
}
292
293
294
/// returns the set of columns parsed
295
template
<
template
<
typename
>
class
ALLOC
>
296
const
typename
RawDatabaseTable
<
ALLOC
>::
template
DBVector
<
std
::
size_t
>
297
RawDatabaseTable
<
ALLOC
>::
inputColumns
()
const
{
298
const
auto
&
data
=
IDatabaseTable
<
DBCell
,
ALLOC
>::
content
();
299
if
(
data
.
empty
()) {
return
DBVector
<
std
::
size_t
>(); }
300
301
const
std
::
size_t
size
=
data
[0].
size
();
302
const
std
::
size_t
ignored_cols_size
=
_ignored_cols_
.
size
();
303
DBVector
<
std
::
size_t
>
cols
(
size
);
304
305
if
(!
ignored_cols_size
) {
306
for
(
std
::
size_t
i
=
std
::
size_t
(0);
i
<
size
; ++
i
) {
307
cols
[
i
] =
i
;
308
}
309
}
else
{
310
// fill the cols vector with consecutive values, excluding the
311
// ignored columns
312
std
::
size_t
i
=
std
::
size_t
(0);
// the consecutive values
313
std
::
size_t
k
=
std
::
size_t
(0);
// the index in col where we save values
314
std
::
size_t
j
=
std
::
size_t
(0);
// the index to parse the ignored columns
315
while
(
true
) {
316
if
(
i
!=
_ignored_cols_
[
j
]) {
317
cols
[
k
] =
i
;
318
if
(++
k
==
size
)
break
;
319
}
else
{
320
if
(++
j
==
ignored_cols_size
) {
321
for
(++
i
;
k
<
size
; ++
i
, ++
k
) {
322
cols
[
k
] =
i
;
323
}
324
break
;
325
}
326
}
327
++
i
;
328
}
329
}
330
331
return
cols
;
332
}
333
334
335
// translates a string into a DBCell and returns it
336
template
<
template
<
typename
>
class
ALLOC
>
337
INLINE
DBCell
RawDatabaseTable
<
ALLOC
>::
_convert_
(
const
std
::
string
&
elt
)
const
{
338
return
DBCell
::
bestDBCell
(
elt
,
this
->
missing_symbols_
);
339
}
340
341
342
// insert a new row at the end of the database
343
template
<
template
<
typename
>
class
ALLOC
>
344
void
RawDatabaseTable
<
ALLOC
>::
insertRow
(
345
const
std
::
vector
<
std
::
string
,
ALLOC
<
std
::
string
> >&
new_row
) {
346
// check that the size of the row (after removing the ignored columns) is
347
// the same as the rest of the database
348
const
std
::
size_t
row_size
=
new_row
.
size
();
349
const
std
::
size_t
ignored_cols_size
=
_ignored_cols_
.
size
();
350
std
::
size_t
ignored_size
=
std
::
size_t
(0);
351
if
(
ignored_cols_size
) {
352
// find the number of ignored cols
353
for
(
auto
iter
=
_ignored_cols_
.
rbegin
(),
rend
=
_ignored_cols_
.
rend
();
iter
!=
rend
;
354
++
iter
, ++
ignored_size
) {
355
if
(*
iter
<
row_size
) {
break
; }
356
}
357
ignored_size
=
ignored_cols_size
-
ignored_size
;
358
}
359
360
if
(!
this
->
isRowSizeOK_
(
row_size
-
ignored_size
)) {
361
GUM_ERROR
(
SizeError
,
362
"the new row has "
<< (
row_size
-
ignored_size
)
363
<<
" elements whereas the raw database table has "
364
<<
this
->
variable_names_
.
size
() <<
" columns"
);
365
}
366
367
// create the dbrow that will contain the new data
368
Row
<
DBCell
>
dbrow
;
369
dbrow
.
reserve
(
row_size
-
ignored_size
);
370
bool
has_missing_val
=
false
;
371
372
// translate the row into T_data and put them into the newly created dbrow
373
if
(
ignored_size
== 0) {
374
for
(
const
auto
&
elt
:
new_row
) {
375
const
DBCell
new_cell
(
this
->
_convert_
(
elt
));
376
if
(
new_cell
.
isMissing
())
has_missing_val
=
true
;
377
dbrow
.
pushBack
(
new_cell
);
378
}
379
}
else
{
380
for
(
std
::
size_t
i
=
std
::
size_t
(0),
j
=
std
::
size_t
(0);
i
<
row_size
; ++
i
) {
381
if
(
i
!=
_ignored_cols_
[
j
]) {
382
const
DBCell
new_cell
(
this
->
_convert_
(
new_row
[
i
]));
383
if
(
new_cell
.
isMissing
())
has_missing_val
=
true
;
384
dbrow
.
pushBack
(
new_cell
);
385
}
else
{
386
if
(++
j
==
ignored_size
) {
387
for
(++
i
;
i
<
row_size
; ++
i
) {
388
const
DBCell
new_cell
(
this
->
_convert_
(
new_row
[
i
]));
389
if
(
new_cell
.
isMissing
())
has_missing_val
=
true
;
390
dbrow
.
pushBack
(
new_cell
);
391
}
392
}
393
}
394
}
395
}
396
397
IDatabaseTable
<
DBCell
,
ALLOC
>::
insertRow
(
std
::
move
(
dbrow
),
398
has_missing_val
?
IsMissing
::
True
399
:
IsMissing
::
False
);
400
}
401
402
403
// erase the content of the database, including the names of the variables
404
template
<
template
<
typename
>
class
ALLOC
>
405
void
RawDatabaseTable
<
ALLOC
>::
clear
() {
406
_ignored_cols_
.
clear
();
407
IDatabaseTable
<
DBCell
,
ALLOC
>::
clear
();
408
}
409
410
411
}
/* namespace learning */
412
413
}
/* namespace gum */
414
415
#
endif
/* DOXYGEN_SHOULD_SKIP_THIS */
gum::Set::emplace
INLINE void emplace(Args &&... args)
Definition:
set_tpl.h:643
gum::learning::genericBNLearner::Database::Database
Database(const std::string &filename, const BayesNet< GUM_SCALAR > &bn, const std::vector< std::string > &missing_symbols)
Definition:
genericBNLearner_tpl.h:31