aGrUM  0.20.3
a C++ library for (probabilistic) graphical models
CSVParser.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (c) 2005-2020 by Christophe GONZALES(@AMU) and Pierre-Henri WUILLEMIN(@LIP6) *
3  * info_at_agrum_dot_org *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 2 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
20 /** @file
21  * @brief Class for fast parsing of CSV file (never more than one line in
22  * application memory)
23  *
24  * Typical use :
25  * @code
26  * // open the CSV file
27  * std::string filename="foo.csv";
28  * std::ifstream in(filename.c_str());
29  * gum::learning::CSVParser<> csvp(in);
30  *
31  * // read each line in the CSV file
32  * while (csvp.next()) {
33  * csvp.current ();
34  * }
35  *
36  * in.close();
37  * @endcode
38  *
39  * @author Pierre-Henri WUILLEMIN(@LIP6) & Christophe GONZALES(@AMU)
40  *
41  */
42 
43 #ifndef GUM_CSV_PARSER_H
44 #define GUM_CSV_PARSER_H
45 
46 #include <istream>
47 #include <string>
48 #include <vector>
49 
50 #include <agrum/agrum.h>
51 
52 namespace gum {
53 
54  namespace learning {
55 
56  /** @class CSVParser
57  * @ingroup learning_database
58  * @headerfile CSVParser.h <agrum/tools/database/CSVParser.h>
59  * @brief Class for fast parsing of CSV file (never more than one
60  * line in application memory)
61  *
62  * Typical use:
63  * @code
64  * // open the CSV file
65  * std::string filename="foo.csv";
66  * std::ifstream in(filename.c_str());
67  * gum::learning::CSVParser<> csvp(in);
68  *
69  * // read each line in the CSV file
70  * while (csvp.next()) {
71  * csvp.current ();
72  * }
73  *
74  * in.close();
75  * @endcode
76  */
77  template <template<typename> class ALLOC = std::allocator>
78  class CSVParser {
79  public:
80 
81  /// type for the allocators passed in arguments of methods
82  using allocator_type = ALLOC<std::string>;
83 
84 
85  // ##########################################################################
86  /// @name Constructors / Destructors
87  // ##########################################################################
88  /// @{
89 
90  /// default constructor
91  /** @param in an input stream containing the CSV
92  * @param delimiter the character that acts as the column separator in
93  * the CSV
94  * @param commentmarker the character that marks the beginning of a comment
95  * @param quoteMarker the character that is used to quote the sentences
96  * in the CSV
97  * @param alloc the allocator used by all the methods
98  */
99  CSVParser( std::istream& in,
100  const std::string& delimiter = ",",
101  const char commentmarker = '#',
102  const char quoteMarker = '"',
103  const allocator_type& alloc = allocator_type () );
104 
105  /// destructor
106  virtual ~CSVParser();
107 
108  /// @}
109 
110 
111  // ########################################################################
112  /// @name Accessors / Modifiers
113  // ########################################################################
114  /// @{
115 
116  /// gets the next line of the csv stream and parses it
117  /** @return false if there is no next line
118  */
119  bool next();
120 
121  /// returns the current parsed line
122  /** @throw NullElement is raised if there is no data
123  */
124  const std::vector<std::string,ALLOC<std::string>>& current() const;
125 
126  /// returns the current line number within the stream
127  const std::size_t nbLine() const;
128 
129  /// reopens a new input stream to parse
130  void useNewStream ( std::istream& in,
131  const std::string& delimiter = ",",
132  const char commentmarker = '#',
133  const char quoteMarker = '"' );
134 
135  /// @}
136 
137 
138 #ifndef DOXYGEN_SHOULD_SKIP_THIS
139 
140  private:
141  void _getNextTriplet_( const std::string& str,
142  std::size_t& first_letter_token,
143  std::size_t& next_token,
144  std::size_t& last_letter_token,
145  std::size_t from ) const;
146 
147  void _tokenize_( const std::string& str );
148 
149  std::size_t _correspondingQuoteMarker_( const std::string& str,
150  std::size_t pos ) const;
151 
152 
153  std::string _line_;
154  std::string _delimiter_;
155  std::string _spaces_;
156  std::string _delimiterPlusSpaces_;
157  std::size_t _nbLine_;
158  char _commentMarker_;
159  char _quoteMarker_;
160  bool _emptyData_;
161 
162  std::istream* _instream_;
163  std::vector<std::string,ALLOC<std::string>> _data_;
164 
165 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
166 
167  };
168 
169  } // namespace learning
170 
171 } // namespace gum
172 
173 #include <agrum/tools/database/CSVParser_tpl.h>
174 
175 #endif // GUM_CSV_PARSER_H