// Constructor fragment (presumably CSVParser<ALLOC>::CSVParser; the line
// carrying the function name is not visible in this listing).
//
// The visible member initializers:
//   - store the field delimiter string,
//   - build __delimiterPlusSpaces as the concatenation __delimiter + __spaces,
//   - start the line counter __nbLine at 0,
//   - store the comment-marker and quote-marker characters,
//   - keep the ADDRESS of `instream` in __instream (no copy of the stream).
//
// NOTE(review): __delimiterPlusSpaces is computed from the members
// __delimiter and __spaces, so both must already be initialized at that
// point; the initializer for __spaces is not visible here - confirm the
// member declaration order in the class.
// NOTE(review): since only a pointer to `instream` is kept, the stream
// presumably has to outlive the parser - confirm against callers.
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS 36 template <
template<
typename>
class ALLOC>
38 std::istream& instream,
39 const std::string& delimiter,
40 const char commentmarker,
41 const char quoteMarker,
44 , __delimiter( delimiter )
46 , __delimiterPlusSpaces( __delimiter + __spaces )
47 , __nbLine(
std::size_t(0) )
48 , __commentMarker( commentmarker )
49 , __quoteMarker( quoteMarker )
51 , __instream( &instream )
58 template <
template<
typename>
class ALLOC>
// Locates the next token of `str`, scanning from index `from`, and reports
// three positions through the reference parameters:
//   first_letter_token - index of the first character not in __spaces,
//   next_token         - index of the next character belonging to __delimiter,
//   last_letter_token  - index of the last character of the token.
// Any of them may be std::string::npos when the corresponding search fails.
//
// NOTE(review): several lines of this function (returns, closing braces,
// the else branch header, the error handling) are not visible in this
// listing; comments below describe only the visible statements.
72 template <
template<
typename>
class ALLOC>
73 void CSVParser<ALLOC>::__getNextTriplet(
const std::string& str,
74 std::size_t& first_letter_token,
75 std::size_t& next_token,
76 std::size_t& last_letter_token,
77 std::size_t from )
const {
78 first_letter_token = str.find_first_not_of( __spaces, from );
// Nothing but spacing characters remain: all three outputs become npos.
80 if ( first_letter_token == std::string::npos ) {
81 next_token = last_letter_token = first_letter_token;
// Quoted token: its end is the quote marker matching the opening one.
85 if ( str.at( first_letter_token ) == __quoteMarker ) {
86 last_letter_token = __correspondingQuoteMarker( str, first_letter_token );
// No matching quote was found (handling not visible in this listing).
88 if ( last_letter_token == std::string::npos )
// The delimiter is searched only AFTER the closing quote, so delimiter
// characters inside the quoted text are not considered.
91 next_token = str.find_first_of( __delimiter, last_letter_token + 1 );
92 std::size_t next_char =
93 str.find_first_not_of( __spaces, last_letter_token + 1 );
// A non-spacing character sits between the closing quote and the next
// delimiter (handling not visible; presumably reported as a syntax error).
95 if ( next_char < next_token ) {
// Presumably the unquoted branch: the delimiter is searched from the first
// character of the token.
100 next_token = str.find_first_of( __delimiter, first_letter_token );
// No delimiter left: the token ends at the last non-spacing character of
// the string (searching backwards from npos means "from the end").
102 if ( next_token == std::string::npos ) {
103 last_letter_token = str.find_last_not_of( __spaces, next_token );
// The delimiter is the very first non-spacing character found.
105 else if ( next_token == first_letter_token ) {
106 last_letter_token = first_letter_token;
// Backward search, starting just before the delimiter, for a character that
// is neither a delimiter nor a spacing character (the assignment target is
// on a line not visible here; presumably last_letter_token).
110 str.find_last_not_of( __delimiterPlusSpaces, next_token - 1 );
// Splits the line `s` into fields and stores them in __data.
//
// Visible steps:
//   1. find where a trailing comment starts, ignoring comment markers that
//      lie inside quoted sections;
//   2. keep only the part of the line before that position;
//   3. walk over the remaining text token by token, writing one entry of
//      __data per token, then shrink __data to the number of fields.
//
// NOTE(review): several lines (error handling, counter updates, closing
// braces, part of the __getNextTriplet calls) are not visible in this
// listing; comments below describe only the visible statements.
116 template <
template<
typename>
class ALLOC>
117 void CSVParser<ALLOC>::__tokenize(
const std::string& s ) {
119 std::size_t commentMarker = s.find_first_of( __commentMarker, 0 );
120 std::size_t quoteMarker = s.find_first_of( __quoteMarker, 0 );
121 std::size_t quoteMarkerEnd;
// As long as a quote opens before the candidate comment marker, locate the
// matching closing quote and push the comment-marker search past it.
123 while ( quoteMarker < commentMarker ) {
124 quoteMarkerEnd = __correspondingQuoteMarker( s, quoteMarker );
// Unterminated quote (handling not visible in this listing).
126 if ( quoteMarkerEnd == std::string::npos )
129 while ( commentMarker < quoteMarkerEnd ) {
130 commentMarker = s.find_first_of( __commentMarker, commentMarker + 1 );
133 quoteMarker = s.find_first_of( __quoteMarker, quoteMarkerEnd + 1 );
// Drop the comment part; when commentMarker is npos the whole line is kept.
136 std::string str = s.substr( 0, commentMarker );
138 std::size_t counter = 0, first_letter_token, next_token, last_letter_token;
// Arguments of the first token lookup, starting at index 0 (the callee name
// is on a line not visible here; presumably __getNextTriplet).
141 str, first_letter_token, next_token, last_letter_token, 0 );
143 while ( ( std::string::npos != first_letter_token ) &&
144 ( std::string::npos != last_letter_token ) ) {
// Grow __data on demand so that index `counter` is valid.
145 if ( __data.size() <= counter ) __data.resize( counter + 1 );
// The delimiter is at the token start: the field is empty.
147 if ( first_letter_token == next_token ) {
148 __data[counter] =
"";
// Regular field: copy the characters first_letter_token..last_letter_token.
150 else if ( last_letter_token >= first_letter_token ) {
151 const std::size_t fieldlength =
152 last_letter_token + 1 - first_letter_token;
153 __data[counter].resize( fieldlength );
154 __data[counter].assign( str, first_letter_token, fieldlength );
157 __data[counter] =
"";
// No further delimiter: the token just stored was the last one.
162 if ( next_token == std::string::npos )
break;
164 __getNextTriplet( str,
// All three positions are npos. The visible statements then size __data to
// `counter` entries and empty the last one.
// NOTE(review): presumably the case of a line ending with a delimiter
// (trailing empty field); the counter update is not visible - confirm.
172 if ( ( first_letter_token == std::string::npos ) &&
173 ( last_letter_token == first_letter_token ) &&
174 ( next_token == first_letter_token ) ) {
176 __data.resize( counter );
177 __data[counter - 1] =
"";
// Discard entries left over from a previously parsed, longer line.
180 __data.resize( counter );
// Fragment that re-targets the parser at another input (presumably
// CSVParser<ALLOC>::useNewStream; the line carrying the function name and
// the stream parameter is not visible in this listing).
//
// The visible assignments replace the delimiter, rebuild
// __delimiterPlusSpaces from the new delimiter and __spaces, reset the line
// counter __nbLine to 0, replace the comment and quote markers, and store
// the address of `instream` in __instream.
//
// NOTE(review): only a pointer to the stream is kept, so the stream
// presumably has to stay alive while the parser uses it - confirm.
188 template <
template<
typename>
class ALLOC>
190 const std::string& delimiter,
191 const char commentmarker,
192 const char quoteMarker ) {
194 __delimiter = delimiter;
196 __delimiterPlusSpaces = __delimiter + __spaces;
197 __nbLine = std::size_t(0);
198 __commentMarker = commentmarker;
199 __quoteMarker = quoteMarker;
201 __instream = &instream;
// Line-reading loop (presumably CSVParser<ALLOC>::next; the signature and
// the return statements are not visible in this listing).
//
// Reads lines from *__instream into __line until one is worth parsing.
// A line is skipped when it is empty, when it contains only characters of
// __spaces, or when its first non-spacing character is the comment marker.
// The first line that passes these checks is handed to __tokenize.
//
// NOTE(review): any update of the line counter happens on lines not visible
// here.
207 template <
template<
typename>
class ALLOC>
209 while ( getline( *__instream, __line ) ) {
// Empty line: nothing to parse.
212 if ( __line.size() == std::size_t (0) )
continue;
215 std::size_t lastPos =
216 __line.find_first_not_of( __spaces, std::size_t(0) );
// Line made only of spacing characters.
218 if ( lastPos == std::string::npos )
continue;
// Line whose first significant character starts a comment.
220 if ( __line.at( lastPos ) == __commentMarker )
continue;
222 __tokenize( __line );
// Searches `str` for the quote marker that closes the one located at `pos`.
//
// Visible behavior:
//   - candidates are searched strictly after the current position `res`;
//   - std::string::npos is returned when no further quote marker exists;
//   - for each candidate, `before` is the last position before it that does
//     not hold a backslash, so res - before - 1 is the number of consecutive
//     backslashes directly preceding the candidate;
//   - ( res - before ) % 2 == 1 therefore means an even number of
//     backslashes, i.e. the candidate itself is not escaped.
//
// NOTE(review): the enclosing loop, the return type and the statements
// executed by the last two tests are not visible in this listing;
// presumably the unescaped candidate is returned and escaped ones are
// skipped - confirm.
231 template <
template<
typename>
class ALLOC>
233 CSVParser<ALLOC>::__correspondingQuoteMarker(
const std::string& str,
234 std::size_t pos )
const {
235 std::size_t res = pos, before;
238 res = str.find_first_of( __quoteMarker, res + 1 );
240 if ( res == std::string::npos )
return res;
242 before = str.find_last_not_of(
'\\', res - 1 );
// Every character before the candidate is a backslash (handling not
// visible in this listing).
244 if ( before == std::string::npos )
247 if ( ( res - before ) % 2 == 1 )
// Accessor fragment returning a const reference to a vector of strings
// allocated with ALLOC (presumably CSVParser<ALLOC>::current; the line
// carrying the function name is not visible in this listing).
//
// The only visible statement raises a NullElement error with the message
// "No parsed data" through GUM_ERROR.
// NOTE(review): the condition guarding the error and the normal return are
// not visible; presumably the error is raised when no line has been parsed
// yet - confirm.
255 template <
template<
typename>
class ALLOC>
256 INLINE
const std::vector<std::string,ALLOC<std::string>>&
259 GUM_ERROR( NullElement,
"No parsed data" );
// Fragment of another template member whose signature is not visible in
// this listing (presumably CSVParser<ALLOC>::nbLine - TODO confirm).
//
// The only visible statement raises a NullElement error with the message
// "No parsed data" through GUM_ERROR; the guarding condition and the normal
// return value are on lines not visible here.
266 template <
template<
typename>
class ALLOC>
269 GUM_ERROR( NullElement,
"No parsed data" );
void useNewStream(std::istream &in, const std::string &delimiter=",", const char commentmarker='#', const char quoteMarker='"' )
reopens a new input stream to parse
const std::size_t nbLine() const
returns the current line number within the stream
ALLOC< std::string > allocator_type
type for the allocators passed in arguments of methods
#define GUM_SYNTAX_ERROR(msg, line, column)
Copyright 2005-2019 Pierre-Henri WUILLEMIN et Christophe GONZALES (LIP6) {prenom.nom}_at_lip6.fr.
CSVParser(std::istream &in, const std::string &delimiter=",", const char commentmarker='#', const char quoteMarker='"', const allocator_type& alloc = allocator_type () )
default constructor
bool next()
gets the next line of the csv stream and parses it
virtual ~CSVParser()
destructor
const std::vector< std::string, ALLOC< std::string > > & current() const
returns the current parsed line
std::size_t Size
In aGrUM, hashed values are unsigned long int.
#define GUM_ERROR(type, msg)
Class for fast parsing of CSV files (never more than one line is held in application memory) ...