-: 0:Source:CSV.cpp -: 0:Graph:CSV.gcno -: 0:Data:CSV.gcda -: 0:Runs:1 -: 0:Programs:1 -: 1:// CSV.cpp (C) 2008 adolfo@di-mare.com -: 2: -: 3:#ifdef English_dox -: 4:/** \file CSV.cpp -: 5: \brief Implementation for \c CSV.h. -: 6: \author Adolfo Di Mare -: 7: \date 2008 -: 8:*/ -: 9:#endif -: 10: -: 11:#ifdef Spanish_dox -: 12:/** \file CSV.cpp -: 13: \brief Implementación para \c CSV.h. -: 14: \author Adolfo Di Mare -: 15: \date 2008 -: 16:*/ -: 17:#endif -: 18: -: 19:#include "CSV.h" -: 20: -: 21:#define COMMA ',' -: 22:#define DQUOTE '"' -: 23:#define LF '\n' // Line Feed -: 24:#define CR '\r' // Carriage Return -: 25: -: 26: -: 27: -: 28:// Actions for the finite automaton used to parse CSV input -: 29:// ======================================================== -: 30:// [ ] ==> n=0; i=0; DATA[0] = ""; -: 31:// [ csv="" ] ==> ++n; DATA[n] = ""; ++i; -: 32:// [ ] ==> ++i; -: 33:// [ h+= ] ==> DATA[n] += str[i]; ++i; -: 34:// [ h='""' ] ==> DATA[n] = """"; ++i; -: 35:// [ END ] ==> return n; -: 36:// -: 37:// | ',' '\n' | '"' | l | -: 38:// delta() | comma+LF | d-quote | letter | -: 39:// ----------+------------+------------+------------+ -: 40:// ==> 0 | 0 | 1 | 3 | -: 41:// init | return | | csv+=ch | -: 42:// ----------+------------+------------+------------+ -: 43:// 1 | 1 | 2 | 1 | -: 44:// quoted(1)| csv+=ch | | csv+=ch | -: 45:// ----------+------------+------------+------------+ -: 46:// 2 | 0 | 1 | 3 | -: 47:// inquote(2)| return | csv+=ch | csv='""' | -: 48:// ----------+------------+------------+------------+ -: 49:// 3 | 0 | 3 | 3 | -: 50:// regular | return | csv+=ch | csv+=ch | -: 51:// ----------+------------+------------+------------+ -: 52: -: 53:#ifdef English_dox -: 54:/** Scans input stream \c CIN and returns the next CSV value. -: 55: - The retrieved value from \c CIN gets stored into \c csv. -: 56: - Works with \c char, not tested for \c wchar_t. -: 57: - Stops when \c CIN.fail() or when \c CIN.eof(). -: 58: - Will not remove any chars from the retrieved value. -: 59: -: 60: \return true when the CSV complies with RFC-4180. -: 61:*/ -: 62:#endif -: 63:#ifdef Spanish_dox -: 64:/** Obtiene del flujo de entrada \c CIN el siguiente valor CSV. -: 65: - El valor obtenido de \c CIN queda almacenado en \c csv. -: 66: - Trabaja bien con \c char, no ha sido probado para \c wchar_t. -: 67: - Para cuando \c CIN.fail() o cuando \c CIN.eof(). -: 68: - No elimina ningún caracter del valor obtenido. -: 69: -: 70: \return true cuando el campo CSV sigue la especificación RFC-4180. -: 71:*/ -: 72:#endif function _Z11automataCSVRSsRSi called 96 returned 100% blocks executed 94% 96: 73:bool automataCSV( std::string& csv, std::istream& CIN ) { 96: 74: csv.clear(); 96: 75: if ( CIN.fail() || CIN.eof() ) { // see http://www.horstmann.com/cpp/pitfalls.html #####: 76: return false; -: 77: } 96: 78: int state=0; char ch; 96: 79: bool trailing_CR = false; // true when the last char was CR 96: 80: bool ret_val = true; // true while csv complies with RFC-4180 540: 81: for (;;) { 444: 82: CIN.get(ch); 444: 83: if ( CIN.fail() || CIN.eof() ) { 15: 84: return ret_val; -: 85: } 429: 86: csv += ch; -: 87: 429: 88: switch (state) { -: 89: case 0: { // init 93: 90: if ( ch == COMMA ) { 11: 91: return ret_val; -: 92: } 82: 93: else if ( ch == LF ) { 4: 94: return ret_val; -: 95: } 78: 96: else if ( ch == CR ) { 1: 97: trailing_CR = true; 1: 98: state = 3; -: 99: } 77: 100: else if ( ch == DQUOTE ) { // | ',' '\n' | '"' | l | 26: 101: state = 1; // delta() | comma+LF | d-quote | letter | -: 102: } // ----------+------------+------------+------------+ -: 103: else { // letter // ==> 0 | 0 | 1 | 3 | 51: 104: state = 3; // init | return | | csv+=ch | -: 105: } // ----------+------------+------------+------------+ -: 106: } 51: 107: break; -: 108: -: 109: case 1: { // quote(1) 113: 110: if ( ch == DQUOTE ) { // | ',' '\n' | '"' | l | 31: 111: state = 2; // delta() | comma+LF | d-quote | letter | -: 112: } // ----------+------------+------------+------------+ -: 113: // else { // letter COMMA LF // 1 | 1 | 2 | 1 | -: 114: // state = 1; // quoted(1)| csv+=ch | | csv+=ch | -: 115: // } // ----------+------------+------------+------------+ -: 116: } 31: 117: break; -: 118: -: 119: case 2: { // inquote(2) 30: 120: if ( ch == COMMA ) { -: 121: // state = 0; 15: 122: return ret_val; 15: 123: } else if ( ch == LF ) { -: 124: // state = 0; 4: 125: return ret_val; -: 126: } 11: 127: else if ( trailing_CR ) { // ["...""..."\r?...,] '?' after '\r' 1: 128: trailing_CR = false; 1: 129: ret_val = false; 1: 130: state = 3; -: 131: } 10: 132: else if ( ch == CR ) { 3: 133: trailing_CR = true; -: 134: // state = 2; -: 135: } 7: 136: else if ( ch == DQUOTE ) { // | ',' '\n' | '"' | l | 5: 137: state = 1; // delta() | comma+LF | d-quote | letter | -: 138: } // ----------+------------+------------+------------+ -: 139: else { // letter (error) // 2 | 0 | 1 | 3 | 2: 140: ret_val = false; // inquote(2)| return | csv+=ch | csv='""' | 2: 141: state = 3; // ----------+------------+------------+------------+ -: 142: } // [," ... "" "3x,] ==> error condition ["3] -: 143: } 2: 144: break; -: 145: -: 146: case 3: { // regular 193: 147: if ( ch == COMMA ) { -: 148: // state = 0; 38: 149: return ret_val; -: 150: } 155: 151: else if ( ch == LF ) { -: 152: // state = 0; 9: 153: return ret_val; -: 154: } // | ',' '\n' | '"' | l | -: 155: else { // letter // delta() | comma+LF | d-quote | letter | -: 156: // state = 3; // ----------+------------+------------+------------+ -: 157: // swallows DQUOTE's && CR's // 3 | 0 | 3 | 3 | -: 158: } // regular | return | csv+=ch | csv+=ch | -: 159: } // ----------+------------+------------+------------+ 96: 160: break; -: 161: -: 162: } // swith (state) -: 163: } // for (;;) -: 164: 96: 165: return ret_val; -: 166:} -: 167: -: 168:void singleDQUOTE( std::string & str ); -: 169: function _Z10getNextCSVRSsRSi called 96 returned 100% blocks executed 100% 96: 170:bool getNextCSV( std::string& csv, std::istream& CIN ) { 96: 171: bool correct = automataCSV( csv, CIN ); 96: 172: bool ret_val = false; // true if ( csv[ csv.length()-1 ] == LF ) 96: 173: size_t N = csv.length(); // number of retrieved chars -: 174: 96: 175: if ( correct ) { 93: 176: if ( csv.empty() ) { 3: 177: return ret_val; -: 178: } 90: 179: N--; // last char 90: 180: if ( csv[N] == COMMA ) { 61: 181: csv.erase(N); // chop() trailing comma -: 182: } 29: 183: else if( csv[N] == LF ) { 17: 184: ret_val = true; 17: 185: csv.erase(N); // chop() trailing LF 17: 186: if ( N>0 ) { 13: 187: N--; 13: 188: if( csv[N] == CR ) { 9: 189: csv.erase(N); // chop() trailing CR -: 190: } -: 191: } -: 192: } -: 193: 90: 194: if ( ! csv.empty() ) { 74: 195: if ( csv[0] == DQUOTE ) { 23: 196: singleDQUOTE( csv ); // transfrom [""] ==> ["] -: 197: } -: 198: } -: 199: } -: 200: -: 201: else { // assert( correct == false ); 3: 202: if ( N>0 ) { 3: 203: N--; // last char 3: 204: if ( csv[N] == COMMA ) { 3: 205: csv.erase(N,1); // removes trailing comma -: 206: } -: 207: } -: 208: } 93: 209: return ret_val; -: 210:} -: 211: function _Z12setQuotedCSVRSsRKSs called 26 returned 100% blocks executed 85% 26: 212:void setQuotedCSV( std::string& res , const std::string& value ) { 26: 213: std::string::const_iterator ch; 26: 214: bool quote_surround = false; 26: 215: res.clear(); 126: 216: for ( ch = value.begin(); ch != value.end(); ++ch ) { 100: 217: if ( isspace( *ch ) || *ch == COMMA ) { 23: 218: quote_surround = true; -: 219: } 77: 220: else if ( *ch == DQUOTE ) { 6: 221: res += DQUOTE; 6: 222: quote_surround = true; -: 223: } 100: 224: res += *ch; -: 225: } -: 226: 26: 227: if ( quote_surround ) { 17: 228: res = DQUOTE + res + DQUOTE; -: 229: } -: 230:} -: 231: function _Z4trimRSs called 15 returned 100% blocks executed 88% 15: 232:void trim( std::string & str ) { 15: 233: if ( str.empty() ) { // already trimmed 1: 234: return; -: 235: } -: 236: -: 237: // find in-string range of chars str[i->j] 14: 238: std::string::size_type i = 0, LEN = str.length(); 32: 239: while ( i < LEN ) { 29: 240: if ( isspace(str[i]) ) { // trim traling whitespace 18: 241: ++i; -: 242: } -: 243: else { 14: 244: break; -: 245: } -: 246: } -: 247: 14: 248: std::string::size_type j = LEN; 36: 249: while ( j > 0 ) { 33: 250: --j; 33: 251: if ( ! isspace(str[j]) ) { 14: 252: break; -: 253: } -: 254: } -: 255: // leave out leading and trailing whitespace 14: 256: str = str.substr(i,j-i+1); -: 257:} -: 258: function _Z7trimCSVRSs called 9 returned 100% blocks executed 100% 9: 259:void trimCSV( std::string & str ) { 9: 260: trim( str ); // 1) trim() 9: 261: if ( str.empty() ) { 2: 262: return; -: 263: } -: 264: -: 265: // D-Quoted??? 7: 266: std::string::size_type N = str.length()-1; 7: 267: if ( str[0] != DQUOTE || str[N] != DQUOTE ) { 5: 268: return; -: 269: } -: 270: -: 271: // Substitute each double DQUOTE's by a single DQUOTE 5: 272: singleDQUOTE( str ); 9: 273: return; -: 274:} -: 275: -: 276: -: 277:#ifdef English_dox -: 278:/// Substitute each double DQUOTE's by a single DQUOTE within \c str. -: 279:#endif -: 280:#ifdef Spanish_dox -: 281:/// Sustituey cada letra DQUOTE doble por una solaletra DQUOTE en \c str. -: 282:#endif function _Z12singleDQUOTERSs called 28 returned 100% blocks executed 85% 28: 283:void singleDQUOTE( std::string & str ) { -: 284: // Substitute each double DQUOTE's by a single DQUOTE 28: 285: std::string tmp; 28: 286: std::string::const_iterator from, next; 158: 287: for ( from = str.begin(); from != str.end(); ++from ) { 156: 288: tmp.push_back( *from ); 156: 289: if ( *from == DQUOTE ) { // already copied the first 60: 290: next = from; next++; 60: 291: if ( next == str.end() ) { 26: 292: break; -: 293: } 34: 294: else if ( *next == DQUOTE ) { 8: 295: from = next; // don´t copy the second DQUOTE -: 296: } -: 297: } -: 298: } -: 299: // Removed enclosing (outermost) DQUOTE's 28: 300: str = tmp.substr(1, tmp.length()-2); 28: 301: return; -: 302:} -: 303: function _Z4chopRSsc called 7 returned 100% blocks executed 100% 7: 304:void chop( std::string & str , char ch ) { 7: 305: if ( str.empty() ) { // nothing to chop 1: 306: return; -: 307: } 6: 308: std::string::size_type N = str.length()-1; 6: 309: if ( str[N] == ch ) { 5: 310: str.erase(N); // removed if it's the last -: 311: } -: 312:} -: 313: -: 314: -: 315:#if 0 -: 316: -: 317:/// Test ==> \c rebuildDquote(). -: 318:void test_CSV::test_rebuildDquote() { -: 319: void rebuildDquote( std::string & str ); -: 320: {{ // test::rebuildDquote() -: 321: std::string s; -: 322: s = "\"" ; rebuildDquote(s); // ["] ==> [""] -: 323: assertTrue( s == "\"\""); -: 324: s = "\" \" \"" ; rebuildDquote(s); // [" " "] ==> ["" "" ""] -: 325: assertTrue( s == "\"\" \"\" \"\""); -: 326: s = "3,4\"" ; rebuildDquote(s); // [3,4"] ==> [3,4""] -: 327: assertTrue( s == "3,4\"\""); -: 328: s = " ," ; rebuildDquote(s); // [ ,] ==> [ ,] -: 329: assertTrue( s == " ,"); -: 330: }} -: 331: { // A61196-A76944 -: 332: std::string s = "\"2\",3, \r\n"; // ["2",3, \r\n] -: 333: rebuildDquote(s); -: 334: assertTrue( s == "\"\"2\"\",3, \r\n"); // ["2",3, \r\n] ==> [""2"",3, \r\n] -: 335: } -: 336:} -: 337: -: 338:#ifdef English_dox -: 339:/** Scans \c str substituting \c '"' by 2 double-quotes \c [""]. -: 340: - Local routine used in the implementation of \c getNextCSV(). -: 341: -: 342: \dontinclude test_CSV.cpp -: 343: \skipline test::rebuildDquote() -: 344: \until }} -: 345: \see test_CSV::test_rebuildDquote() -: 346:*/ -: 347:#endif -: 348:#ifdef Spanish_dox -: 349:/** Sustituye en \c str cada comilla doble \c '"' por 2 comillas dobles \c [""]. -: 350: - Rutina local useda en la implementación de \c getNextCSV(). -: 351: -: 352: \dontinclude test_CSV.cpp -: 353: \skipline test::rebuildDquote() -: 354: \until }} -: 355: \see test_CSV::test_rebuildDquote() -: 356:*/ -: 357:#endif -: 358:void rebuildDquote( std::string & str ) { -: 359: std::string res; -: 360: std::string::const_iterator ch; -: 361: for ( ch = str.begin(); ch != str.end(); ++ch ) { -: 362: res += *ch; -: 363: if ( *ch == DQUOTE ) { -: 364: res += DQUOTE; -: 365: } -: 366: } -: 367: str = res; -: 368:} -: 369: -: 370:bool getNextCSV_OLD( std::string& csv, std::istream& CIN ) { -: 371: csv.clear(); -: 372: if ( CIN.fail() ) { // see http://www.horstmann.com/cpp/pitfalls.html -: 373: return false; -: 374: } -: 375: int state=0; char ch; -: 376: bool trailing_CR = false; // true when the last char was CR -: 377: for (;;) { -: 378: CIN.get(ch); -: 379: if ( CIN.fail() ) { -: 380: return false; -: 381: } -: 382: -: 383: switch (state) { -: 384: case 0: { // init -: 385: if ( ch == COMMA ) { -: 386: // csv += COMMA; // removes COMMA from result string -: 387: return false; -: 388: } -: 389: else if ( ch == LF ) { -: 390: // csv += LF; // removes LF from result string -: 391: return true; -: 392: } -: 393: else if ( ch == CR ) { -: 394: trailing_CR = true; -: 395: csv += CR; -: 396: state = 3; -: 397: } -: 398: else if ( ch == DQUOTE ) { // | ',' '\n' | '"' | l | -: 399: state = 1; // delta() | comma+LF | d-quote | letter | -: 400: } // ----------+------------+------------+------------+ -: 401: else { // letter // ==> 0 | 0 | 1 | 3 | -: 402: csv += ch; // init | return | | csv+=ch | -: 403: state = 3; // ----------+------------+------------+------------+ -: 404: } -: 405: } -: 406: break; -: 407: -: 408: case 1: { // quote(1) -: 409: if ( ch == DQUOTE ) { // | ',' '\n' | '"' | l | -: 410: state = 2; // delta() | comma+LF | d-quote | letter | -: 411: } // ----------+------------+------------+------------+ -: 412: else { // letter COMMA LF // 1 | 1 | 2 | 1 | -: 413: csv += ch; // quoted(1)| csv+=ch | | csv+=ch | -: 414: // state = 1; // ----------+------------+------------+------------+ -: 415: } -: 416: } -: 417: break; -: 418: -: 419: case 2: { // inquote(2) -: 420: if ( ch == COMMA ) { -: 421: // state = 0; -: 422: return false; -: 423: } else if ( ch == LF ) { -: 424: // state = 0; -: 425: return true; -: 426: } -: 427: else if ( trailing_CR ) { // ["...""..."\r?...,] '?' after '\r' -: 428: rebuildDquote( csv ); -: 429: csv = DQUOTE + csv + DQUOTE + CR + ch; -: 430: trailing_CR = false; -: 431: state = 3; -: 432: } -: 433: else if ( ch == CR ) { // removes CR+LF at the end of line -: 434: trailing_CR = true; -: 435: // csv += CR; // removes trailing CR+LF -: 436: // state = 2; -: 437: } -: 438: else if ( ch == DQUOTE ) { // | ',' '\n' | '"' | l | -: 439: csv += DQUOTE; // delta() | comma+LF | d-quote | letter | -: 440: state = 1; // ----------+------------+------------+------------+ -: 441: } // 2 | 0 | 1 | 3 | -: 442: else { // letter (error) // inquote(2)| return | csv+=ch | csv='""' | -: 443: rebuildDquote( csv ); // ----------+------------+------------+------------+ -: 444: csv= DQUOTE + csv + DQUOTE + ch; // [," ... "" "3x,] ==> error condition ["3] -: 445: state = 3; // [" ... "" "3] ==> rebuilt value -: 446: } -: 447: } -: 448: break; -: 449: -: 450: case 3: { // regular -: 451: if ( ch == COMMA ) { -: 452: return false; -: 453: } else if ( ch == LF ) { -: 454: if ( trailing_CR ) { -: 455: csv = csv.substr( 0, csv.length()-1 ); // chop( csv , CR ); -: 456: } -: 457: // state = 0; -: 458: // csv += LF; -: 459: return true; -: 460: } -: 461: else if ( ch == CR ) { // leaves CR at the end -: 462: trailing_CR = true; // mark to remove later -: 463: csv += CR; -: 464: // state = 3; -: 465: } // | ',' '\n' | '"' | l | -: 466: else { // letter // delta() | comma+LF | d-quote | letter | -: 467: csv += ch; // ----------+------------+------------+------------+ -: 468: // state = 3; // 3 | 0 | 3 | 3 | -: 469: // swallows DQUOTE's && CR's // regular | return | csv+=ch | csv+=ch | -: 470: } // ----------+------------+------------+------------+ -: 471: } -: 472: break; -: 473: -: 474: } // swith (state) -: 475: } // for (;;) -: 476: -: 477: return false; -: 478:} -: 479: -: 480:#endif -: 481: -: 482:#ifdef English_dox -: 483:/// Comma Separated Value (not used in this implementation). -: 484:#endif -: 485:#ifdef Spanish_dox -: 486:/// Comma Separated Value (no usado en esta implementación). -: 487:#endif -: 488:namespace csv { } // trick to include it into the Doxygen documentation -: 489: -: 490:// Trick to force Doxygen to document these. -: 491:// - They are at the end of file to avoid trouble. -: 492:using namespace std; function _Z41__static_initialization_and_destruction_0ii called 2 returned 100% blocks executed 100% 2: 493:using namespace csv; function _GLOBAL__I__Z11automataCSVRSsRSi called 1 returned 100% blocks executed 100% 2: 494: function _GLOBAL__D__Z11automataCSVRSsRSi called 1 returned 100% blocks executed 100% 2: 495:// EOF: CSV.cpp