-:    0:Source:CSV.cpp
        -:    0:Graph:CSV.gcno
        -:    0:Data:CSV.gcda
        -:    0:Runs:1
        -:    0:Programs:1
        -:    1:// CSV.cpp  (C) 2008 adolfo@di-mare.com
        -:    2:
        -:    3:#ifdef English_dox
        -:    4:/** \file   CSV.cpp
        -:    5:    \brief  Implementation for \c CSV.h.
        -:    6:    \author Adolfo Di Mare <adolfo@di-mare.com>
        -:    7:    \date   2008
        -:    8:*/
        -:    9:#endif
        -:   10:
        -:   11:#ifdef Spanish_dox
        -:   12:/** \file   CSV.cpp
        -:   13:    \brief  Implementación para \c CSV.h.
        -:   14:    \author Adolfo Di Mare <adolfo@di-mare.com>
        -:   15:    \date   2008
        -:   16:*/
        -:   17:#endif
        -:   18:
        -:   19:#include "CSV.h"
        -:   20:
        -:   21:#define COMMA  ','
        -:   22:#define DQUOTE '"'
        -:   23:#define LF '\n' // Line Feed
        -:   24:#define CR '\r' // Carriage Return
        -:   25:
        -:   26:
        -:   27:
        -:   28://    Actions for the finite automaton used to parse CSV input
        -:   29://    ========================================================
        -:   30://    [              ] ==> n=0; i=0; DATA[0] = "";
        -:   31://    [   csv=""     ] ==> ++n; DATA[n] = ""; ++i;
        -:   32://    [              ] ==> ++i;
        -:   33://    [   h+=        ] ==> DATA[n] += str[i]; ++i;
        -:   34://    [  h='""'      ] ==> DATA[n] = """"; ++i;
        -:   35://    [     END      ] ==> return n;
        -:   36://
        -:   37://            |  ',' '\n'  |    '"'     |     l      |
        -:   38://    delta() |  comma+LF  |  d-quote   |   letter   |
        -:   39://  ----------+------------+------------+------------+
        -:   40://   ==>  0   |     0      |     1      |     3      |
        -:   41://       init |   return   |            |  csv+=ch   |
        -:   42://  ----------+------------+------------+------------+
        -:   43://        1   |     1      |     2      |     1      |
        -:   44://   quoted(1)|  csv+=ch   |            |  csv+=ch   |
        -:   45://  ----------+------------+------------+------------+
        -:   46://        2   |     0      |     1      |     3      |
        -:   47://  inquote(2)|   return   |  csv+=ch   |  csv='""'  |
        -:   48://  ----------+------------+------------+------------+
        -:   49://        3   |     0      |     3      |     3      |
        -:   50://    regular |   return   |  csv+=ch   |  csv+=ch   |
        -:   51://  ----------+------------+------------+------------+
        -:   52:
        -:   53:#ifdef English_dox
        -:   54:/** Scans input stream \c CIN and returns the next CSV value.
        -:   55:    - The retrieved value from \c CIN gets stored into \c csv.
        -:   56:    - Works with \c char, not tested for \c wchar_t.
        -:   57:    - Stops when \c CIN.fail() or when \c CIN.eof().
        -:   58:    - Will not remove any chars from the retrieved value.
        -:   59:
        -:   60:    \return true when the CSV complies with RFC-4180.
        -:   61:*/
        -:   62:#endif
        -:   63:#ifdef Spanish_dox
        -:   64:/** Obtiene del flujo de entrada \c CIN el siguiente valor CSV.
        -:   65:    - El valor obtenido de \c CIN queda almacenado en \c csv.
        -:   66:    - Trabaja bien con \c char, no ha sido probado para \c wchar_t.
        -:   67:    - Para cuando \c CIN.fail() o cuando \c CIN.eof().
        -:   68:    - No elimina ningún carácter del valor obtenido.
        -:   69:
        -:   70:    \return true cuando el campo CSV sigue la especificación RFC-4180.
        -:   71:*/
        -:   72:#endif
function _Z11automataCSVRSsRSi called 96 returned 100% blocks executed 94%
       96:   73:bool automataCSV( std::string& csv, std::istream& CIN ) {
       96:   74:    csv.clear();
       96:   75:    if ( CIN.fail() || CIN.eof() ) { // see http://www.horstmann.com/cpp/pitfalls.html
    #####:   76:        return false;
        -:   77:    }
       96:   78:    int state=0;  char ch;
       96:   79:    bool trailing_CR = false; // true when the last char was CR
       96:   80:    bool ret_val     = true;  // true while csv complies with RFC-4180
      540:   81:    for (;;) {
      444:   82:        CIN.get(ch);
      444:   83:        if ( CIN.fail() || CIN.eof() ) {
       15:   84:            return ret_val;
        -:   85:        }
      429:   86:        csv += ch;
        -:   87:
      429:   88:        switch (state) {
        -:   89:        case 0: { // init
       93:   90:                if ( ch == COMMA ) {
       11:   91:                    return ret_val;
        -:   92:                }
       82:   93:                else if ( ch == LF ) {
        4:   94:                    return ret_val;
        -:   95:                }
       78:   96:                else if ( ch == CR ) {
        1:   97:                    trailing_CR = true;
        1:   98:                    state = 3;
        -:   99:                }
       77:  100:                else if ( ch == DQUOTE ) {     //            |  ',' '\n'  |    '"'     |     l      |
       26:  101:                    state = 1;                 //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  102:                }                              //  ----------+------------+------------+------------+
        -:  103:                else { // letter               //   ==>  0   |     0      |     1      |     3      |
       51:  104:                    state = 3;                 //       init |   return   |            |  csv+=ch   |
        -:  105:                }                              //  ----------+------------+------------+------------+
        -:  106:            }
       51:  107:            break;
        -:  108:
        -:  109:        case 1: { // quote(1)
      113:  110:                if ( ch == DQUOTE ) {          //            |  ',' '\n'  |    '"'     |     l      |
       31:  111:                    state = 2;                 //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  112:                }                              //  ----------+------------+------------+------------+
        -:  113:            //  else { // letter COMMA LF      //        1   |     1      |     2      |     1      |
        -:  114:            //      state = 1;                 //   quoted(1)|  csv+=ch   |            |  csv+=ch   |
        -:  115:            //  }                              //  ----------+------------+------------+------------+
        -:  116:            }
       31:  117:            break;
        -:  118:
        -:  119:        case 2: { // inquote(2)
       30:  120:                if ( ch == COMMA ) {
        -:  121:                //  state = 0;
       15:  122:                    return ret_val;
       15:  123:                } else if ( ch == LF ) {
        -:  124:                //  state = 0;
        4:  125:                    return ret_val;
        -:  126:                }
       11:  127:                else if ( trailing_CR ) { //  ["...""..."\r?...,] '?' after '\r'
        1:  128:                    trailing_CR = false;
        1:  129:                    ret_val = false;
        1:  130:                    state = 3;
        -:  131:                }
       10:  132:                else if ( ch == CR ) {
        3:  133:                    trailing_CR = true;
        -:  134:                //  state = 2;
        -:  135:                }
        7:  136:                else if ( ch == DQUOTE ) {     //            |  ',' '\n'  |    '"'     |     l      |
        5:  137:                    state = 1;                 //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  138:                }                              //  ----------+------------+------------+------------+
        -:  139:                else { // letter (error)       //        2   |     0      |     1      |     3      |
        2:  140:                    ret_val = false;           //  inquote(2)|   return   |  csv+=ch   |  csv='""'  |
        2:  141:                    state = 3;                 //  ----------+------------+------------+------------+
        -:  142:                }                              // [," ... "" "3x,] ==> error condition ["3]
        -:  143:            }
        2:  144:            break;
        -:  145:
        -:  146:        case 3: { // regular
      193:  147:                if ( ch == COMMA ) {
        -:  148:                //  state = 0;
       38:  149:                    return ret_val;
        -:  150:                }
      155:  151:                else if ( ch == LF ) {
        -:  152:                //  state = 0;
        9:  153:                    return ret_val;
        -:  154:                }                              //            |  ',' '\n'  |    '"'     |     l      |
        -:  155:                else { // letter               //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  156:                //  state = 3;                 //  ----------+------------+------------+------------+
        -:  157:                // swallows DQUOTE's && CR's   //        3   |     0      |     3      |     3      |
        -:  158:                }                              //    regular |   return   |  csv+=ch   |  csv+=ch   |
        -:  159:            }                                  //  ----------+------------+------------+------------+
       96:  160:            break;
        -:  161:
        -:  162:        } // switch (state)
        -:  163:    } // for (;;)
        -:  164:
       96:  165:    return ret_val;
        -:  166:}
        -:  167:
        -:  168:void singleDQUOTE( std::string & str );
        -:  169:
function _Z10getNextCSVRSsRSi called 96 returned 100% blocks executed 100%
       96:  170:bool getNextCSV( std::string& csv, std::istream& CIN ) {
       96:  171:    bool correct = automataCSV( csv, CIN );
       96:  172:    bool ret_val = false;    // true if ( csv[ csv.length()-1 ] == LF )
       96:  173:    size_t N = csv.length(); // number of retrieved chars
        -:  174:
       96:  175:    if ( correct ) {
       93:  176:        if ( csv.empty() ) {
        3:  177:            return ret_val;
        -:  178:        }
       90:  179:        N--; // last char
       90:  180:        if ( csv[N] == COMMA ) {
       61:  181:            csv.erase(N); // chop() trailing comma
        -:  182:        }
       29:  183:        else if( csv[N] == LF ) {
       17:  184:            ret_val = true;
       17:  185:            csv.erase(N); // chop() trailing LF
       17:  186:            if ( N>0 ) {
       13:  187:                N--;
       13:  188:                if( csv[N] == CR ) {
        9:  189:                    csv.erase(N); // chop() trailing CR
        -:  190:                }
        -:  191:            }
        -:  192:        }
        -:  193:
       90:  194:        if ( ! csv.empty() ) {
       74:  195:            if ( csv[0] == DQUOTE ) {
       23:  196:                singleDQUOTE( csv ); // transform [""] ==> ["]
        -:  197:            }
        -:  198:        }
        -:  199:    }
        -:  200:
        -:  201:    else {   // assert( correct == false );
        3:  202:        if ( N>0 ) {
        3:  203:            N--; // last char
        3:  204:            if ( csv[N] == COMMA ) {
        3:  205:                csv.erase(N,1); // removes trailing comma
        -:  206:            }
        -:  207:        }
        -:  208:    }
       93:  209:    return ret_val;
        -:  210:}
        -:  211:
function _Z12setQuotedCSVRSsRKSs called 26 returned 100% blocks executed 85%
       26:  212:void setQuotedCSV( std::string& res , const std::string& value ) {
       26:  213:    std::string::const_iterator ch;
       26:  214:    bool quote_surround = false;
       26:  215:    res.clear();
      126:  216:    for ( ch = value.begin(); ch != value.end(); ++ch ) {
      100:  217:        if ( isspace( *ch ) || *ch == COMMA ) {
       23:  218:            quote_surround = true;
        -:  219:        }
       77:  220:        else if ( *ch == DQUOTE ) {
        6:  221:            res += DQUOTE;
        6:  222:            quote_surround = true;
        -:  223:        }
      100:  224:        res += *ch;
        -:  225:    }
        -:  226:
       26:  227:    if ( quote_surround ) {
       17:  228:        res = DQUOTE + res + DQUOTE;
        -:  229:    }
        -:  230:}
        -:  231:
function _Z4trimRSs called 15 returned 100% blocks executed 88%
       15:  232:void trim( std::string & str ) {
       15:  233:    if ( str.empty() ) { // already trimmed
        1:  234:        return;
        -:  235:    }
        -:  236:
        -:  237:    // find in-string range of chars str[i->j]
       14:  238:    std::string::size_type i = 0, LEN = str.length();
       32:  239:    while ( i < LEN ) {
       29:  240:        if ( isspace(str[i]) ) { // skip leading whitespace
       18:  241:            ++i;
        -:  242:        }
        -:  243:        else {
       14:  244:            break;
        -:  245:        }
        -:  246:    }
        -:  247:
       14:  248:    std::string::size_type j = LEN;
       36:  249:    while ( j > 0 ) {
       33:  250:        --j;
       33:  251:        if ( ! isspace(str[j]) ) {
       14:  252:            break;
        -:  253:        }
        -:  254:    }
        -:  255:    // leave out leading and trailing whitespace
       14:  256:    str = str.substr(i,j-i+1);
        -:  257:}
        -:  258:
function _Z7trimCSVRSs called 9 returned 100% blocks executed 100%
        9:  259:void trimCSV( std::string & str ) {
        9:  260:    trim( str ); // 1) trim()
        9:  261:    if ( str.empty() )  {
        2:  262:        return;
        -:  263:    }
        -:  264:
        -:  265:    // D-Quoted???
        7:  266:    std::string::size_type N = str.length()-1;
        7:  267:    if ( str[0] != DQUOTE || str[N] != DQUOTE )  {
        5:  268:        return;
        -:  269:    }
        -:  270:
        -:  271:    // Replace each doubled DQUOTE with a single one and strip the enclosing DQUOTEs
        5:  272:    singleDQUOTE( str );
        9:  273:    return;
        -:  274:}
        -:  275:
        -:  276:
        -:  277:#ifdef English_dox
        -:  278:/// Replaces each doubled DQUOTE with a single DQUOTE within \c str, then drops the first and last chars (the enclosing DQUOTEs).
        -:  279:#endif
        -:  280:#ifdef Spanish_dox
        -:  281:/// Sustituye cada letra DQUOTE doble por una sola letra DQUOTE en \c str.
        -:  282:#endif
function _Z12singleDQUOTERSs called 28 returned 100% blocks executed 85%
       28:  283:void singleDQUOTE( std::string & str ) {
        -:  284:    // Replace each doubled DQUOTE with a single DQUOTE
       28:  285:    std::string tmp;
       28:  286:    std::string::const_iterator from, next;
      158:  287:    for ( from = str.begin(); from != str.end(); ++from ) {
      156:  288:        tmp.push_back( *from );
      156:  289:        if ( *from == DQUOTE ) { // already copied the first
       60:  290:            next = from; next++;
       60:  291:            if ( next == str.end() ) {
       26:  292:                break;
        -:  293:            }
       34:  294:            else if ( *next == DQUOTE ) {
        8:  295:                from = next; // don't copy the second DQUOTE
        -:  296:            }
        -:  297:        }
        -:  298:    }
        -:  299:    // Remove the enclosing (outermost) DQUOTEs
       28:  300:    str = tmp.substr(1, tmp.length()-2);
       28:  301:    return;
        -:  302:}
        -:  303:
function _Z4chopRSsc called 7 returned 100% blocks executed 100%
        7:  304:void chop( std::string & str , char ch ) {
        7:  305:    if ( str.empty() ) { // nothing to chop
        1:  306:        return;
        -:  307:    }
        6:  308:    std::string::size_type N = str.length()-1;
        6:  309:    if ( str[N] == ch ) {
        5:  310:        str.erase(N); // removed if it's the last
        -:  311:    }
        -:  312:}
        -:  313:
        -:  314:
        -:  315:#if 0
        -:  316:
        -:  317:/// Test ==> \c rebuildDquote().
        -:  318:void test_CSV::test_rebuildDquote() {
        -:  319:    void rebuildDquote( std::string & str );
        -:  320:    {{  // test::rebuildDquote()
        -:  321:        std::string s;
        -:  322:        s =  "\"" ; rebuildDquote(s);       // ["] ==> [""]
        -:  323:        assertTrue( s == "\"\"");
        -:  324:        s =  "\" \" \"" ; rebuildDquote(s); // [" " "] ==> ["" "" ""]
        -:  325:        assertTrue( s == "\"\" \"\" \"\"");
        -:  326:        s =  "3,4\"" ; rebuildDquote(s);    // [3,4"] ==> [3,4""]
        -:  327:        assertTrue( s == "3,4\"\"");
        -:  328:        s =  " ," ; rebuildDquote(s);       // [ ,] ==> [ ,]
        -:  329:        assertTrue( s == " ,");
        -:  330:    }}
        -:  331:    {   // A61196-A76944
        -:  332:        std::string s =  "\"2\",3, \r\n";        // ["2",3, \r\n]
        -:  333:        rebuildDquote(s);
        -:  334:        assertTrue( s ==  "\"\"2\"\",3, \r\n");  // ["2",3, \r\n] ==> [""2"",3, \r\n]
        -:  335:    }
        -:  336:}
        -:  337:
        -:  338:#ifdef English_dox
        -:  339:/** Scans \c str substituting \c '"' by 2 double-quotes \c [""].
        -:  340:    - Local routine used in the implementation of \c getNextCSV().
        -:  341:
        -:  342:    \dontinclude test_CSV.cpp
        -:  343:    \skipline    test::rebuildDquote()
        -:  344:    \until       }}
        -:  345:    \see         test_CSV::test_rebuildDquote()
        -:  346:*/
        -:  347:#endif
        -:  348:#ifdef Spanish_dox
        -:  349:/** Sustituye en \c str cada comilla doble \c '"' por 2 comillas dobles \c [""].
        -:  350:    - Rutina local usada en la implementación de \c getNextCSV().
        -:  351:
        -:  352:    \dontinclude test_CSV.cpp
        -:  353:    \skipline    test::rebuildDquote()
        -:  354:    \until       }}
        -:  355:    \see         test_CSV::test_rebuildDquote()
        -:  356:*/
        -:  357:#endif
        -:  358:void rebuildDquote( std::string & str ) {
        -:  359:    std::string res;
        -:  360:    std::string::const_iterator ch;
        -:  361:    for ( ch = str.begin(); ch != str.end(); ++ch ) {
        -:  362:        res += *ch;
        -:  363:        if ( *ch == DQUOTE ) {
        -:  364:            res += DQUOTE;
        -:  365:        }
        -:  366:    }
        -:  367:    str = res;
        -:  368:}
        -:  369:
        -:  370:bool getNextCSV_OLD( std::string& csv, std::istream& CIN ) {
        -:  371:    csv.clear();
        -:  372:    if ( CIN.fail() ) { // see http://www.horstmann.com/cpp/pitfalls.html
        -:  373:        return false;
        -:  374:    }
        -:  375:    int state=0; char ch;
        -:  376:    bool trailing_CR = false; // true when the last char was CR
        -:  377:    for (;;) {
        -:  378:        CIN.get(ch);
        -:  379:        if ( CIN.fail() ) {
        -:  380:            return false;
        -:  381:        }
        -:  382:
        -:  383:        switch (state) {
        -:  384:        case 0: { // init
        -:  385:                if ( ch == COMMA ) {
        -:  386:                //  csv += COMMA; // removes COMMA from result string
        -:  387:                    return false;
        -:  388:                }
        -:  389:                else if ( ch == LF ) {
        -:  390:                //  csv += LF;    // removes LF from result string
        -:  391:                    return true;
        -:  392:                }
        -:  393:                else if ( ch == CR ) {
        -:  394:                    trailing_CR = true;
        -:  395:                    csv += CR;
        -:  396:                    state = 3;
        -:  397:                }
        -:  398:                else if ( ch == DQUOTE ) {     //            |  ',' '\n'  |    '"'     |     l      |
        -:  399:                    state = 1;                 //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  400:                }                              //  ----------+------------+------------+------------+
        -:  401:                else { // letter               //   ==>  0   |     0      |     1      |     3      |
        -:  402:                    csv += ch;                 //       init |   return   |            |  csv+=ch   |
        -:  403:                    state = 3;                 //  ----------+------------+------------+------------+
        -:  404:                }
        -:  405:            }
        -:  406:            break;
        -:  407:
        -:  408:        case 1: { // quote(1)
        -:  409:                if ( ch == DQUOTE ) {          //            |  ',' '\n'  |    '"'     |     l      |
        -:  410:                    state = 2;                 //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  411:                }                              //  ----------+------------+------------+------------+
        -:  412:                else { // letter COMMA LF      //        1   |     1      |     2      |     1      |
        -:  413:                    csv += ch;                 //   quoted(1)|  csv+=ch   |            |  csv+=ch   |
        -:  414:                //  state = 1;                 //  ----------+------------+------------+------------+
        -:  415:                }
        -:  416:            }
        -:  417:            break;
        -:  418:
        -:  419:        case 2: { // inquote(2)
        -:  420:                if ( ch == COMMA ) {
        -:  421:                //  state = 0;
        -:  422:                    return false;
        -:  423:                } else if ( ch == LF ) {
        -:  424:                //  state = 0;
        -:  425:                    return true;
        -:  426:                }
        -:  427:                else if ( trailing_CR ) { //  ["...""..."\r?...,] '?' after '\r'
        -:  428:                    rebuildDquote( csv );
        -:  429:                    csv = DQUOTE + csv + DQUOTE + CR + ch;
        -:  430:                    trailing_CR = false;
        -:  431:                    state = 3;
        -:  432:                }
        -:  433:                else if ( ch == CR ) { // removes CR+LF at the end of line
        -:  434:                    trailing_CR = true;
        -:  435:                //  csv += CR; // removes trailing CR+LF
        -:  436:                //  state = 2;
        -:  437:                }
        -:  438:                else if ( ch == DQUOTE ) {     //            |  ',' '\n'  |    '"'     |     l      |
        -:  439:                    csv += DQUOTE;             //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  440:                    state = 1;                 //  ----------+------------+------------+------------+
        -:  441:                }                              //        2   |     0      |     1      |     3      |
        -:  442:                else { // letter (error)       //  inquote(2)|   return   |  csv+=ch   |  csv='""'  |
        -:  443:                    rebuildDquote( csv );      //  ----------+------------+------------+------------+
        -:  444:                    csv= DQUOTE + csv + DQUOTE + ch;  // [," ... "" "3x,] ==> error condition ["3]
        -:  445:                    state = 3;                        //  [" ... "" "3]  ==> rebuilt value
        -:  446:                }
        -:  447:            }
        -:  448:            break;
        -:  449:
        -:  450:        case 3: { // regular
        -:  451:                if ( ch == COMMA ) {
        -:  452:                    return false;
        -:  453:                } else if ( ch == LF ) {
        -:  454:                    if ( trailing_CR ) {
        -:  455:                        csv = csv.substr( 0, csv.length()-1 ); // chop( csv , CR );
        -:  456:                    }
        -:  457:                //  state = 0;
        -:  458:                //  csv += LF;
        -:  459:                    return true;
        -:  460:                }
        -:  461:                else if ( ch == CR ) {    // leaves CR at the end
        -:  462:                    trailing_CR = true;   // mark to remove later
        -:  463:                    csv += CR;
        -:  464:                //  state = 3;
        -:  465:                }                              //            |  ',' '\n'  |    '"'     |     l      |
        -:  466:                else { // letter               //    delta() |  comma+LF  |  d-quote   |   letter   |
        -:  467:                    csv += ch;                 //  ----------+------------+------------+------------+
        -:  468:                //  state = 3;                 //        3   |     0      |     3      |     3      |
        -:  469:                // swallows DQUOTE's && CR's   //    regular |   return   |  csv+=ch   |  csv+=ch   |
        -:  470:               }                               //  ----------+------------+------------+------------+
        -:  471:            }
        -:  472:            break;
        -:  473:
        -:  474:        } // switch (state)
        -:  475:    } // for (;;)
        -:  476:
        -:  477:    return false;
        -:  478:}
        -:  479:
        -:  480:#endif
        -:  481:
        -:  482:#ifdef English_dox
        -:  483:/// Comma Separated Value (not used in this implementation).
        -:  484:#endif
        -:  485:#ifdef Spanish_dox
        -:  486:/// Comma Separated Value (no usado en esta implementación).
        -:  487:#endif
        -:  488:namespace csv { } // trick to include it into the Doxygen documentation
        -:  489:
        -:  490:// Trick to force Doxygen to document these.
        -:  491:// - They are at the end of file to avoid trouble.
        -:  492:using namespace std;
function _Z41__static_initialization_and_destruction_0ii called 2 returned 100% blocks executed 100%
        2:  493:using namespace csv;
function _GLOBAL__I__Z11automataCSVRSsRSi called 1 returned 100% blocks executed 100%
        2:  494:
function _GLOBAL__D__Z11automataCSVRSsRSi called 1 returned 100% blocks executed 100%
        2:  495:// EOF: CSV.cpp