// CSV.h (C) 2008  adolfo@di-mare.com

// The documentation of this header is bilingual: define English_dox or
// Spanish_dox when running Doxygen to select which comment blocks are used.
// The text inside the Spanish_dox blocks is intentionally kept in Spanish
// because it is the content of the Spanish documentation build.

#ifdef English_dox
    /// Doxygen English documentation.
    #define English_dox "Doxygen English documentation"
    /// \def English_dox
    ///< Marks English documentation blocks.
#endif
#ifdef Spanish_dox
    /// Documentación en español.
    #define Spanish_dox "Documentación en español"
    /// \def Spanish_dox
    ///< Macro usado para que Doxygen genere documentación en español.
#endif

#ifdef English_dox
/** \file  CSV.h
    \brief \c getNextCSV() and \c setQuotedCSV(): library to process CSV files.

    CSV: Comma Separated Value. The CSV file format has been better
    defined by IETF with RFC-4180.
    - If the file complies with the format specified in RFC-4180 its
      CSV fields will be correctly extracted by these routines.
    - An effort was made to comply with RFC-4180.
    \see http://tools.ietf.org/html/rfc4180

    There are 2 main routines to process CSV files:
    - To get values use function \c getNextCSV().
    - To store values use function \c setQuotedCSV() and store the
      resulting string.

    Class \c CSV_line is a wrapper around these routines, but requires
    that no quoted Line Feed characters \c "\n" appear within each line
    in a CSV file.

    \author Adolfo Di Mare
    \date   2008
*/
#endif

#ifndef   CSV_h
#define   CSV_h

#ifdef English_dox
    /// \def CSV_h
    ///< Avoids multiple inclusion.
#endif
#ifdef Spanish_dox
    /// \def CSV_h
    ///< Evita la inclusión múltiple.
#endif

#ifdef __cplusplus

// compatibility C <==> C++

// NOTE(review): the header names of these three #include directives were
// missing from the file. <string> and <istream> are required by the
// declarations below (std::string, std::istream); <iostream> is presumed to
// be the third one -- TODO confirm against the original sources.
#include <iostream>
#include <istream>
#include <string>

// CSV: Comma Separated Values [IETF RFC-4180].
// namespace csv {

#ifdef English_dox
/** Prepares \c value for output into a CSV file.
    - Stores a new value into string \c res.
    - Surrounds the result in double-quotes when \c value has whitespace.
    - Surrounds the result in double-quotes when \c value has double-quotes.
    - Surrounds the result in double-quotes when \c value has commas \c ",".
    - Substitutes any double-quotes \c '"' within \c value with 2
      double-quotes \c [""].
    - Works with \c char, not tested for \c wchar_t.

    \param res   String that receives the quoted result.
    \param value Field value to prepare for output.

    \dontinclude test_CSV.cpp
    \skipline    test::setQuotedCSV()
    \until       }}
    \see         test_CSV::setQuotedCSV()
*/
#endif
void setQuotedCSV( std::string& res , const std::string& value );

#ifdef English_dox
/** Scans input stream \c CIN and returns the next CSV value.
    - \c CIN should be open in \c std::ios::binary mode as chars are
      extracted one by one, using \c CIN.get(ch).
    - The retrieved value from \c CIN gets stored into \c csv.
    - Works with \c char, not tested for \c wchar_t.
    - Removes from \c csv the trailing (CR+LF or LF) ==> \c "\r\n" or \c "\n".
    - An effort was made to comply with RFC-4180.

    \param csv String that receives the extracted field.
    \param CIN Input stream the field is read from.
    \return true when the CSV field ends in \c "\n" (LF -> LineFeed).

    \see http://tools.ietf.org/html/rfc4180
    \see http://www.horstmann.com/cpp/pitfalls.html

    \dontinclude CSV_line.cpp
    \skipline    test::getNextCSV()
    \until       }}
    \see         test_CSV::getNextCSV()
*/
#endif
bool getNextCSV( std::string& csv, std::istream& CIN );

#ifdef English_dox
/** Deletes leading and trailing whitespace from \c "str".
    - It will also delete characters " \f\n\r\t\v".
    - Uses \c isspace(ch) to find out if a letter is whitespace.

    \param str String to trim; it is modified in place.

    \dontinclude test_CSV.cpp
    \skipline    test::trim()
    \until       }}
    \see         test_CSV::test_trim()
*/
#endif
void trim( std::string & str );

#ifdef English_dox
/** Converts an incorrect CSV field value into its probably correct value.
    - Strips out leading and trailing whitespace with \c trim().
    - If the trimmed field is surrounded by quotes it will try to replace
      every pair of double quotes \c [""] by a single double quote \c ["].
    - Will not verify that double quotes are correctly paired.

    Sometimes a FILE.csv has quoted fields surrounded by whitespace. As
    these field values do not comply with RFC-4180, they are extracted by
    \c getNextCSV() as they come, with no whitespace removed and with their
    double quotes pairs intact. In the following example the string is
    enclosed in square brackets \c [..] instead of double quotes \c ["]
    for legibility:

    \code
    ["zero", "if "" 1" , , " 3xt" \r\n]
    [....0.,........1..,2,.........3...]

         csv field        getNextCSV()    trimCSV()
    +------------------+----------------+----------+
    | ["zero"]         | [zero]         | [zero]   |
    | [, "if "" 1" ]   | [ "if "" 1" ]  | [if " 1] |
    | [, ]             | [ ]            | []       |
    | [, " 3xt" \r\n]  | [ " 3xt" ]     | [ 3xt]   |
    +------------------+----------------+----------+
    \endcode

    By common sense, the programmer would expect that these strings be
    returned as they appear in the \c trimCSV() column, but the fact of the
    matter is that the only one that complies with RFC-4180 is the first
    one. After using \c trimCSV() on the value returned by \c getNextCSV()
    the result is what is reasonably expected.
    - Nonetheless, the field values that contain line feeds \c "\n" or
      carriage returns \c "\r" are probably processed in a way different
      from what is expected, even before they are passed as arguments to
      \c trimCSV(). It is wiser not to trust this routine as a complete
      solution to process CSV files that do not fully comply with RFC-4180.

    \param str Field value to fix; it is modified in place.

    \dontinclude test_CSV.cpp
    \skipline    test::trimCSV()
    \until       }}
    \see         test_CSV::test_trimCSV()
*/
#endif
void trimCSV( std::string & str );

#ifdef English_dox
/** Deletes \c ch when it is the trailing character in \c str.
    - The deleted character always is \c ch.

    \param str String to chop; it is modified in place.
    \param ch  Character to remove from the end of \c str (defaults to \c 0).

    \dontinclude test_CSV.cpp
    \skipline    test::chop()
    \until       }}
    \see         test_CSV::test_chop()
*/
#endif
void chop( std::string & str , char ch=0 );

// }; // namespace csv

#ifdef English_dox
    /// Defined by the C++ standard library
    namespace std { } // trick to include it into the Doxygen documentation
#endif

#endif // __cplusplus

// NOTE(review): the header name of this #include was missing from the file;
// <stddef.h> is presumed because it is the C header that defines NULL --
// TODO confirm against the original sources.
#include <stddef.h> // NULL, etc. [C language]

#ifdef __cplusplus
extern "C" {
#endif

// put in here C declarations

#ifdef __cplusplus
}
#endif

#endif // CSV_h

// EOF: CSV.h