Как я могу прочитать и проанализировать файлы CSV в C++?

Question

Как я могу прочитать и проанализировать файлы CSV в C++?

Попробуйте этот код (ПРИМЕЧАНИЕ: Сообщаемый не работать над Windows Server 2000)

#region NTLogonUser
#region Direct OS LogonUser Code
[DllImport( "advapi32.dll")]
private static extern bool LogonUser(String lpszUsername, 
    String lpszDomain, String lpszPassword, int dwLogonType, 
    int dwLogonProvider, out int phToken);

[DllImport("Kernel32.dll")]
private static extern int GetLastError();

public static bool LogOnXP(String sDomain, String sUser, String sPassword)
{
   int token1, ret;
   int attmpts = 0;

   bool LoggedOn = false;

   while (!LoggedOn && attmpts < 2)
   {
      LoggedOn= LogonUser(sUser, sDomain, sPassword, 3, 0, out token1);
      if (LoggedOn) return (true);
      else
      {
         switch (ret = GetLastError())
         {
            case (126): ; 
               if (attmpts++ > 2)
                  throw new LogonException(
                      "Specified module could not be found. error code: " + 
                      ret.ToString());
               break;

            case (1314): 
               throw new LogonException(
                  "Specified module could not be found. error code: " + 
                      ret.ToString());

            case (1326): 
               // edited out based on comment
               //  throw new LogonException(
               //   "Unknown user name or bad password.");
            return false;

            default: 
               throw new LogonException(
                  "Unexpected Logon Failure. Contact Administrator");
              }
          }
       }
   return(false);
}
#endregion Direct Logon Code
#endregion NTLogonUser

кроме необходимо будет создать собственное исключение для "LogonException"

246

text c++ parsing csv

задан Shog9 7 April 2014 в 03:39

10 ответов

Вы можете посмотреть мой проект FOSS CSVfix ( обновленная ссылка ), который является редактором потока CSV, написанным на C ++. Парсер CSV не является призом, но выполняет свою работу, и весь пакет может делать то, что вам нужно, без написания кода.

См. alib / src / a_csv.cpp для анализатора CSV и csvlib / src / csved_ioman.cpp ( IOManager :: ReadCSV ) для примера использования.

7

ответ дан 23 November 2019 в 03:04

Решение с использованием Boost Tokenizer:

std::vector<std::string> vec;
using namespace boost;
tokenizer<escaped_list_separator<char> > tk(
   line, escaped_list_separator<char>('\\', ',', '\"'));
for (tokenizer<escaped_list_separator<char> >::iterator i(tk.begin());
   i!=tk.end();++i) 
{
   vec.push_back(*i);
}

46

ответ дан 23 November 2019 в 03:04

Excuse me, but this all seems like a great deal of elaborate syntax to hide a few lines of code.

Why not this:

/**

  Read line from a CSV file

  @param[in] fp file pointer to open file
  @param[in] vls reference to vector of strings to hold next line

  */
void readCSV( FILE *fp, std::vector<std::string>& vls )
{
    vls.clear();
    if( ! fp )
        return;
    char buf[10000];
    if( ! fgets( buf,999,fp) )
        return;
    std::string s = buf;
    int p,q;
    q = -1;
    // loop over columns
    while( 1 ) {
        p = q;
        q = s.find_first_of(",\n",p+1);
        if( q == -1 ) 
            break;
        vls.push_back( s.substr(p+1,q-p-1) );
    }
}

int _tmain(int argc, _TCHAR* argv[])
{
    std::vector<std::string> vls;
    FILE * fp = fopen( argv[1], "r" );
    if( ! fp )
        return 1;
    readCSV( fp, vls );
    readCSV( fp, vls );
    readCSV( fp, vls );
    std::cout << "row 3, col 4 is " << vls[3].c_str() << "\n";

    return 0;
}

2

ответ дан 23 November 2019 в 03:04

Вы также можете ознакомиться с возможностями библиотеки Qt .

Она поддерживает регулярные выражения, а класс QString имеет приятные методы, например split () , возвращающий QStringList, список строк, полученных путем разделения исходной строки с помощью заданного разделителя. Должно быть достаточно для файла csv ..

Чтобы получить столбец с заданным именем заголовка, я использую следующее: C ++ наследование Qt проблема qstring

1

ответ дан 23 November 2019 в 03:04

В C ++ String Toolkit Library (StrTk) есть класс сетки токенов, который позволяет загружать данные из текстовых файлов, строк или буферов символов , а также для анализа / обработки их в режиме строка-столбец.

Вы можете указать разделители строк и столбцов или просто использовать значения по умолчанию.

void foo()
{
   std::string data = "1,2,3,4,5\n"
                      "0,2,4,6,8\n"
                      "1,3,5,7,9\n";

   strtk::token_grid grid(data,data.size(),",");

   for(std::size_t i = 0; i < grid.row_count(); ++i)
   {
      strtk::token_grid::row_type r = grid.row(i);
      for(std::size_t j = 0; j < r.size(); ++j)
      {
         std::cout << r.get<int>(j) << "\t";
      }
      std::cout << std::endl;
   }
   std::cout << std::endl;
}

Дополнительные примеры можно найти Здесь

31

ответ дан 23 November 2019 в 03:04

When using the Boost Tokenizer escaped_list_separator for CSV files, then one should be aware of the following:

It requires an escape-character (default back-slash - \)
It requires a splitter/seperator-character (default comma - ,)
It requires an quote-character (default quote - ")

The CSV format specified by wiki states that data fields can contain separators in quotes (supported):

1997,Ford,E350,"Super, luxurious truck"

The CSV format specified by wiki states that single quotes should be handled with double-quotes (escaped_list_separator will strip away all quote characters):

1997,Ford,E350,"Super ""luxurious"" truck"

The CSV format doesn't specify that any back-slash characters should be stripped away (escaped_list_separator will strip away all escape characters).

A possible work-around to fix the default behavior of the boost escaped_list_separator:

First replace all back-slash characters (\) with two back-slash characters (\\) so they are not stripped away.
Secondly replace all double-quotes ("") with a single back-slash character and a quote (\")

This work-around has the side-effect that empty data-fields that are represented by a double-quote, will be transformed into a single-quote-token. When iterating through the tokens, then one must check if the token is a single-quote, and treat it like an empty string.

Not pretty but it works, as long there are not newlines within the quotes.

14

ответ дан 23 November 2019 в 03:04

Использование Spirit для разбора CSV - не излишество. Spirit хорошо подходит для задач микропарсинга. Например, с Spirit 2.1 это очень просто:

bool r = phrase_parse(first, last,

    //  Begin grammar
    (
        double_ % ','
    )
    ,
    //  End grammar

    space, v);

Вектор v заполняется значениями. В новой документации Spirit 2.1, которая только что была выпущена с Boost 1.41, есть серия руководств , посвященных этому вопросу.

Учебник прогрессирует от простого к сложному. Парсеры CSV представлены где-то посередине и затрагивают различные методы использования Spirit. Сгенерированный код настолько же сложен, как и рукописный код. Проверьте сгенерированный ассемблер!

29

ответ дан 23 November 2019 в 03:04

Вы можете использовать Boost Tokenizer с escaped_list_separator.

escaped_list_separator разбирает супермножество csv. Boost::tokenizer

Здесь используются только заголовочные файлы Boost tokenizer, привязка к библиотекам boost не требуется.

Вот пример (см. Parse CSV File With Boost Tokenizer In C++ для подробностей или Boost::tokenizer ):

#include <iostream>     // cout, endl
#include <fstream>      // fstream
#include <vector>
#include <string>
#include <algorithm>    // copy
#include <iterator>     // ostream_operator
#include <boost/tokenizer.hpp>

int main()
{
    using namespace std;
    using namespace boost;
    string data("data.csv");

    ifstream in(data.c_str());
    if (!in.is_open()) return 1;

    typedef tokenizer< escaped_list_separator<char> > Tokenizer;
    vector< string > vec;
    string line;

    while (getline(in,line))
    {
        Tokenizer tok(line);
        vec.assign(tok.begin(),tok.end());

        // vector now contains strings from one row, output to cout here
        copy(vec.begin(), vec.end(), ostream_iterator<string>(cout, "|"));

        cout << "\n----------------------" << endl;
    }
}

29

ответ дан 23 November 2019 в 03:04

Если вы DO позаботитесь о правильном синтаксическом анализе CSV, это будет делать это ... относительно медленно, поскольку он работает по одному символу за раз.

 void ParseCSV(const string& csvSource, vector<vector<string> >& lines)
    {
       bool inQuote(false);
       bool newLine(false);
       string field;
       lines.clear();
       vector<string> line;

       string::const_iterator aChar = csvSource.begin();
       while (aChar != csvSource.end())
       {
          switch (*aChar)
          {
          case '"':
             newLine = false;
             inQuote = !inQuote;
             break;

          case ',':
             newLine = false;
             if (inQuote == true)
             {
                field += *aChar;
             }
             else
             {
                line.push_back(field);
                field.clear();
             }
             break;

          case '\n':
          case '\r':
             if (inQuote == true)
             {
                field += *aChar;
             }
             else
             {
                if (newLine == false)
                {
                   line.push_back(field);
                   lines.push_back(line);
                   field.clear();
                   line.clear();
                   newLine = true;
                }
             }
             break;

          default:
             newLine = false;
             field.push_back(*aChar);
             break;
          }

          aChar++;
       }

       if (field.size())
          line.push_back(field);

       if (line.size())
          lines.push_back(line);
    }

17

ответ дан 23 November 2019 в 03:04

Другие вопросы по тегам:

text c++ parsing csv

Похожие вопросы:

score 275 · Accepted Answer

Если вас не волнует экранирование запятой и новой строки,
И вы не можете вставлять запятую и новую строку в кавычки (если вы не можете убежать, тогда ...)
тогда у него всего около трех строк кода (ОК 14 -> Но достаточно 15, чтобы прочитать весь файл).

std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
{
    std::vector<std::string>   result;
    std::string                line;
    std::getline(str,line);

    std::stringstream          lineStream(line);
    std::string                cell;

    while(std::getline(lineStream,cell, ','))
    {
        result.push_back(cell);
    }
    // This checks for a trailing comma with no data after it.
    if (!lineStream && cell.empty())
    {
        // If there was a trailing comma then add an empty element.
        result.push_back("");
    }
    return result;
}

Я бы просто создал класс, представляющий строку.
Затем выполните поток в этот объект:

#include <iterator>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <string>

class CSVRow
{
    public:
        std::string const& operator[](std::size_t index) const
        {
            return m_data[index];
        }
        std::size_t size() const
        {
            return m_data.size();
        }
        void readNextRow(std::istream& str)
        {
            std::string         line;
            std::getline(str, line);

            std::stringstream   lineStream(line);
            std::string         cell;

            m_data.clear();
            while(std::getline(lineStream, cell, ','))
            {
                m_data.push_back(cell);
            }
            // This checks for a trailing comma with no data after it.
            if (!lineStream && cell.empty())
            {
                // If there was a trailing comma then add an empty element.
                m_data.push_back("");
            }
        }
    private:
        std::vector<std::string>    m_data;
};

std::istream& operator>>(std::istream& str, CSVRow& data)
{
    data.readNextRow(str);
    return str;
}   
int main()
{
    std::ifstream       file("plop.csv");

    CSVRow              row;
    while(file >> row)
    {
        std::cout << "4th Element(" << row[3] << ")\n";
    }
}

Но с небольшой работой мы могли бы технически создать итератор:

class CSVIterator
{   
    public:
        typedef std::input_iterator_tag     iterator_category;
        typedef CSVRow                      value_type;
        typedef std::size_t                 difference_type;
        typedef CSVRow*                     pointer;
        typedef CSVRow&                     reference;

        CSVIterator(std::istream& str)  :m_str(str.good()?&str:NULL) { ++(*this); }
        CSVIterator()                   :m_str(NULL) {}

        // Pre Increment
        CSVIterator& operator++()               {if (m_str) { if (!((*m_str) >> m_row)){m_str = NULL;}}return *this;}
        // Post increment
        CSVIterator operator++(int)             {CSVIterator    tmp(*this);++(*this);return tmp;}
        CSVRow const& operator*()   const       {return m_row;}
        CSVRow const* operator->()  const       {return &m_row;}

        bool operator==(CSVIterator const& rhs) {return ((this == &rhs) || ((this->m_str == NULL) && (rhs.m_str == NULL)));}
        bool operator!=(CSVIterator const& rhs) {return !((*this) == rhs);}
    private:
        std::istream*       m_str;
        CSVRow              m_row;
};


int main()
{
    std::ifstream       file("plop.csv");

    for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
    {
        std::cout << "4th Element(" << (*loop)[3] << ")\n";
    }
}