#include <csv2/reader.hpp> int main() { csv2::Reader<csv2::delimiter<','>, csv2::quote_character<'"'>, csv2::first_row_is_header<true>, csv2::trim_policy::trim_whitespace> csv; if (csv.mmap("foo.csv")) { const auto header = csv.header(); for (const auto row: csv) { for (const auto cell: row) { // Do something with cell value // std::string value; // cell.read_value(value); } } } }
This benchmark measures the average execution time (of 5 runs after 3 warmup runs) for csv2
to memory-map the input CSV file and iterate over every cell in the CSV. See benchmark/main.cpp
for more details.
cd benchmark g++ -I../include -O3 -std=c++11 -o main main.cpp ./main <csv_file>Type Value Processor 11th Gen Intel(R) Core(TM) i9-11900KF @ 3.50GHz 3.50 GHz Installed RAM 32.0 GB (31.9 GB usable) SSD ADATA SX8200PNP OS Ubuntu 20.04 LTS running on WSL in Windows 11 C++ Compiler g++ (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0 Results (as of 23 SEP 2022) Dataset File Size Rows Cols Time Denver Crime Data 111 MB 479,100 19 0.102s AirBnb Paris Listings 196 MB 141,730 96 0.170s 2015 Flight Delays and Cancellations 574 MB 5,819,079 31 0.603s StackLite: Stack Overflow questions 870 MB 17,203,824 7 0.911s Used Cars Dataset 1.4 GB 539,768 25 0.947s Title-Based Semantic Subject Indexing 3.7 GB 12,834,026 4 2.867s Bitcoin tweets - 16M tweets 4 GB 47,478,748 9 3.290s DDoS Balanced Dataset 6.3 GB 12,794,627 85 6.963s Seattle Checkouts by Title 7.1 GB 34,892,623 11 7.698s SHA-1 password hash dump 11 GB 2,62,974,241 2 10.775s DOHUI NOH scaled_data 16 GB 496,782 3213 16.553s
Here is the public API available to you:
template <class delimiter = delimiter<','>, class quote_character = quote_character<'"'>, class first_row_is_header = first_row_is_header<true>, class trim_policy = trim_policy::trim_whitespace> class Reader { public: // Use this if you'd like to mmap and read from file bool mmap(string_type filename); // Use this if you have the CSV contents in std::string already bool parse(string_type contents); // Shape size_t rows() const; size_t cols() const; // Row iterator // If first_row_is_header, row iteration will start // from the second row RowIterator begin() const; RowIterator end() const; // Access the first row of the CSV Row header() const; };
Here's the Row
class:
// Row class class Row { public: // Get raw contents of the row void read_raw_value(Container& value) const; // Cell iterator CellIterator begin() const; CellIterator end() const; };
and here's the Cell
class:
// Cell class class Cell { public: // Get raw contents of the cell void read_raw_value(Container& value) const; // Get converted contents of the cell // Handles escaped content, e.g., // """foo""" => ""foo"" void read_value(Container& value) const; };
This library also provides a basic csv2::Writer
class - one that can be used to write CSV rows to file. Here's a basic usage:
#include <csv2/writer.hpp> #include <vector> #include <string> using namespace csv2; int main() { std::ofstream stream("foo.csv"); Writer<delimiter<','>> writer(stream); std::vector<std::vector<std::string>> rows = { {"a", "b", "c"}, {"1", "2", "3"}, {"4", "5", "6"} }; writer.write_rows(rows); stream.close(); }
Here is the public API available to you:
template <class delimiter = delimiter<','>> class Writer { public: // Construct using an std::ofstream Writer(output_file_stream stream); // Use this to write a single row to file void write_row(container_of_strings row); // Use this to write a list of rows to file void write_rows(container_of_rows rows);
mkdir build && cd build cmake -DCSV2_BUILD_TESTS=ON .. make cd test ./csv2_test
python3 utils/amalgamate/amalgamate.py -c single_include.json -s .
Contributions are welcome, have a look at the CONTRIBUTING.md document for more information.
The project is available under the MIT license.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4