diff --git a/include/csv.hpp b/include/csv.hpp index dcfcab1e..57124858 100644 --- a/include/csv.hpp +++ b/include/csv.hpp @@ -1,10 +1,10 @@ /* -CSV for C++, version 1.2.2 +CSV for C++, version 1.2.4 https://github.com/vincentlaucsb/csv-parser MIT License -Copyright (c) 2017-2019 Vincent La +Copyright (c) 2017-2020 Vincent La Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/internal/csv_reader.cpp b/include/internal/csv_reader.cpp index 2cfeb458..48e9e7f5 100644 --- a/include/internal/csv_reader.cpp +++ b/include/internal/csv_reader.cpp @@ -287,7 +287,11 @@ namespace csv { /** Return the CSV's column names as a vector of strings. */ CSV_INLINE std::vector CSVReader::get_col_names() const { - return this->col_names->get_col_names(); + if (this->col_names) { + return this->col_names->get_col_names(); + } + + return std::vector(); } /** Return the index of the column name if found or @@ -441,7 +445,7 @@ namespace csv { * Drop it otherwise. */ - if (header_was_parsed) { + if (this->col_names) { // Make sure record is of the right length const size_t row_size = this->record_buffer->splits_size(); if (row_size + 1 == this->n_cols) { @@ -453,14 +457,19 @@ namespace csv { * 2) Too short or too long */ this->row_num--; - if (row_size > 0) + if (row_size > 0) { bad_row_handler(std::vector(CSVRow( this->record_buffer))); + } } } else if (this->row_num == this->header_row) { this->set_col_names(std::vector(CSVRow(this->record_buffer))); - } // else: Ignore rows before header row + } + else { + // Ignore rows before header row + this->record_buffer->get_row(); + } this->row_num++; } @@ -508,7 +517,6 @@ namespace csv { { this->col_names = std::make_shared(names); this->record_buffer->col_names = this->col_names; - this->header_was_parsed = true; this->n_cols = names.size(); } diff --git a/include/internal/csv_reader.hpp b/include/internal/csv_reader.hpp index 404e4574..af462fa5 100644 --- a/include/internal/csv_reader.hpp +++ b/include/internal/csv_reader.hpp @@ -222,15 +222,11 @@ namespace csv { /** @name Parser State */ ///@{ /** Pointer to a object containing column information */ - internals::ColNamesPtr col_names = std::make_shared( - std::vector({})); + internals::ColNamesPtr col_names = nullptr; /** Whether or not an attempt to find Unicode BOM has been made */ bool unicode_bom_scan = false; - /** Whether or not we have parsed the header row */ - bool header_was_parsed = false; - /** The number of columns in this CSV */ size_t n_cols = 0; ///@} diff --git a/include/internal/row_buffer.cpp b/include/internal/row_buffer.cpp index cd031173..9f66b886 100644 --- a/include/internal/row_buffer.cpp +++ b/include/internal/row_buffer.cpp @@ -26,6 +26,9 @@ namespace csv { return this->col_names.size(); } + /** Get the current row in the buffer + * @note Has the side effect of updating the current end pointer + */ CSV_INLINE csv::string_view RawRowBuffer::get_row() { csv::string_view ret( this->buffer.c_str() + this->current_end, // Beginning of string diff --git a/single_include/csv.hpp b/single_include/csv.hpp index beb1712a..8b08be83 100644 --- a/single_include/csv.hpp +++ b/single_include/csv.hpp @@ -1,11 +1,11 @@ #pragma once /* -CSV for C++, version 1.2.2 +CSV for C++, version 1.2.4 https://github.com/vincentlaucsb/csv-parser MIT License -Copyright (c) 2017-2019 Vincent La +Copyright (c) 2017-2020 Vincent La Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -4273,15 +4273,11 @@ namespace csv { /** @name Parser State */ ///@{ /** Pointer to a object containing column information */ - internals::ColNamesPtr col_names = std::make_shared( - std::vector({})); + internals::ColNamesPtr col_names = nullptr; /** Whether or not an attempt to find Unicode BOM has been made */ bool unicode_bom_scan = false; - /** Whether or not we have parsed the header row */ - bool header_was_parsed = false; - /** The number of columns in this CSV */ size_t n_cols = 0; ///@} @@ -4767,7 +4763,11 @@ namespace csv { /** Return the CSV's column names as a vector of strings. */ CSV_INLINE std::vector CSVReader::get_col_names() const { - return this->col_names->get_col_names(); + if (this->col_names) { + return this->col_names->get_col_names(); + } + + return std::vector(); } /** Return the index of the column name if found or @@ -4921,7 +4921,7 @@ namespace csv { * Drop it otherwise. */ - if (header_was_parsed) { + if (this->col_names) { // Make sure record is of the right length const size_t row_size = this->record_buffer->splits_size(); if (row_size + 1 == this->n_cols) { @@ -4933,14 +4933,19 @@ namespace csv { * 2) Too short or too long */ this->row_num--; - if (row_size > 0) + if (row_size > 0) { bad_row_handler(std::vector(CSVRow( this->record_buffer))); + } } } else if (this->row_num == this->header_row) { this->set_col_names(std::vector(CSVRow(this->record_buffer))); - } // else: Ignore rows before header row + } + else { + // Ignore rows before header row + this->record_buffer->get_row(); + } this->row_num++; } @@ -4988,7 +4993,6 @@ namespace csv { { this->col_names = std::make_shared(names); this->record_buffer->col_names = this->col_names; - this->header_was_parsed = true; this->n_cols = names.size(); } @@ -5926,6 +5930,9 @@ namespace csv { return this->col_names.size(); } + /** Get the current row in the buffer + * @note Has the side effect of updating the current end pointer + */ CSV_INLINE csv::string_view RawRowBuffer::get_row() { csv::string_view ret( this->buffer.c_str() + this->current_end, // Beginning of string diff --git a/single_include_test/csv.hpp b/single_include_test/csv.hpp index beb1712a..8b08be83 100644 --- a/single_include_test/csv.hpp +++ b/single_include_test/csv.hpp @@ -1,11 +1,11 @@ #pragma once /* -CSV for C++, version 1.2.2 +CSV for C++, version 1.2.4 https://github.com/vincentlaucsb/csv-parser MIT License -Copyright (c) 2017-2019 Vincent La +Copyright (c) 2017-2020 Vincent La Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -4273,15 +4273,11 @@ namespace csv { /** @name Parser State */ ///@{ /** Pointer to a object containing column information */ - internals::ColNamesPtr col_names = std::make_shared( - std::vector({})); + internals::ColNamesPtr col_names = nullptr; /** Whether or not an attempt to find Unicode BOM has been made */ bool unicode_bom_scan = false; - /** Whether or not we have parsed the header row */ - bool header_was_parsed = false; - /** The number of columns in this CSV */ size_t n_cols = 0; ///@} @@ -4767,7 +4763,11 @@ namespace csv { /** Return the CSV's column names as a vector of strings. */ CSV_INLINE std::vector CSVReader::get_col_names() const { - return this->col_names->get_col_names(); + if (this->col_names) { + return this->col_names->get_col_names(); + } + + return std::vector(); } /** Return the index of the column name if found or @@ -4921,7 +4921,7 @@ namespace csv { * Drop it otherwise. */ - if (header_was_parsed) { + if (this->col_names) { // Make sure record is of the right length const size_t row_size = this->record_buffer->splits_size(); if (row_size + 1 == this->n_cols) { @@ -4933,14 +4933,19 @@ namespace csv { * 2) Too short or too long */ this->row_num--; - if (row_size > 0) + if (row_size > 0) { bad_row_handler(std::vector(CSVRow( this->record_buffer))); + } } } else if (this->row_num == this->header_row) { this->set_col_names(std::vector(CSVRow(this->record_buffer))); - } // else: Ignore rows before header row + } + else { + // Ignore rows before header row + this->record_buffer->get_row(); + } this->row_num++; } @@ -4988,7 +4993,6 @@ namespace csv { { this->col_names = std::make_shared(names); this->record_buffer->col_names = this->col_names; - this->header_was_parsed = true; this->n_cols = names.size(); } @@ -5926,6 +5930,9 @@ namespace csv { return this->col_names.size(); } + /** Get the current row in the buffer + * @note Has the side effect of updating the current end pointer + */ CSV_INLINE csv::string_view RawRowBuffer::get_row() { csv::string_view ret( this->buffer.c_str() + this->current_end, // Beginning of string diff --git a/tests/test_read_csv.cpp b/tests/test_read_csv.cpp index a1ff3e0b..49781143 100644 --- a/tests/test_read_csv.cpp +++ b/tests/test_read_csv.cpp @@ -389,4 +389,34 @@ bar-category,,bar-project REQUIRE(second_row["category"] == "bar-category"); REQUIRE(second_row["subcategory"] == ""); REQUIRE(second_row["project name"] == "bar-project"); +} + +// Reported in: https://github.com/vincentlaucsb/csv-parser/issues/67 +TEST_CASE("Comments in Header Regression", "[comments_in_header_regression]") { + std::string csv_string(R"(# some extra metadata +# some extra metadata +timestamp,distance,angle,amplitude +22857782,30000,-3141.59,0 +22857786,30000,-3141.09,0 +)"); + + auto format = csv::CSVFormat(); + format.header_row(2); + + csv::CSVReader reader(format); + reader.feed(csv_string); + reader.end_feed(); + + for (auto& str : reader.get_col_names()) { + std::cout << str << std::endl; + } + + std::vector expected = { + "timestamp", "distance", "angle", "amplitude" + }; + + // Original issue: Leading comments appeared in column names + for (size_t i = 0; i < expected.size(); i++) { + REQUIRE(expected[i] == reader.get_col_names()[i]); + } } \ No newline at end of file