Skip to content

Commit

Permalink
Leading Comments Fix (#69)
Browse files Browse the repository at this point in the history
* Fixed an issue parsing files with leading comments

* Removed unnecessary "header_was_parsed" variable

* Updated csv.hpp
  • Loading branch information
vincentlaucsb authored Jan 20, 2020
1 parent 0102794 commit 0d5c37f
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 36 deletions.
4 changes: 2 additions & 2 deletions include/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/*
CSV for C++, version 1.2.2
CSV for C++, version 1.2.4
https://github.com/vincentlaucsb/csv-parser
MIT License
Copyright (c) 2017-2019 Vincent La
Copyright (c) 2017-2020 Vincent La
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
18 changes: 13 additions & 5 deletions include/internal/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,11 @@ namespace csv {

/** Return the CSV's column names as a vector of strings.
 *
 *  @return A copy of the column names, or an empty vector when
 *          `col_names` is still null (no column names have been set yet).
 */
CSV_INLINE std::vector<std::string> CSVReader::get_col_names() const {
    // col_names may be null, so it must be checked before it is dereferenced
    if (!this->col_names) {
        return {};
    }

    return this->col_names->get_col_names();
}

/** Return the index of the column name if found or
Expand Down Expand Up @@ -441,7 +445,7 @@ namespace csv {
* Drop it otherwise.
*/

if (header_was_parsed) {
if (this->col_names) {
// Make sure record is of the right length
const size_t row_size = this->record_buffer->splits_size();
if (row_size + 1 == this->n_cols) {
Expand All @@ -453,14 +457,19 @@ namespace csv {
* 2) Too short or too long
*/
this->row_num--;
if (row_size > 0)
if (row_size > 0) {
bad_row_handler(std::vector<std::string>(CSVRow(
this->record_buffer)));
}
}
}
else if (this->row_num == this->header_row) {
this->set_col_names(std::vector<std::string>(CSVRow(this->record_buffer)));
} // else: Ignore rows before header row
}
else {
// Ignore rows before header row
this->record_buffer->get_row();
}

this->row_num++;
}
Expand Down Expand Up @@ -508,7 +517,6 @@ namespace csv {
{
this->col_names = std::make_shared<internals::ColNames>(names);
this->record_buffer->col_names = this->col_names;
this->header_was_parsed = true;
this->n_cols = names.size();
}

Expand Down
6 changes: 1 addition & 5 deletions include/internal/csv_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,15 +222,11 @@ namespace csv {
/** @name Parser State */
///@{
/** Pointer to an object containing column information */
internals::ColNamesPtr col_names = std::make_shared<internals::ColNames>(
std::vector<std::string>({}));
internals::ColNamesPtr col_names = nullptr;

/** Whether or not an attempt to find Unicode BOM has been made */
bool unicode_bom_scan = false;

/** Whether or not we have parsed the header row */
bool header_was_parsed = false;

/** The number of columns in this CSV */
size_t n_cols = 0;
///@}
Expand Down
3 changes: 3 additions & 0 deletions include/internal/row_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ namespace csv {
return this->col_names.size();
}

/** Get the current row in the buffer
* @note Has the side effect of updating the current end pointer
*/
CSV_INLINE csv::string_view RawRowBuffer::get_row() {
csv::string_view ret(
this->buffer.c_str() + this->current_end, // Beginning of string
Expand Down
31 changes: 19 additions & 12 deletions single_include/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#pragma once
/*
CSV for C++, version 1.2.2
CSV for C++, version 1.2.4
https://github.com/vincentlaucsb/csv-parser
MIT License
Copyright (c) 2017-2019 Vincent La
Copyright (c) 2017-2020 Vincent La
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -4273,15 +4273,11 @@ namespace csv {
/** @name Parser State */
///@{
/** Pointer to an object containing column information */
internals::ColNamesPtr col_names = std::make_shared<internals::ColNames>(
std::vector<std::string>({}));
internals::ColNamesPtr col_names = nullptr;

/** Whether or not an attempt to find Unicode BOM has been made */
bool unicode_bom_scan = false;

/** Whether or not we have parsed the header row */
bool header_was_parsed = false;

/** The number of columns in this CSV */
size_t n_cols = 0;
///@}
Expand Down Expand Up @@ -4767,7 +4763,11 @@ namespace csv {

/** Return the CSV's column names as a vector of strings.
 *
 *  @return A copy of the column names, or an empty vector when
 *          `col_names` is still null (no column names have been set yet).
 */
CSV_INLINE std::vector<std::string> CSVReader::get_col_names() const {
    // col_names may be null, so it must be checked before it is dereferenced
    if (!this->col_names) {
        return {};
    }

    return this->col_names->get_col_names();
}

/** Return the index of the column name if found or
Expand Down Expand Up @@ -4921,7 +4921,7 @@ namespace csv {
* Drop it otherwise.
*/

if (header_was_parsed) {
if (this->col_names) {
// Make sure record is of the right length
const size_t row_size = this->record_buffer->splits_size();
if (row_size + 1 == this->n_cols) {
Expand All @@ -4933,14 +4933,19 @@ namespace csv {
* 2) Too short or too long
*/
this->row_num--;
if (row_size > 0)
if (row_size > 0) {
bad_row_handler(std::vector<std::string>(CSVRow(
this->record_buffer)));
}
}
}
else if (this->row_num == this->header_row) {
this->set_col_names(std::vector<std::string>(CSVRow(this->record_buffer)));
} // else: Ignore rows before header row
}
else {
// Ignore rows before header row
this->record_buffer->get_row();
}

this->row_num++;
}
Expand Down Expand Up @@ -4988,7 +4993,6 @@ namespace csv {
{
this->col_names = std::make_shared<internals::ColNames>(names);
this->record_buffer->col_names = this->col_names;
this->header_was_parsed = true;
this->n_cols = names.size();
}

Expand Down Expand Up @@ -5926,6 +5930,9 @@ namespace csv {
return this->col_names.size();
}

/** Get the current row in the buffer
* @note Has the side effect of updating the current end pointer
*/
CSV_INLINE csv::string_view RawRowBuffer::get_row() {
csv::string_view ret(
this->buffer.c_str() + this->current_end, // Beginning of string
Expand Down
31 changes: 19 additions & 12 deletions single_include_test/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#pragma once
/*
CSV for C++, version 1.2.2
CSV for C++, version 1.2.4
https://github.com/vincentlaucsb/csv-parser
MIT License
Copyright (c) 2017-2019 Vincent La
Copyright (c) 2017-2020 Vincent La
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -4273,15 +4273,11 @@ namespace csv {
/** @name Parser State */
///@{
/** Pointer to an object containing column information */
internals::ColNamesPtr col_names = std::make_shared<internals::ColNames>(
std::vector<std::string>({}));
internals::ColNamesPtr col_names = nullptr;

/** Whether or not an attempt to find Unicode BOM has been made */
bool unicode_bom_scan = false;

/** Whether or not we have parsed the header row */
bool header_was_parsed = false;

/** The number of columns in this CSV */
size_t n_cols = 0;
///@}
Expand Down Expand Up @@ -4767,7 +4763,11 @@ namespace csv {

/** Return the CSV's column names as a vector of strings.
 *
 *  @return A copy of the column names, or an empty vector when
 *          `col_names` is still null (no column names have been set yet).
 */
CSV_INLINE std::vector<std::string> CSVReader::get_col_names() const {
    // col_names may be null, so it must be checked before it is dereferenced
    if (!this->col_names) {
        return {};
    }

    return this->col_names->get_col_names();
}

/** Return the index of the column name if found or
Expand Down Expand Up @@ -4921,7 +4921,7 @@ namespace csv {
* Drop it otherwise.
*/

if (header_was_parsed) {
if (this->col_names) {
// Make sure record is of the right length
const size_t row_size = this->record_buffer->splits_size();
if (row_size + 1 == this->n_cols) {
Expand All @@ -4933,14 +4933,19 @@ namespace csv {
* 2) Too short or too long
*/
this->row_num--;
if (row_size > 0)
if (row_size > 0) {
bad_row_handler(std::vector<std::string>(CSVRow(
this->record_buffer)));
}
}
}
else if (this->row_num == this->header_row) {
this->set_col_names(std::vector<std::string>(CSVRow(this->record_buffer)));
} // else: Ignore rows before header row
}
else {
// Ignore rows before header row
this->record_buffer->get_row();
}

this->row_num++;
}
Expand Down Expand Up @@ -4988,7 +4993,6 @@ namespace csv {
{
this->col_names = std::make_shared<internals::ColNames>(names);
this->record_buffer->col_names = this->col_names;
this->header_was_parsed = true;
this->n_cols = names.size();
}

Expand Down Expand Up @@ -5926,6 +5930,9 @@ namespace csv {
return this->col_names.size();
}

/** Get the current row in the buffer
* @note Has the side effect of updating the current end pointer
*/
CSV_INLINE csv::string_view RawRowBuffer::get_row() {
csv::string_view ret(
this->buffer.c_str() + this->current_end, // Beginning of string
Expand Down
30 changes: 30 additions & 0 deletions tests/test_read_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,4 +389,34 @@ bar-category,,bar-project
REQUIRE(second_row["category"] == "bar-category");
REQUIRE(second_row["subcategory"] == "");
REQUIRE(second_row["project name"] == "bar-project");
}

// Reported in: https://github.com/vincentlaucsb/csv-parser/issues/67
TEST_CASE("Comments in Header Regression", "[comments_in_header_regression]") {
    // Two comment lines precede the real header, which is the third line
    std::string csv_string(R"(# some extra metadata
# some extra metadata
timestamp,distance,angle,amplitude
22857782,30000,-3141.59,0
22857786,30000,-3141.09,0
)");

    // Point the reader at the third line as the header row
    auto format = csv::CSVFormat();
    format.header_row(2);

    csv::CSVReader reader(format);
    reader.feed(csv_string);
    reader.end_feed();

    const std::vector<std::string> expected = {
        "timestamp", "distance", "angle", "amplitude"
    };

    // Original issue: Leading comments appeared in column names.
    // Compare the whole vectors at once: get_col_names() returns by value and
    // may be empty, so indexing it per element could read out of bounds
    // instead of failing the test, and would not notice extra columns.
    REQUIRE(reader.get_col_names() == expected);
}

0 comments on commit 0d5c37f

Please sign in to comment.