Skip to content

Commit

Permalink
Merge pull request #131 from TripalCultivate/g4.115-delimitedCaseStrings
Browse files Browse the repository at this point in the history
Add cases to valid file delimiter validator
  • Loading branch information
laceysanderson authored Jan 8, 2025
2 parents b762537 + 970e3a0 commit 3d934ca
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,8 @@ public function processValidDelimitedFileFailures(array $failures) {
($validation_result['case'] == 'None of the delimiters supported by the file type was used')) {
$table_case = 'unsupported';
}
elseif ($validation_result['case'] == 'Raw row is not delimited') {
elseif (($validation_result['case'] == 'Raw row exceeds number of strict columns') ||
($validation_result['case'] == 'Raw row has insufficient number of columns')) {
$table_case = 'delimited';
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,36 +97,33 @@ public function validateRawRow(string $raw_row) {
];
}

// With the list of delimiters identified in the raw row, try each delimiter
// separately to see if number of values is the expected number of columns.
// Store every delimiter that failed into the failed delimiters array.
$delimiters_failed = [];

foreach ($delimiters_used as $delimiter) {
$columns = TripalCultivatePhenotypesValidatorBase::splitRowIntoColumns($raw_row, $input_file_mime_type);
$columns = TripalCultivatePhenotypesValidatorBase::splitRowIntoColumns($raw_row, $input_file_mime_type);
$no_cols = count($columns);

if ($no_cols > $expected_columns['number_of_columns']) {
// The line has more columns than expected.
if ($expected_columns['strict']) {
// A strict comparison - exact match only.
if (count($columns) != $expected_columns['number_of_columns']) {
array_push($delimiters_failed, $delimiter);
}
}
else {
// Not a strict comparison - at least x number of columns.
if (count($columns) < $expected_columns['number_of_columns']) {
array_push($delimiters_failed, $delimiter);
}
return [
'case' => 'Raw row exceeds number of strict columns',
'valid' => FALSE,
'failedItems' => [
'raw_row' => $raw_row,
'expected_columns' => $expected_columns['number_of_columns'],
'strict' => $expected_columns['strict'],
],
];
}
}

// If the failed delimiters array contains the same number of delimiters
// attempted, then every delimiter failed to split the line as required.
if ($delimiters_used == $delimiters_failed) {
if ($no_cols < $expected_columns['number_of_columns']) {
// The line has fewer columns than expected.
return [
'case' => 'Raw row is not delimited',
'case' => 'Raw row has insufficient number of columns',
'valid' => FALSE,
'failedItems' => [
'raw_row' => $raw_row,
'expected_columns' => $expected_columns['number_of_columns'],
'strict' => $expected_columns['strict'],
],
];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,22 +107,45 @@ public function provideRawRowToDelimitedFileValidator() {
],
],

// #2: Not the expected number of columns (strict comparison).
// #2: Not the expected number of columns (more and strict comparison).
[
'column number mismatch',
'column number mismatch - more',
"Data Value One\tData Value Two\tData Value Three\tData Value Four\tData Value Five",
[
'number_of_columns' => 4,
'strict' => TRUE,
],
[
'case' => 'Raw row exceeds number of strict columns',
'valid' => FALSE,
'failedItems' => [
'raw_row' => "Data Value One\tData Value Two\tData Value Three\tData Value Four\tData Value Five",
'expected_columns' => 4,
'strict' => TRUE,
],
],
],

// #3: Not the expected number of columns (less and strict comparison).
[
'column number mismatch - less',
"Data Value One\tData Value Two\tData Value Three",
[
'number_of_columns' => 4,
'strict' => TRUE,
],
[
'case' => 'Raw row is not delimited',
'case' => 'Raw row has insufficient number of columns',
'valid' => FALSE,
'failedItems' => ['raw_row' => "Data Value One\tData Value Two\tData Value Three"],
'failedItems' => [
'raw_row' => "Data Value One\tData Value Two\tData Value Three",
'expected_columns' => 4,
'strict' => TRUE,
],
],
],

// #3: Not the expected number of columns (not strict comparison).
// #4: Not the expected number of columns (not strict comparison).
[
'column number failed minimum',
"Data Value One\tData Value Two\tData Value Three",
Expand All @@ -131,13 +154,17 @@ public function provideRawRowToDelimitedFileValidator() {
'strict' => FALSE,
],
[
'case' => 'Raw row is not delimited',
'case' => 'Raw row has insufficient number of columns',
'valid' => FALSE,
'failedItems' => ['raw_row' => "Data Value One\tData Value Two\tData Value Three"],
'failedItems' => [
'raw_row' => "Data Value One\tData Value Two\tData Value Three",
'expected_columns' => 4,
'strict' => FALSE,
],
],
],

// #4: Line has 2 different delimiters (tab + comma) where one is used to
// #5: Line has 2 different delimiters (tab + comma) where one is used to
// delimit values and the other exists within the values.
[
'two delimiters used',
Expand All @@ -153,7 +180,7 @@ public function provideRawRowToDelimitedFileValidator() {
],
],

// #5: Valid raw row and expecting exactly 4 columns.
// #6: Valid raw row and expecting exactly 4 columns.
[
'valid raw row with exact columns',
"Data Value One\tData Value Two\tData Value Three\tData Value Four",
Expand All @@ -168,7 +195,7 @@ public function provideRawRowToDelimitedFileValidator() {
],
],

// #6: Valid raw row and expecting at least 3 columns.
// #7: Valid raw row and expecting at least 3 columns.
[
'valid raw row with minimum columns',
"Data Value One\tData Value Two\tData Value Three\tData Value Four",
Expand All @@ -183,7 +210,7 @@ public function provideRawRowToDelimitedFileValidator() {
],
],

// #7: Raw row has one column with strict flag set to FALSE (mininum).
// #8: Raw row has one column with strict flag set to FALSE (minimum).
[
'one column with strict set to false',
"Data Value One",
Expand All @@ -198,7 +225,7 @@ public function provideRawRowToDelimitedFileValidator() {
],
],

// #8: Raw row has one column with strict flag set to TRUE (exact match).
// #9: Raw row has one column with strict flag set to TRUE (exact match).
[
'one column with strict flag set to true',
"Data Value One",
Expand Down

0 comments on commit 3d934ca

Please sign in to comment.