Skip to content

Commit

Permalink
Enable empty_file integration test by fixing MapArray children names
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Mar 23, 2024
1 parent e56f1de commit 04a5050
Showing 1 changed file with 44 additions and 6 deletions.
50 changes: 44 additions & 6 deletions tests/integration/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ fn column_projection() {
test_expected_file("TestOrcFile.columnProjection");
}

/// Checks the `TestOrcFile.emptyFile` fixture by delegating to
/// `test_expected_file`.
///
/// Currently ignored: the TODO below attributes the failure to a difference
/// in nullability.
#[test]
#[ignore] // TODO: nullable difference
fn empty_file() {
test_expected_file("TestOrcFile.emptyFile");
}

#[test]
#[ignore] // TODO: Why?
fn meta_data() {
Expand Down Expand Up @@ -111,6 +105,50 @@ fn test1() {
assert_eq!(actual_batch, expected_batch);
}

/// Compares the ORC reader's output for `TestOrcFile.emptyFile` against the
/// feather reference data.
///
/// The reference batch cannot be compared directly: its map column uses the
/// PyArrow child names "key"/"value", while arrow-rs uses "keys"/"values".
/// The expected batch is therefore rebuilt with the first eleven columns
/// reused as-is and the map column reassembled under the arrow-rs names.
#[test]
fn empty_file() {
    // Index of the map column; every column in front of it is reused as-is.
    const MAP_COLUMN: usize = 11;

    let actual_batch = read_orc_file("TestOrcFile.emptyFile");
    let reference_batch = read_feather_file("TestOrcFile.emptyFile");

    // Hand-written arrow-rs flavoured types for the map column, with "keys"
    // and "values" in place of PyArrow's "key" and "value".
    // TODO: surely there is some better way to handle this?
    let value_fields: Fields = vec![
        Field::new("int1", DataType::Int32, true),
        Field::new("string1", DataType::Utf8, true),
    ]
    .into();
    let entries_fields: Fields = vec![
        Field::new("keys", DataType::Utf8, false),
        Field::new("values", DataType::Struct(value_fields), true),
    ]
    .into();
    let entries_field = Arc::new(Field::new_struct("entries", entries_fields.clone(), false));
    let map_type = DataType::Map(entries_field.clone(), false);

    // Schema: the leading fields unchanged, followed by the renamed map field.
    let mut fields = reference_batch.schema().fields[..MAP_COLUMN].to_vec();
    fields.push(Arc::new(Field::new("map", map_type, true)));
    let schema = Arc::new(Schema::new(fields));

    // The MapArray has to be taken apart in order to rebuild it (and its
    // struct child) with the field names declared above; the underlying
    // offsets, child arrays and null buffers are carried over untouched.
    let (_, offsets, reference_entries, map_nulls, ordered) = reference_batch
        .column(MAP_COLUMN)
        .as_map()
        .clone()
        .into_parts();
    let (_, entry_arrays, entry_nulls) = reference_entries.into_parts();
    let renamed_entries = StructArray::new(entries_fields, entry_arrays, entry_nulls);
    let renamed_map = MapArray::new(entries_field, offsets, renamed_entries, map_nulls, ordered);

    // Columns: the leading columns unchanged, followed by the rebuilt map.
    let mut columns = reference_batch.columns()[..MAP_COLUMN].to_vec();
    columns.push(Arc::new(renamed_map));

    let expected_batch = RecordBatch::try_new(schema, columns).unwrap();

    assert_eq!(actual_batch, expected_batch);
}

#[test]
#[ignore] // TODO: Incorrect timezone + representation differs
fn test_date_1900() {
Expand Down

0 comments on commit 04a5050

Please sign in to comment.