Merge pull request #110 from coralogix/arrow23
Update to arrow-rs 23
thinkharderdev authored Sep 29, 2022
2 parents f3cdf6b + 7f45d3d commit 7fe5641
Showing 75 changed files with 1,345 additions and 482 deletions.
102 changes: 0 additions & 102 deletions .github/workflows/rust.yml
@@ -165,108 +165,6 @@ jobs:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres

  windows:
    name: cargo test (win64)
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - name: Install protobuf compiler
        shell: bash
        run: |
          mkdir -p $HOME/d/protoc
          cd $HOME/d/protoc
          export PROTO_ZIP="protoc-21.4-win64.zip"
          curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
          unzip $PROTO_ZIP
          export PATH=$PATH:$HOME/d/protoc/bin
          protoc.exe --version
      # TODO: this won't cache anything, which is expensive. Setup this action
      # with a OS-dependent path.
      - name: Setup Rust toolchain
        run: |
          rustup toolchain install stable
          rustup default stable
          rustup component add rustfmt
      - name: Run tests
        shell: bash
        run: |
          export PATH=$PATH:$HOME/d/protoc/bin
          cargo test
        env:
          # do not produce debug symbols to keep memory usage down
          RUSTFLAGS: "-C debuginfo=0"

  macos:
    name: cargo test (mac)
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - name: Install protobuf compiler
        shell: bash
        run: |
          mkdir -p $HOME/d/protoc
          cd $HOME/d/protoc
          export PROTO_ZIP="protoc-21.4-osx-x86_64.zip"
          curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
          unzip $PROTO_ZIP
          echo "$HOME/d/protoc/bin" >> $GITHUB_PATH
          export PATH=$PATH:$HOME/d/protoc/bin
          protoc --version
      # TODO: this won't cache anything, which is expensive. Setup this action
      # with a OS-dependent path.
      - name: Setup Rust toolchain
        run: |
          rustup toolchain install stable
          rustup default stable
          rustup component add rustfmt
      - name: Run tests
        shell: bash
        run: |
          cargo test
        env:
          # do not produce debug symbols to keep memory usage down
          RUSTFLAGS: "-C debuginfo=0"

  test-datafusion-pyarrow:
    name: cargo test pyarrow (amd64)
    needs: [linux-build-lib]
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
      env:
        # Disable full debug symbol generation to speed up CI build and keep memory down
        # "1" means line tables only, which is useful for panic tracebacks.
        RUSTFLAGS: "-C debuginfo=1"
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true
      - name: Cache Cargo
        uses: actions/cache@v3
        with:
          path: /github/home/.cargo
          # this key equals the ones on `linux-build-lib` for re-use
          key: cargo-cache-
      - uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: Install PyArrow
        run: |
          echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
          python -m pip install pyarrow
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: stable
      - name: Run tests
        run: |
          cd datafusion
          cargo test --features=pyarrow
  lint:
    name: Lint
    runs-on: ubuntu-latest
1 change: 1 addition & 0 deletions README.md
@@ -88,6 +88,7 @@ Here are some of the projects known to use DataFusion:
- [delta-rs](https://github.com/delta-io/delta-rs) Native Rust implementation of Delta Lake
- [Flock](https://github.com/flock-lab/flock)
- [InfluxDB IOx](https://github.com/influxdata/influxdb_iox) Time Series Database
- [Parseable](https://github.com/parseablehq/parseable) Log storage and observability platform
- [qv](https://github.com/timvw/qv) Quickly view your data
- [ROAPI](https://github.com/roapi/roapi)
- [Tensorbase](https://github.com/tensorbase/tensorbase)
2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
@@ -29,7 +29,7 @@ rust-version = "1.62"
readme = "README.md"

[dependencies]
arrow = "22.0.0"
arrow = "23.0.0"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "12.0.0" }
dirs = "4.0.0"
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
@@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow-flight = "22.0.0"
arrow-flight = "23.0.0"
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
futures = "0.3"
5 changes: 4 additions & 1 deletion datafusion-examples/examples/flight_server.rs
@@ -19,6 +19,7 @@ use std::pin::Pin;
use std::sync::Arc;

use arrow_flight::SchemaAsIpc;
use datafusion::arrow::error::ArrowError;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::{ListingOptions, ListingTableUrl};
use futures::Stream;
@@ -77,7 +78,9 @@ impl FlightService for FlightServiceImpl {
            .unwrap();

        let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default();
        let schema_result = SchemaAsIpc::new(&schema, &options).into();
        let schema_result = SchemaAsIpc::new(&schema, &options)
            .try_into()
            .map_err(|e: ArrowError| tonic::Status::internal(e.to_string()))?;

        Ok(Response::new(schema_result))
    }
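For context on the hunk above: in arrow-flight 23 the SchemaAsIpc-to-SchemaResult conversion is fallible, so the handler switches from into() to try_into() and maps the ArrowError into a tonic::Status. A minimal sketch of the same pattern in isolation (the helper name encode_schema is illustrative, not part of the repository):

use arrow_flight::{SchemaAsIpc, SchemaResult};
use datafusion::arrow::datatypes::Schema;
use datafusion::arrow::error::ArrowError;
use datafusion::arrow::ipc::writer::IpcWriteOptions;

// Encode a schema for a Flight get_schema response, surfacing IPC encoding
// failures as a gRPC internal error instead of panicking.
fn encode_schema(schema: &Schema) -> Result<SchemaResult, tonic::Status> {
    let options = IpcWriteOptions::default();
    SchemaAsIpc::new(schema, &options)
        .try_into()
        .map_err(|e: ArrowError| tonic::Status::internal(e.to_string()))
}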
5 changes: 2 additions & 3 deletions datafusion/common/Cargo.toml
@@ -39,12 +39,11 @@ pyarrow = ["pyo3"]

[dependencies]
apache-avro = { version = "0.14", features = ["snappy"], optional = true }
arrow = { version = "22.0.0", features = ["prettyprint"] }
arrow = { version = "23.0.0", features = ["prettyprint"] }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
cranelift-module = { version = "0.87.0", optional = true }
object_store = { version = "0.5.0", optional = true }
ordered-float = "3.0"
parquet = { version = "22.0.0", features = ["arrow"], optional = true }
parquet = { version = "23.0.0", features = ["arrow"], optional = true }
pyo3 = { version = "0.17.1", optional = true }
serde_json = "1.0"
sqlparser = "0.23"
6 changes: 4 additions & 2 deletions datafusion/common/src/lib.rs
@@ -35,6 +35,7 @@ pub use scalar::{ScalarType, ScalarValue};
#[macro_export]
macro_rules! downcast_value {
    ($Value: expr, $Type: ident) => {{
        use std::any::type_name;
        $Value.as_any().downcast_ref::<$Type>().ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
@@ -43,10 +44,11 @@ macro_rules! downcast_value {
        })?
    }};
    ($Value: expr, $Type: ident, $T: tt) => {{
        $Value.as_any().downcast_ref::<$Type<T>>().ok_or_else(|| {
        use std::any::type_name;
        $Value.as_any().downcast_ref::<$Type<$T>>().ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast value to {}",
                type_name::<$Type<T>>()
                type_name::<$Type<$T>>()
            ))
        })?
    }};
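For reference, the fix above makes the macro's three-argument arm substitute the caller-supplied type parameter ($T) instead of a hard-coded T. A small sketch of a caller, assuming arrow and datafusion-common as dependencies (the function first_value is illustrative only):

use arrow::array::{ArrayRef, PrimitiveArray};
use arrow::datatypes::Int64Type;
use datafusion_common::{downcast_value, DataFusionError};

// Downcast a dynamically typed array to PrimitiveArray<Int64Type> and read the
// first element; the macro wraps a failed downcast in DataFusionError::Internal.
fn first_value(array: &ArrayRef) -> Result<i64, DataFusionError> {
    let typed = downcast_value!(array, PrimitiveArray, Int64Type);
    Ok(typed.value(0)) // assumes a non-empty array
}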
6 changes: 3 additions & 3 deletions datafusion/core/Cargo.toml
@@ -56,7 +56,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion
[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.14", optional = true }
arrow = { version = "22.0.0", features = ["prettyprint"] }
arrow = { version = "23.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
bytes = "1.1"
chrono = { version = "0.4", default-features = false }
@@ -78,7 +78,7 @@ num_cpus = "1.13.0"
object_store = "0.5.0"
ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "22.0.0", features = ["arrow", "async"] }
parquet = { version = "23.0.0", features = ["arrow", "async"] }
paste = "^1.0"
pin-project-lite = "^0.2.7"
pyo3 = { version = "0.17.1", optional = true }
@@ -93,7 +93,7 @@ url = "2.2"
uuid = { version = "1.0", features = ["v4"] }

[dev-dependencies]
arrow = { version = "22.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
arrow = { version = "23.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
async-trait = "0.1.53"
criterion = "0.4"
csv = "1.1.6"
2 changes: 1 addition & 1 deletion datafusion/core/fuzz-utils/Cargo.toml
@@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { version = "22.0.0", features = ["prettyprint"] }
arrow = { version = "23.0.0", features = ["prettyprint"] }
env_logger = "0.9.0"
rand = "0.8"
7 changes: 2 additions & 5 deletions datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
@@ -20,8 +20,7 @@
use crate::arrow::array::{
    make_array, Array, ArrayBuilder, ArrayData, ArrayDataBuilder, ArrayRef,
    BooleanBuilder, LargeStringArray, ListBuilder, NullArray, OffsetSizeTrait,
    PrimitiveArray, PrimitiveBuilder, StringArray, StringBuilder,
    StringDictionaryBuilder,
    PrimitiveArray, StringArray, StringBuilder, StringDictionaryBuilder,
};
use crate::arrow::buffer::{Buffer, MutableBuffer};
use crate::arrow::datatypes::{
@@ -171,9 +170,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
    where
        T: ArrowPrimitiveType + ArrowDictionaryKeyType,
    {
        let key_builder = PrimitiveBuilder::<T>::with_capacity(row_len);
        let values_builder = StringBuilder::with_capacity(row_len, 5);
        StringDictionaryBuilder::new(key_builder, values_builder)
        StringDictionaryBuilder::with_capacity(row_len, row_len, row_len)
    }

    fn build_wrapped_list_array(
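For reference, the hunk above reflects the arrow 23 builder API: StringDictionaryBuilder now takes capacities directly instead of pre-built key and value builders. A minimal sketch of the new constructor, assuming arrow/datafusion 23 as a dependency; the function dictionary_of is illustrative, and the capacity arguments follow our reading of the arrow docs (keys, distinct values, value bytes):

use std::sync::Arc;

use datafusion::arrow::array::{ArrayRef, StringDictionaryBuilder};
use datafusion::arrow::datatypes::Int32Type;

// Build a dictionary-encoded string array with pre-allocated capacities.
fn dictionary_of(values: &[&str]) -> ArrayRef {
    let mut builder = StringDictionaryBuilder::<Int32Type>::with_capacity(
        values.len(),
        values.len(),
        values.len() * 8,
    );
    for v in values {
        // append interns the string and returns its dictionary key
        builder.append(*v).expect("dictionary append failed");
    }
    Arc::new(builder.finish())
}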
34 changes: 33 additions & 1 deletion datafusion/core/src/config.rs
@@ -47,6 +47,9 @@ pub const OPT_COALESCE_TARGET_BATCH_SIZE: &str =
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
    "datafusion.optimizer.skip_failed_rules";

/// Configuration option "datafusion.execution.time_zone"
pub const OPT_TIME_ZONE: &str = "datafusion.execution.time_zone";

/// Definition of a configuration option
pub struct ConfigDefinition {
    /// key used to identifier this configuration option
@@ -102,6 +105,20 @@ impl ConfigDefinition {
            ScalarValue::UInt64(Some(default_value)),
        )
    }

    /// Create a configuration option definition with a string value
    pub fn new_string(
        key: impl Into<String>,
        description: impl Into<String>,
        default_value: String,
    ) -> Self {
        Self::new(
            key,
            description,
            DataType::Utf8,
            ScalarValue::Utf8(Some(default_value)),
        )
    }
}

/// Contains definitions for all built-in configuration options
@@ -167,7 +184,14 @@ impl BuiltInConfigs {
            messages if any optimization rules produce errors and then proceed to the next \
            rule. When set to false, any rules that produce errors will cause the query to fail.",
            true
        )],
        ),
        ConfigDefinition::new_string(
            OPT_TIME_ZONE,
            "The session time zone which some function require \
            e.g. EXTRACT(HOUR from SOME_TIME) shift the underline datetime according to the time zone,
            then extract the hour",
            "UTC".into()
        )]
    }
}

@@ -279,6 +303,14 @@ impl ConfigOptions {
        }
    }

    /// get a string configuration option
    pub fn get_string(&self, key: &str) -> String {
        match self.get(key) {
            Some(ScalarValue::Utf8(Some(s))) => s,
            _ => "".into(),
        }
    }

    /// Access the underlying hashmap
    pub fn options(&self) -> &HashMap<String, ScalarValue> {
        &self.options
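For context, the config.rs changes above register a new string-typed option, "datafusion.execution.time_zone", defaulting to "UTC", and add ConfigOptions::get_string for reading it. A small usage sketch, assuming a ConfigOptions populated with the built-in defaults (session_time_zone is an illustrative helper, not part of the diff):

use datafusion::config::{ConfigOptions, OPT_TIME_ZONE};

// Read the session time zone from a default-initialized config; per
// get_string above, a missing or non-string value yields an empty string.
fn session_time_zone() -> String {
    let config = ConfigOptions::new();
    config.get_string(OPT_TIME_ZONE) // "UTC" with the built-in defaults
}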