Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replacing SessionState with Session and progress towards moving FileFormatFactory out of datasource #14517

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions datafusion-examples/examples/custom_file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ use arrow::{
array::{AsArray, RecordBatch, StringArray, UInt8Array},
datatypes::{DataType, Field, Schema, SchemaRef, UInt64Type},
};
use datafusion::common::{GetExt, Statistics};
use datafusion::execution::session_state::SessionStateBuilder;
use datafusion::physical_expr::LexRequirement;
use datafusion::physical_expr::PhysicalExpr;
use datafusion::{
catalog::Session,
common::{GetExt, Statistics},
};
use datafusion::{
datasource::{
file_format::{
Expand All @@ -35,7 +38,6 @@ use datafusion::{
MemTable,
},
error::Result,
execution::context::SessionState,
physical_plan::ExecutionPlan,
prelude::SessionContext,
};
Expand Down Expand Up @@ -83,7 +85,7 @@ impl FileFormat for TSVFileFormat {

async fn infer_schema(
&self,
state: &SessionState,
state: &dyn Session,
store: &Arc<dyn ObjectStore>,
objects: &[ObjectMeta],
) -> Result<SchemaRef> {
Expand All @@ -94,7 +96,7 @@ impl FileFormat for TSVFileFormat {

async fn infer_stats(
&self,
state: &SessionState,
state: &dyn Session,
store: &Arc<dyn ObjectStore>,
table_schema: SchemaRef,
object: &ObjectMeta,
Expand All @@ -106,7 +108,7 @@ impl FileFormat for TSVFileFormat {

async fn create_physical_plan(
&self,
state: &SessionState,
state: &dyn Session,
conf: FileScanConfig,
filters: Option<&Arc<dyn PhysicalExpr>>,
) -> Result<Arc<dyn ExecutionPlan>> {
Expand All @@ -118,7 +120,7 @@ impl FileFormat for TSVFileFormat {
async fn create_writer_physical_plan(
&self,
input: Arc<dyn ExecutionPlan>,
state: &SessionState,
state: &dyn Session,
conf: FileSinkConfig,
order_requirements: Option<LexRequirement>,
) -> Result<Arc<dyn ExecutionPlan>> {
Expand Down Expand Up @@ -148,7 +150,7 @@ impl TSVFileFactory {
impl FileFormatFactory for TSVFileFactory {
fn create(
&self,
state: &SessionState,
state: &dyn Session,
format_options: &std::collections::HashMap<String, String>,
) -> Result<Arc<dyn FileFormat>> {
let mut new_options = format_options.clone();
Expand Down
6 changes: 6 additions & 0 deletions datafusion/catalog-listing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ rust-version.workspace = true
version.workspace = true

[dependencies]
apache-avro = { version = "0.17", optional = true }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunate -- is there any way we can avoid the (direct) avro dependency on the listing table? If not, it is fine for this PR; I was just wondering.

arrow = { workspace = true }
arrow-schema = { workspace = true }
async-compression = { version = "0.4.0", features = [
Expand All @@ -49,9 +50,14 @@ futures = { workspace = true }
glob = "0.3.0"
itertools = { workspace = true }
log = { workspace = true }
num-traits = { version = "0.2", optional = true }
object_store = { workspace = true }
url = { workspace = true }

[features]
# Used to enable the avro format
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]

[dev-dependencies]
async-trait = { workspace = true }
tempfile = { workspace = true }
Expand Down
Loading