From ae5e5b655bae0878a2dffa98e498ac91fbde1ecd Mon Sep 17 00:00:00 2001
From: klangner
Date: Thu, 18 Apr 2024 15:43:24 +0200
Subject: [PATCH 1/2] #62 Added cli tool to export data in a csv format

---
 Cargo.toml            |  6 +++++-
 src/bin/orc-export.rs | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 src/bin/orc-export.rs

diff --git a/Cargo.toml b/Cargo.toml
index aa8c31f1..de174011 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ rust-version = "1.70"
 
 [dependencies]
 anyhow = { version = "1.0", optional = true }
-arrow = { version = "51", features = ["prettyprint"] }
+arrow = { version = "51", features = ["prettyprint", "chrono-tz"] }
 bytes = "1.4"
 chrono = { version = "0.4.37", default-features = false, features = ["std"] }
 chrono-tz = "0.8.6"
@@ -72,6 +72,10 @@ path = "./examples/datafusion_integration.rs"
 name = "orc-metadata"
 required-features = ["cli"]
 
+[[bin]]
+name = "orc-export"
+required-features = ["cli"]
+
 [[bin]]
 name = "orc-stats"
 required-features = ["cli"]
diff --git a/src/bin/orc-export.rs b/src/bin/orc-export.rs
new file mode 100644
index 00000000..04446c91
--- /dev/null
+++ b/src/bin/orc-export.rs
@@ -0,0 +1,42 @@
+use std::{fs::File, io, path::PathBuf};
+
+use anyhow::Result;
+use arrow::csv;
+use clap::Parser;
+use orc_rust::ArrowReaderBuilder;
+
+#[derive(Parser)]
+#[command(name = "orc-export")]
+#[command(version, about = "Export data from orc file to csv", long_about = None)]
+struct Cli {
+    /// Path to the orc file
+    file: PathBuf,
+    /// Output file
+    #[arg(short, long)]
+    output: Option<PathBuf>,
+    // TODO: head=N
+    // TODO: convert_dates
+    // TODO: format=[csv|json]
+    // TODO: columns="col1,col2"
+}
+
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let f = File::open(&cli.file)?;
+    let output_writer: Box<dyn io::Write> = if let Some(output) = cli.output {
+        Box::new(File::create(output).unwrap())
+    } else {
+        Box::new(io::stdout())
+    };
+
+    let reader = ArrowReaderBuilder::try_new(f).unwrap().build();
+    let mut writer = csv::WriterBuilder::new()
+        .with_header(true)
+        .build(output_writer);
+
+    for batch in reader.flatten() {
+        writer.write(&batch)?;
+    }
+
+    Ok(())
+}

From 0d71b09d8f30a843364815a5c4ec90eade019844 Mon Sep 17 00:00:00 2001
From: klangner
Date: Fri, 19 Apr 2024 09:16:10 +0200
Subject: [PATCH 2/2] CR fixes

---
 src/bin/orc-export.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bin/orc-export.rs b/src/bin/orc-export.rs
index 04446c91..eab30cda 100644
--- a/src/bin/orc-export.rs
+++ b/src/bin/orc-export.rs
@@ -11,7 +11,7 @@ use orc_rust::ArrowReaderBuilder;
 struct Cli {
     /// Path to the orc file
     file: PathBuf,
-    /// Output file
+    /// Output file. If not provided output will be printed on console
     #[arg(short, long)]
     output: Option<PathBuf>,
     // TODO: head=N
@@ -24,12 +24,12 @@ fn main() -> Result<()> {
     let cli = Cli::parse();
     let f = File::open(&cli.file)?;
     let output_writer: Box<dyn io::Write> = if let Some(output) = cli.output {
-        Box::new(File::create(output).unwrap())
+        Box::new(File::create(output)?)
     } else {
         Box::new(io::stdout())
     };
 
-    let reader = ArrowReaderBuilder::try_new(f).unwrap().build();
+    let reader = ArrowReaderBuilder::try_new(f)?.build();
     let mut writer = csv::WriterBuilder::new()
         .with_header(true)
         .build(output_writer);