diff --git a/Cargo.toml b/Cargo.toml
index aa8c31f1..de174011 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ rust-version = "1.70"
 
 [dependencies]
 anyhow = { version = "1.0", optional = true }
-arrow = { version = "51", features = ["prettyprint"] }
+arrow = { version = "51", features = ["prettyprint", "chrono-tz"] }
 bytes = "1.4"
 chrono = { version = "0.4.37", default-features = false, features = ["std"] }
 chrono-tz = "0.8.6"
@@ -72,6 +72,10 @@ path = "./examples/datafusion_integration.rs"
 name = "orc-metadata"
 required-features = ["cli"]
 
+[[bin]]
+name = "orc-export"
+required-features = ["cli"]
+
 [[bin]]
 name = "orc-stats"
 required-features = ["cli"]
diff --git a/src/bin/orc-export.rs b/src/bin/orc-export.rs
new file mode 100644
index 00000000..eab30cda
--- /dev/null
+++ b/src/bin/orc-export.rs
@@ -0,0 +1,56 @@
+//! Command-line tool that exports the contents of an ORC file as CSV.
+
+use std::{fs::File, io, path::PathBuf};
+
+use anyhow::Result;
+use arrow::csv;
+use clap::Parser;
+use orc_rust::ArrowReaderBuilder;
+
+// Command-line arguments accepted by `orc-export`.
+#[derive(Parser)]
+#[command(name = "orc-export")]
+#[command(version, about = "Export data from orc file to csv", long_about = None)]
+struct Cli {
+    /// Path to the orc file
+    file: PathBuf,
+    /// Output file. If not provided output will be printed on console
+    #[arg(short, long)]
+    output: Option<PathBuf>,
+    // TODO: head=N
+    // TODO: convert_dates
+    // TODO: format=[csv|json]
+    // TODO: columns="col1,col2"
+}
+
+/// Exports the ORC file given on the command line as CSV with a header row.
+///
+/// Output goes to the `--output` path when provided, otherwise to stdout.
+///
+/// # Errors
+///
+/// Fails if the input cannot be opened, the output cannot be created, the reader
+/// cannot be built, a record batch cannot be read, or a CSV write fails.
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let f = File::open(&cli.file)?;
+    // Boxed trait object so the file and stdout sinks share one writer type.
+    let output_writer: Box<dyn io::Write> = if let Some(output) = cli.output {
+        Box::new(File::create(output)?)
+    } else {
+        Box::new(io::stdout())
+    };
+
+    let reader = ArrowReaderBuilder::try_new(f)?.build();
+    let mut writer = csv::WriterBuilder::new()
+        .with_header(true)
+        .build(output_writer);
+
+    // Propagate read errors rather than dropping them with `flatten()`, which
+    // would silently produce an incomplete export.
+    for batch in reader {
+        writer.write(&batch?)?;
+    }
+
+    Ok(())
+}