Skip to content

Commit

Permalink
Add flags to control generated files
Browse files Browse the repository at this point in the history
Make dictionary processing async.

Signed-off-by: Tin Švagelj <tin.svagelj@live.com>
  • Loading branch information
Caellian committed Jun 21, 2024
1 parent 592593c commit 3504a7b
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 95 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ edition = "2021"
description = "Wikipedia dump download and cleanup tool"
authors = ["Tin Švagelj <tin.svagelj@live.com>"]
license = "GPLv3"
license-file = "LICENSE"

[dependencies]
tokio = { version = "1.38", features = ["full"] }
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ manager of choice) to compile the program.
All configuration is done through CLI arguments.

Print arguments with:
```
```sh
wiki-extractor --help
wiki-extractor local --help
wiki-extractor remote --help
Expand All @@ -58,9 +58,9 @@ wiki-extractor remote --help

- Find the closest [mirror](https://dumps.wikimedia.org/mirrors.html) to make the download faster.
- The URL to use is the part that comes before `/enwiki/latest` once you locate the dump files.
- Replace the command below with
- Replace the URL in the command below with your mirror of choice:

```
```sh
wiki-extractor remote https://dumps.wikimedia.org/ -L en -w latest -o ./dump
```

Expand Down
15 changes: 12 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ pub struct Args {
#[arg(short = 'o', long = "output", default_value = "./dump")]
pub output: std::path::PathBuf,

/// Selection of generated files.
#[clap(flatten)]
pub generator: output::options::GeneratorOptions,
/// Options for text dump generation.
#[clap(flatten)]
pub text_options: output::options::TextOptions,
pub text: output::options::TextOptions,
}

fn main() -> anyhow::Result<()> {
Expand All @@ -60,9 +63,15 @@ fn main() -> anyhow::Result<()> {
let Args {
input,
output,
text_options,
generator: generator_options,
text: text_options,
} = Args::parse();

if !generator_options.any() {
log::info!("Nothing to do. See `--help` for list of generators.");
std::process::exit(0);
}

let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
Expand All @@ -75,7 +84,7 @@ fn main() -> anyhow::Result<()> {
std::process::exit(1);
}

let mut gen = DataGenerator::new(output, text_options)?;
let mut gen = DataGenerator::new(output, generator_options, text_options)?;

if let Some(updated) = dump.updated {
log::info!("Dump creation date: {updated}");
Expand Down
Loading

0 comments on commit 3504a7b

Please sign in to comment.