Skip to content

Commit

Permalink
feat: Better filtering option for arx extract (#107)
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Feb 2, 2025
2 parents df9d6b0 + 908a083 commit a0073f7
Show file tree
Hide file tree
Showing 9 changed files with 392 additions and 190 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions arx/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ env_logger = "0.11.5"
log = "0.4.20"
tempfile = "3.10.1"
libc = "0.2.158"
glob = "0.3.1"
color-print = "0.3.7"


Expand Down
152 changes: 99 additions & 53 deletions arx/src/extract.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use arx::FileFilter;
use clap::{Parser, ValueHint};
use log::info;
use std::collections::HashSet;
Expand All @@ -6,12 +7,57 @@ use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;

// Extra help text attached to `Options` through `#[command(after_long_help = ...)]`.
// It describes the three mutually exclusive filter kinds (EXTRACT_FILES, --glob,
// --file-list) and the --root-dir option. The `<...>` tags are styling markup
// processed by `color_print::cstr!`.
const AFTER_LONG_HELP: &str = color_print::cstr!("
<s,u>Filters</>
Arx supports three kinds of filters.
- Simple values given as [EXTRACT_FILES].
- Glob given with <s>--glob</> option.
- File list given with <s>--file-list</> option.
All filters are exclusive. You can pass only one kind of filter.
Filters are relative to root directory.
Filters are only filtering entries to extract. No renaming is made.
If no filter is given, all entries under root directory are accepted.
<u>EXTRACT_FILES</>
List of files to extract.
This filter is recursive. If you give a directory, all files/subdirectories in the given
directory will also be extracted.
<u>GLOB</>
A glob pattern to match files/directories to extract.
This filter is not recursive. If you want to extract all files under a directory foo, use <K!>foo/**/*</>
- <K!>?</> matches any single character.
- <K!>*</> matches any (possibly empty) sequence of characters.
- <K!>**</> matches the current directory and arbitrary subdirectories. This sequence must form a single path component, so both <K!>**a</> and <K!>b**</> are invalid and will result in an error. A sequence of more than two consecutive <K!>*</> characters is also invalid.
- <K!>[...]</> matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, so e.g. <K!>[0-9]</> specifies any character between 0 and 9 inclusive. An unclosed bracket is invalid.
- <K!>[!...]</> is the negation of <K!>[...]</>, i.e. it matches any characters not in the brackets.
- The metacharacters <K!>?</>, <K!>*</>, <K!>[</>, <K!>]</> can be matched by using brackets (e.g. <K!>[?]</>). When a <K!>]</> occurs immediately following <K!>[</> or <K!>[!</> then it is interpreted as being part of, rather than ending, the character set, so <K!>]</> and NOT <K!>]</> can be matched by <K!>[]]</> and <K!>[!]]</> respectively. The <K!>-</> character can be specified inside a character sequence pattern by placing it at the start or the end, e.g. <K!>[abc-]</>.
<u>FILE_LIST</>
A plain file listing all files/directories to extract (one per line).
This filter is not recursive.
This filter <i>early exits</>. You must give all parent directories to extract a file.
<s,u>Root Directory</>
By default, arx extracts from the root directory of the archive.
<s>--root-dir</> option allows changing the root directory.
This is equivalent to a (virtual) cd in the root directory before walking the tree and applying the filter.");

/// Extract the content of an archive
#[derive(Parser, Debug)]
#[command(after_long_help=AFTER_LONG_HELP)]
pub struct Options {
/// Archive to read
#[arg(value_hint=ValueHint::FilePath, required_unless_present("infile_old"))]
infile: Option<PathBuf>,
#[arg(value_hint=ValueHint::FilePath)]
infile: PathBuf,

/// Directory in which to extract the archive. (Defaults to the current directory)
#[arg(short = 'C', required = false, value_hint=ValueHint::DirPath)]
Expand All @@ -38,81 +84,82 @@ pub struct Options {
)]
file_list: Option<PathBuf>,

/// Use a glob pattern to filter files to extract
#[arg(short = 'g', long, group = "input")]
glob: Option<String>,

#[arg(from_global)]
verbose: u8,

/// Recursively extract directories
///
/// Default value is true if `EXTRACT_FILES` is passed and false if `FILE_LIST` is passed.
#[arg(
short,
long,
required = false,
default_value_t = false,
default_value_ifs([
("no_recurse", clap::builder::ArgPredicate::IsPresent, "false"),
("extract_files", clap::builder::ArgPredicate::IsPresent, "true")
]),
conflicts_with = "no_recurse",
action
)]
recurse: bool,

/// Force `--recurse` to be false.
#[arg(long)]
no_recurse: bool,

#[arg(
short = 'f',
long = "file",
hide = true,
conflicts_with("infile"),
required_unless_present("infile")
)]
infile_old: Option<PathBuf>,

#[arg(long, default_value = "warn")]
overwrite: arx::Overwrite,
}

/// Builds the filter that selects which archive entries get extracted.
///
/// NOTE(review): this span is a rendered diff without +/- markers, so two signatures
/// (`get_files_to_extract` and `get_extract_filter`) and paired `Ok(...)` returns appear
/// interleaved below — confirm against the real file which lines survive.
///
/// Selection order, as written in the `Box<dyn FileFilter>` variant:
/// 1. `options.file_list` is set: each line of that file becomes one path in a `HashSet`.
/// 2. `options.glob` is set: the pattern is compiled and wrapped in a `PatternFilter`.
/// 3. `options.extract_files` is non-empty: the paths are cloned into a `SimpleFileList`.
/// 4. Otherwise: `()` is boxed and used as the filter.
///
/// # Errors
/// Propagates failures from opening or reading the list file and from
/// `glob::Pattern::new`.
fn get_files_to_extract(options: &Options) -> std::io::Result<HashSet<arx::PathBuf>> {
fn get_extract_filter(options: &Options) -> anyhow::Result<Box<dyn FileFilter>> {
if let Some(file_list) = &options.file_list {
// Read the list file line by line; any I/O error aborts via `?`.
let file = File::open(file_list)?;
let mut files: HashSet<arx::PathBuf> = Default::default();
for line in BufReader::new(file).lines() {
files.insert(line?.into());
}
Ok(files)
Ok(Box::new(files))
} else if let Some(pattern) = &options.glob {
// An invalid glob pattern is reported as an error instead of panicking.
Ok(Box::new(PatternFilter(glob::Pattern::new(pattern)?)))
} else if !options.extract_files.is_empty() {
Ok(Box::new(SimpleFileList(options.extract_files.clone())))
} else {
// No filter was requested on the command line.
Ok(options.extract_files.iter().cloned().collect())
Ok(Box::new(()))
}
}

/// File filter backed by a compiled glob pattern (the `--glob` option).
struct PatternFilter(pub glob::Pattern);

impl arx::FileFilter for PatternFilter {
    /// Returns `true` when `path`, taken as a string, matches the wrapped pattern.
    ///
    /// Matching is case sensitive, requires path separators to be matched
    /// literally, and does not require a leading dot to be matched literally.
    fn accept(&self, path: &arx::Path) -> bool {
        let Self(pattern) = self;
        pattern.matches_with(
            path.as_str(),
            glob::MatchOptions {
                case_sensitive: true,
                require_literal_separator: true,
                require_literal_leading_dot: false,
            },
        )
    }

    /// Always `false` for this filter.
    // NOTE(review): the exact meaning of `early_exit` is defined by the
    // `FileFilter` trait, which is not visible here — confirm against it.
    fn early_exit(&self) -> bool {
        false
    }
}

/// File filter backed by an explicit list of paths (the `EXTRACT_FILES` arguments).
struct SimpleFileList(pub Vec<arx::PathBuf>);

impl arx::FileFilter for SimpleFileList {
    /// Returns `true` when at least one listed path is equal to `path` or
    /// starts with `path`.
    // NOTE(review): this check accepts `path` when it is a listed path or a
    // leading part of one. It does not, by itself, accept paths located *below*
    // a listed directory — confirm how the recursive behaviour described in the
    // CLI help is obtained.
    fn accept(&self, path: &arx::Path) -> bool {
        self.0
            .iter()
            .any(|candidate| candidate == path || candidate.starts_with(path))
    }

    /// Always `false` for this filter.
    fn early_exit(&self) -> bool {
        false
    }
}

// Builder type passed to `arx.get_entry` when resolving the `--root-dir` entry.
// NOTE(review): a tuple of unit types presumably means no entry data is built —
// confirm against the arx builder API.
type DummyBuilder = ((), (), ());

pub fn extract(options: Options) -> anyhow::Result<()> {
let files_to_extract = get_files_to_extract(&options)?;
let filter = get_extract_filter(&options)?;
let outdir = match options.outdir {
Some(o) => o,
None => current_dir()?,
};
let infile = if let Some(ref infile) = options.infile_old {
infile
} else {
options.infile.as_ref().unwrap()
};
let arx = arx::Arx::new(infile)?;
info!("Extract archive {:?} in {:?}", &infile, outdir);

let arx = arx::Arx::new(&options.infile)?;
info!("Extract archive {:?} in {:?}", &options.infile, outdir);

match options.root_dir {
None => arx::extract_arx(
&arx,
&outdir,
files_to_extract,
options.recurse,
options.progress,
options.overwrite,
)?,
None => arx::extract_arx(&arx, &outdir, filter, options.progress, options.overwrite)?,
Some(p) => {
let relative_path = arx::Path::from_path(&p)?;
let root = arx.get_entry::<DummyBuilder>(relative_path)?;
Expand All @@ -121,8 +168,7 @@ pub fn extract(options: Options) -> anyhow::Result<()> {
&arx,
&outdir,
&range,
files_to_extract,
options.recurse,
filter,
options.progress,
options.overwrite,
)?,
Expand Down
Loading

0 comments on commit a0073f7

Please sign in to comment.