@@ -8,8 +8,8 @@ use anyhow::{anyhow, bail, Context as _, Result};
8
8
use aws_config:: meta:: region:: RegionProviderChain ;
9
9
use aws_sdk_s3:: types:: Object ;
10
10
use aws_sdk_s3:: { config:: BehaviorVersion , config:: Region , Client } ;
11
- use clap:: { ArgAction , Parser , Subcommand } ;
12
- use glob_matcher:: { S3Engine , S3GlobMatcher } ;
11
+ use clap:: { ArgAction , Parser , Subcommand , ValueEnum } ;
12
+ use glob_matcher:: { S3Engine , S3GlobMatcher , GLOB_CHARS } ;
13
13
use humansize:: { FormatSizeOptions , SizeFormatter , DECIMAL } ;
14
14
use num_format:: { Locale , ToFormattedString } ;
15
15
use regex:: Regex ;
@@ -77,6 +77,16 @@ enum Command {
77
77
/// The key name (or the part of it selected by --path-mode) will be
78
78
/// reproduced in the directory, so multiple folders may be created.
79
79
dest : String ,
80
+
81
+ /// Control how S3 object keys are mapped to local file paths
82
+ ///
83
+ /// - absolute | abs: the full key path will be reproduced in the
84
+ /// destination
85
+ /// - from-first-glob | g: the key path relative to the first path part
86
+ /// containing a glob in the pattern will be reproduced in the
87
+ /// destination
88
+ #[ clap( short, long, verbatim_doc_comment, default_value = "from-first-glob" ) ]
89
+ path_mode : PathType ,
80
90
} ,
81
91
82
92
/// Learn how to tune s3glob's parallelism for better performance
@@ -120,6 +130,42 @@ enum Command {
120
130
} ,
121
131
}
122
132
133
/// How an S3 object key is mapped onto a path below the download destination.
///
/// Every mode is represented twice: once under its long command-line spelling
/// and once under its short alias. Code that consumes a `PathType` should treat
/// both members of a pair the same way.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathType {
    /// `absolute`: reproduce the whole key below the destination.
    Absolute,
    /// `abs`: short alias of [`PathType::Absolute`].
    Abs,
    /// `from-first-glob`: reproduce only the part of the key starting at the
    /// first pattern component that contains a glob.
    FromFirstGlob,
    /// `g`: short alias of [`PathType::FromFirstGlob`].
    G,
}
140
+
141
+ impl ValueEnum for PathType {
142
+ fn value_variants < ' a > ( ) -> & ' a [ Self ] {
143
+ & [
144
+ PathType :: Absolute ,
145
+ PathType :: Abs ,
146
+ PathType :: FromFirstGlob ,
147
+ PathType :: G ,
148
+ ]
149
+ }
150
+
151
+ fn from_str ( s : & str , _ignore_case : bool ) -> Result < Self , String > {
152
+ match s {
153
+ "absolute" | "abs" => Ok ( PathType :: Absolute ) ,
154
+ "from-first-glob" | "g" => Ok ( PathType :: FromFirstGlob ) ,
155
+ _ => Err ( format ! ( "invalid path type: {}" , s) ) ,
156
+ }
157
+ }
158
+
159
+ fn to_possible_value ( & self ) -> Option < clap:: builder:: PossibleValue > {
160
+ match self {
161
+ PathType :: Abs => Some ( clap:: builder:: PossibleValue :: new ( "abs" ) ) ,
162
+ PathType :: Absolute => Some ( clap:: builder:: PossibleValue :: new ( "absolute" ) ) ,
163
+ PathType :: FromFirstGlob => Some ( clap:: builder:: PossibleValue :: new ( "from-first-glob" ) ) ,
164
+ PathType :: G => Some ( clap:: builder:: PossibleValue :: new ( "g" ) ) ,
165
+ }
166
+ }
167
+ }
168
+
123
169
#[ derive( Debug , Parser ) ]
124
170
#[ command( version, author, about, max_term_width = 80 ) ]
125
171
/// A fast aws s3 ls and downloader that supports glob patterns
@@ -228,11 +274,11 @@ async fn run(opts: Opts) -> Result<()> {
228
274
let client = create_s3_client ( & opts, & bucket) . await ?;
229
275
230
276
let prefix = raw_pattern
231
- . find ( [ '*' , '?' , '[' , '{' ] )
277
+ . find ( GLOB_CHARS )
232
278
. map_or ( raw_pattern. clone ( ) , |i| raw_pattern[ ..i] . to_owned ( ) ) ;
233
279
234
280
let engine = S3Engine :: new ( client. clone ( ) , bucket. clone ( ) , opts. delimiter . to_string ( ) ) ;
235
- let matcher = S3GlobMatcher :: parse ( raw_pattern, & opts. delimiter . to_string ( ) ) ?;
281
+ let matcher = S3GlobMatcher :: parse ( raw_pattern. clone ( ) , & opts. delimiter . to_string ( ) ) ?;
236
282
let mut prefixes = match matcher. find_prefixes ( engine) . await {
237
283
Ok ( prefixes) => prefixes,
238
284
Err ( err) => {
@@ -331,7 +377,9 @@ async fn run(opts: Opts) -> Result<()> {
331
377
Duration :: from_millis( start. elapsed( ) . as_millis( ) as u64 )
332
378
) ;
333
379
}
334
- Command :: Download { dest, .. } => {
380
+ Command :: Download {
381
+ dest, path_mode, ..
382
+ } => {
335
383
let mut matching_objects = Vec :: new ( ) ;
336
384
let mut match_count = 0 ;
337
385
while let Some ( PrefixResult {
@@ -356,9 +404,13 @@ async fn run(opts: Opts) -> Result<()> {
356
404
let obj_count = objects. len ( ) ;
357
405
let base_path = Path :: new ( & dest) ;
358
406
let mut total_bytes = 0_usize ;
407
+ let prefix_to_strip = extract_prefix_to_strip ( & raw_pattern, path_mode) ;
359
408
for ( i, obj) in objects. iter ( ) . enumerate ( ) {
360
409
let key = obj. key . as_ref ( ) . unwrap ( ) ;
361
- let path = base_path. join ( key) ;
410
+ let key_suffix = key
411
+ . strip_prefix ( & prefix_to_strip)
412
+ . expect ( "all found objects will include the prefix" ) ;
413
+ let path = base_path. join ( key_suffix) ;
362
414
let dir = path. parent ( ) . unwrap ( ) ;
363
415
std:: fs:: create_dir_all ( dir)
364
416
. with_context ( || format ! ( "Creating directory: {}" , dir. display( ) ) ) ?;
@@ -407,6 +459,23 @@ async fn run(opts: Opts) -> Result<()> {
407
459
Ok ( ( ) )
408
460
}
409
461
462
+ fn extract_prefix_to_strip ( raw_pattern : & str , path_mode : PathType ) -> String {
463
+ match path_mode {
464
+ PathType :: Abs | PathType :: Absolute => String :: new ( ) ,
465
+ PathType :: FromFirstGlob | PathType :: G => {
466
+ let up_to_glob: String = raw_pattern
467
+ . chars ( )
468
+ . take_while ( |c| !GLOB_CHARS . contains ( c) )
469
+ . collect ( ) ;
470
+ // find the last slash in the prefix and only include that
471
+ match up_to_glob. rfind ( '/' ) {
472
+ Some ( slash_idx) => up_to_glob[ ..slash_idx + 1 ] . to_string ( ) ,
473
+ None => up_to_glob,
474
+ }
475
+ }
476
+ }
477
+ }
478
+
410
479
#[ derive( Debug ) ]
411
480
struct PrefixResult {
412
481
#[ allow( dead_code) ]
@@ -607,6 +676,7 @@ pub(crate) fn setup_logging(directive: Option<&str>) {
607
676
608
677
#[ cfg( test) ]
609
678
mod tests {
679
+ #![ allow( clippy:: comparison_to_empty) ]
610
680
use aws_sdk_s3:: types:: Object ;
611
681
use rstest:: rstest;
612
682
@@ -630,4 +700,54 @@ mod tests {
630
700
fn test_format_invalid_variable ( ) {
631
701
assert ! ( compile_format( "{invalid_var}" ) . is_err( ) ) ;
632
702
}
703
+
704
// Runs `extract_prefix_to_strip` on one pattern / path-mode pair and checks
// the result against the expected prefix, labelling any failure with the
// inputs that produced it.
//
// The expansion is a pair of bare statements, not a `{ ... }` block.
// NOTE(review): presumably this lets `assert2::check!` report a failure and
// still run the remaining checks in the calling test; confirm against the
// assert2 docs before wrapping the body in a block.
macro_rules! assert_extract_prefix_to_strip {
    ($pattern:expr, $path_mode:expr, $expected:expr) => {
        let actual = extract_prefix_to_strip($pattern, $path_mode);
        assert2::check!(
            actual == $expected,
            "input: {} path_mode: {:?}",
            $pattern,
            $path_mode,
        );
    };
}
715
+
716
// Covers both absolute spellings, the from-first-glob mode at several
// directory depths, a pattern without any glob, and each glob opener.
#[test]
fn test_extract_prefix_to_strip() {
    // Absolute modes never strip anything, whatever the pattern looks like.
    assert_extract_prefix_to_strip!("prefix/path/to/*.txt", PathType::Absolute, "");
    assert_extract_prefix_to_strip!("bucket/deep/path/*.txt", PathType::Abs, "");

    // From-first-glob: strip through the last slash before the first glob.
    assert_extract_prefix_to_strip!(
        "prefix/path/to/*.txt",
        PathType::FromFirstGlob,
        "prefix/path/to/"
    );
    // Only the first glob matters; later ones do not move the cut point.
    assert_extract_prefix_to_strip!(
        "prefix/path/*/more/*.txt",
        PathType::FromFirstGlob,
        "prefix/path/"
    );
    assert_extract_prefix_to_strip!("prefix/*.txt", PathType::FromFirstGlob, "prefix/");
    // A glob in the very first component leaves nothing to strip.
    assert_extract_prefix_to_strip!("*.txt", PathType::FromFirstGlob, "");
    // No glob at all: the cut still lands after the last slash.
    assert_extract_prefix_to_strip!("prefix/a.txt", PathType::FromFirstGlob, "prefix/");

    // Every kind of glob opener ends the literal prefix: `[`, `?` and `{`.
    assert_extract_prefix_to_strip!(
        "prefix/path/to/[abc]/*.txt",
        PathType::FromFirstGlob,
        "prefix/path/to/"
    );
    assert_extract_prefix_to_strip!(
        "prefix/path/to/?/*.txt",
        PathType::FromFirstGlob,
        "prefix/path/to/"
    );
    assert_extract_prefix_to_strip!(
        "prefix/path/{a,b}/*.txt",
        PathType::FromFirstGlob,
        "prefix/path/"
    );
}
633
753
}
0 commit comments