From 1a4f8fd268f056b11fdae7d7f6867367e02eb729 Mon Sep 17 00:00:00 2001 From: abradley60 Date: Thu, 23 Jan 2025 17:04:18 +1100 Subject: [PATCH 1/5] s3 upload tool --- .../nci/upload/push_folder_to_s3.py | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 sar_antarctica/nci/upload/push_folder_to_s3.py diff --git a/sar_antarctica/nci/upload/push_folder_to_s3.py b/sar_antarctica/nci/upload/push_folder_to_s3.py new file mode 100644 index 0000000..61a28c2 --- /dev/null +++ b/sar_antarctica/nci/upload/push_folder_to_s3.py @@ -0,0 +1,77 @@ +import boto3 +from pathlib import Path +import os +import click + +@click.command() +@click.argument('src_folder', type=click.Path(exists=True, file_okay=False)) +@click.argument('s3_bucket') +@click.argument('s3_bucket_folder') +@click.option('--exclude-extensions', '-e', multiple=True, help="File extensions to exclude, e.g., '.txt', '.log'") +@click.option('--exclude-files', '-f', multiple=True, help="Specific files to exclude, e.g., 'config.json'") +@click.option('--aws-access-key-id', help="AWS access key ID") +@click.option('--aws-secret-access-key', help="AWS secret access key") +@click.option('--aws-session-token', help="AWS session token") +@click.option('--region-name', default='ap-southeast-2', show_default=True, help="AWS region name") +def push_files_in_folder_to_s3( + src_folder : str, + s3_bucket : str, + s3_bucket_folder : str, + exclude_extensions : list[str] = [], + exclude_files : list[str] = [], + aws_access_key_id : str = None, + aws_secret_access_key : str = None, + aws_session_token : str = None, + region_name : str = 'ap-southeast-2' +): + """Upload the files in a local folder to an S3 bucket. The subfolder + structure in the specified folder is maintained in s3. 
+ + Parameters + ---------- + src_folder : Path + Source folder containing files of interest + s3_bucket : Path + S3 bucket to push to + s3_bucket_folder : Path + Folder within bucket to push to + exclude_extensions : list[str], optional + List of file extensions to exclude, by default [] + exclude_files : list[str], optional + List of files to exclude, by default [] + aws_access_key_id : str, optional + _description_, by default None + aws_secret_access_key : str, optional + _description_, by default None + aws_session_token : str, optional + _description_, by default None + region_name : str, optional + _description_, by default 'ap-southeast-2' + """ + + + S3_CLIENT = boto3.client( + 's3', + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + region_name=region_name + ) + + for root, dirs, files in os.walk(src_folder): + for file in files: + if exclude_extensions: + filename, file_extension = os.path.splitext(file) + if file_extension in exclude_extensions: + continue + if file in exclude_files: + continue + local_path = Path(root) / Path(file) + relative_path = Path(os.path.relpath(local_path, src_folder)) + s3_key = Path(os.path.join(s3_bucket_folder, relative_path).replace("\\", "/")) + S3_CLIENT.upload_file(str(local_path), str(s3_bucket), str(s3_key)) + print(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}") + +if __name__ == "__main__": + + push_files_in_folder_to_s3() \ No newline at end of file From bc5a43419a10c783e72140a9a606356985e66c0f Mon Sep 17 00:00:00 2001 From: abradley60 Date: Fri, 24 Jan 2025 13:31:28 +1100 Subject: [PATCH 2/5] removing creds from cli --- .../nci/upload/push_folder_to_s3.py | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/sar_antarctica/nci/upload/push_folder_to_s3.py b/sar_antarctica/nci/upload/push_folder_to_s3.py index 61a28c2..348f55f 100644 --- a/sar_antarctica/nci/upload/push_folder_to_s3.py +++ 
b/sar_antarctica/nci/upload/push_folder_to_s3.py @@ -2,6 +2,7 @@ from pathlib import Path import os import click +import logging @click.command() @click.argument('src_folder', type=click.Path(exists=True, file_okay=False)) @@ -9,9 +10,6 @@ @click.argument('s3_bucket_folder') @click.option('--exclude-extensions', '-e', multiple=True, help="File extensions to exclude, e.g., '.txt', '.log'") @click.option('--exclude-files', '-f', multiple=True, help="Specific files to exclude, e.g., 'config.json'") -@click.option('--aws-access-key-id', help="AWS access key ID") -@click.option('--aws-secret-access-key', help="AWS secret access key") -@click.option('--aws-session-token', help="AWS session token") @click.option('--region-name', default='ap-southeast-2', show_default=True, help="AWS region name") def push_files_in_folder_to_s3( src_folder : str, @@ -19,45 +17,43 @@ def push_files_in_folder_to_s3( s3_bucket_folder : str, exclude_extensions : list[str] = [], exclude_files : list[str] = [], - aws_access_key_id : str = None, - aws_secret_access_key : str = None, - aws_session_token : str = None, - region_name : str = 'ap-southeast-2' + region_name : str = 'ap-southeast-2', ): """Upload the files in a local folder to an S3 bucket. The subfolder structure in the specified folder is maintained in s3. 
Parameters ---------- - src_folder : Path + src_folder : str Source folder containing files of interest - s3_bucket : Path + s3_bucket : str S3 bucket to push to - s3_bucket_folder : Path + s3_bucket_folder : str Folder within bucket to push to exclude_extensions : list[str], optional List of file extensions to exclude, by default [] exclude_files : list[str], optional List of files to exclude, by default [] - aws_access_key_id : str, optional - _description_, by default None - aws_secret_access_key : str, optional - _description_, by default None - aws_session_token : str, optional - _description_, by default None region_name : str, optional _description_, by default 'ap-southeast-2' """ - + + # search for credentials in envrionment and raise warning if not there + if os.environ.get('AWS_ACCESS_KEY_ID') is None: + wrn_msg = 'AWS_ACCESS_KEY_ID is not set in envrionment variables. Set if authenticaiton required on bucket' + logging.warning(wrn_msg) + if os.environ.get('AWS_SECRET_ACCESS_KEY') is None: + wrn_msg = 'AWS_SECRET_ACCESS_KEY is not set in envrionment variables. 
Set if authenticaiton required on bucket' + logging.warning(wrn_msg) + S3_CLIENT = boto3.client( 's3', - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_session_token=aws_session_token, region_name=region_name ) + logging.info(f'Attempting to upload to S3 bucket : {s3_bucket}') + for root, dirs, files in os.walk(src_folder): for file in files: if exclude_extensions: @@ -70,8 +66,11 @@ def push_files_in_folder_to_s3( relative_path = Path(os.path.relpath(local_path, src_folder)) s3_key = Path(os.path.join(s3_bucket_folder, relative_path).replace("\\", "/")) S3_CLIENT.upload_file(str(local_path), str(s3_bucket), str(s3_key)) - print(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}") + logging.info(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}") if __name__ == "__main__": + logger = logging.getLogger() + logger.setLevel(logging.INFO) + push_files_in_folder_to_s3() \ No newline at end of file From c35f6cc5c5f3d040189465fb73b08a7521b7d571 Mon Sep 17 00:00:00 2001 From: abradley60 Date: Fri, 24 Jan 2025 13:54:27 +1100 Subject: [PATCH 3/5] adding cli tool for upload to s3 --- pyproject.toml | 1 + sar_antarctica/nci/cli.py | 28 +++++++++++++++++++ .../nci/upload/push_folder_to_s3.py | 14 ---------- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 66ff531..7bded4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ find-scene = "sar_antarctica.nci.cli:find_scene_file" find-orbits = "sar_antarctica.nci.cli:find_orbits_for_scene" run-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:run_pyrosar_gamma_workflow" submit-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:submit_pyrosar_gamma_workflow" +push-folder-to-s3 = "sar_antarctica.nci.cli:push_folder_to_s3" [tool.pytest.ini_options] testpaths = ["tests/*"] diff --git a/sar_antarctica/nci/cli.py b/sar_antarctica/nci/cli.py index ef344a5..43585fc 100644 --- a/sar_antarctica/nci/cli.py +++ 
b/sar_antarctica/nci/cli.py @@ -1,6 +1,7 @@ import click from pathlib import Path import tomli +import logging from sar_antarctica.nci.filesystem import get_orbits_nci from sar_antarctica.nci.submission.pyrosar_gamma.prepare_input import ( @@ -18,7 +19,9 @@ run_pyrosar_gamma_geocode, ) from sar_antarctica.nci.submission.pyrosar_gamma.submit_job import submit_job +from sar_antarctica.nci.upload.push_folder_to_s3 import push_files_in_folder_to_s3 +logging.basicConfig(level=logging.INFO) @click.command() @click.argument("scene_name", type=str) @@ -172,3 +175,28 @@ def find_orbits_for_scene(scene: str): ) for orbit in relevant_res_paths: print(orbit["orbit"]) + + +@click.command() +@click.argument('src_folder', type=click.Path(exists=True, file_okay=False)) +@click.argument('s3_bucket') +@click.argument('s3_bucket_folder') +@click.option('--exclude-extensions', '-e', multiple=True, help="File extensions to exclude, e.g., '.txt', '.log'") +@click.option('--exclude-files', '-f', multiple=True, help="Specific files to exclude, e.g., 'config.json'") +@click.option('--region-name', default='ap-southeast-2', show_default=True, help="AWS region name") +def push_folder_to_s3( + src_folder : str, + s3_bucket : str, + s3_bucket_folder : str, + exclude_extensions : list[str] = [], + exclude_files : list[str] = [], + region_name : str = 'ap-southeast-2', +): + push_files_in_folder_to_s3( + src_folder = src_folder, + s3_bucket = s3_bucket, + s3_bucket_folder = s3_bucket_folder, + exclude_extensions = exclude_extensions, + exclude_files = exclude_files, + region_name = region_name, + ) \ No newline at end of file diff --git a/sar_antarctica/nci/upload/push_folder_to_s3.py b/sar_antarctica/nci/upload/push_folder_to_s3.py index 348f55f..aca637c 100644 --- a/sar_antarctica/nci/upload/push_folder_to_s3.py +++ b/sar_antarctica/nci/upload/push_folder_to_s3.py @@ -4,13 +4,6 @@ import click import logging -@click.command() -@click.argument('src_folder', type=click.Path(exists=True, 
file_okay=False)) -@click.argument('s3_bucket') -@click.argument('s3_bucket_folder') -@click.option('--exclude-extensions', '-e', multiple=True, help="File extensions to exclude, e.g., '.txt', '.log'") -@click.option('--exclude-files', '-f', multiple=True, help="Specific files to exclude, e.g., 'config.json'") -@click.option('--region-name', default='ap-southeast-2', show_default=True, help="AWS region name") def push_files_in_folder_to_s3( src_folder : str, s3_bucket : str, @@ -67,10 +60,3 @@ def push_files_in_folder_to_s3( s3_key = Path(os.path.join(s3_bucket_folder, relative_path).replace("\\", "/")) S3_CLIENT.upload_file(str(local_path), str(s3_bucket), str(s3_key)) logging.info(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}") - -if __name__ == "__main__": - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - - push_files_in_folder_to_s3() \ No newline at end of file From 315072e2949c567321f798cb545f411375f2f5b6 Mon Sep 17 00:00:00 2001 From: abradley60 Date: Fri, 24 Jan 2025 14:56:28 +1100 Subject: [PATCH 4/5] adding boto dependencies --- environment.yml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/environment.yml b/environment.yml index 6a56ade..9d90e8f 100644 --- a/environment.yml +++ b/environment.yml @@ -188,12 +188,16 @@ dependencies: - zstandard=0.23.0 - zstd=1.5.6 - pip: - - exceptiongroup==1.2.2 - - geopandas==1.0.1 - - iniconfig==2.0.0 - - pandas==2.2.3 - - pluggy==1.5.0 - - pyogrio==0.10.0 - - pyproj==3.7.0 - - pytest==8.3.4 - - tzdata==2024.2 + - boto3==1.36.4 + - botocore==1.36.4 + - exceptiongroup==1.2.2 + - geopandas==1.0.1 + - iniconfig==2.0.0 + - jmespath==1.0.1 + - pandas==2.2.3 + - pluggy==1.5.0 + - pyogrio==0.10.0 + - pyproj==3.7.0 + - pytest==8.3.4 + - s3transfer==0.11.1 + - tzdata==2024.2 From 01967061f1ee4c0156d1195dad89f05fbe9efdde Mon Sep 17 00:00:00 2001 From: abradley60 Date: Fri, 24 Jan 2025 15:39:09 +1100 Subject: [PATCH 5/5] adding upload folder flag and changing name 
for clarity --- pyproject.toml | 2 +- sar_antarctica/nci/cli.py | 8 +++++++- sar_antarctica/nci/upload/push_folder_to_s3.py | 12 +++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7bded4d..128465a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ find-scene = "sar_antarctica.nci.cli:find_scene_file" find-orbits = "sar_antarctica.nci.cli:find_orbits_for_scene" run-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:run_pyrosar_gamma_workflow" submit-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:submit_pyrosar_gamma_workflow" -push-folder-to-s3 = "sar_antarctica.nci.cli:push_folder_to_s3" +upload-files-in-folder-to-s3 = "sar_antarctica.nci.cli:upload_files_in_folder_to_s3" [tool.pytest.ini_options] testpaths = ["tests/*"] diff --git a/sar_antarctica/nci/cli.py b/sar_antarctica/nci/cli.py index 43585fc..40ad304 100644 --- a/sar_antarctica/nci/cli.py +++ b/sar_antarctica/nci/cli.py @@ -181,13 +181,18 @@ def find_orbits_for_scene(scene: str): @click.argument('src_folder', type=click.Path(exists=True, file_okay=False)) @click.argument('s3_bucket') @click.argument('s3_bucket_folder') +@click.option('--upload-folder', + default=False, + is_flag=True, + help="Upload the whole folder to specified s3_bucket_folder.") @click.option('--exclude-extensions', '-e', multiple=True, help="File extensions to exclude, e.g., '.txt', '.log'") @click.option('--exclude-files', '-f', multiple=True, help="Specific files to exclude, e.g., 'config.json'") @click.option('--region-name', default='ap-southeast-2', show_default=True, help="AWS region name") -def push_folder_to_s3( +def upload_files_in_folder_to_s3( src_folder : str, s3_bucket : str, s3_bucket_folder : str, + upload_folder : bool, exclude_extensions : list[str] = [], exclude_files : list[str] = [], region_name : str = 'ap-southeast-2', @@ -196,6 +201,7 @@ def push_folder_to_s3( src_folder = src_folder, s3_bucket = s3_bucket, s3_bucket_folder = 
s3_bucket_folder, + upload_folder = upload_folder, exclude_extensions = exclude_extensions, exclude_files = exclude_files, region_name = region_name, diff --git a/sar_antarctica/nci/upload/push_folder_to_s3.py b/sar_antarctica/nci/upload/push_folder_to_s3.py index aca637c..0efe19b 100644 --- a/sar_antarctica/nci/upload/push_folder_to_s3.py +++ b/sar_antarctica/nci/upload/push_folder_to_s3.py @@ -8,6 +8,7 @@ def push_files_in_folder_to_s3( src_folder : str, s3_bucket : str, s3_bucket_folder : str, + upload_folder : bool = False, exclude_extensions : list[str] = [], exclude_files : list[str] = [], region_name : str = 'ap-southeast-2', @@ -23,6 +24,11 @@ def push_files_in_folder_to_s3( S3 bucket to push to s3_bucket_folder : str Folder within bucket to push to + upload_folder : bool + upload the entire folder to the s3_bucket_folder. + If; src_folder = my/local_folder/ & s3_bucket_folder = s3/s3_folder + when True, all files uploaded to -> s3/s3_folder/local_folder/... + when False, all files uploaded to -> s3/s3_folder/... exclude_extensions : list[str], optional List of file extensions to exclude, by default [] exclude_files : list[str], optional @@ -57,6 +63,10 @@ def push_files_in_folder_to_s3( continue local_path = Path(root) / Path(file) relative_path = Path(os.path.relpath(local_path, src_folder)) - s3_key = Path(os.path.join(s3_bucket_folder, relative_path).replace("\\", "/")) + if not upload_folder: + s3_key = Path(os.path.join(s3_bucket_folder, relative_path).replace("\\", "/")).as_posix() + else: + folder = Path(src_folder).name + s3_key = Path(os.path.join(s3_bucket_folder, folder, relative_path).replace("\\", "/")).as_posix() S3_CLIENT.upload_file(str(local_path), str(s3_bucket), str(s3_key)) logging.info(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}")