diff --git a/environment.yml b/environment.yml index 6a56ade..9d90e8f 100644 --- a/environment.yml +++ b/environment.yml @@ -188,12 +188,16 @@ dependencies: - zstandard=0.23.0 - zstd=1.5.6 - pip: - - exceptiongroup==1.2.2 - - geopandas==1.0.1 - - iniconfig==2.0.0 - - pandas==2.2.3 - - pluggy==1.5.0 - - pyogrio==0.10.0 - - pyproj==3.7.0 - - pytest==8.3.4 - - tzdata==2024.2 + - boto3==1.36.4 + - botocore==1.36.4 + - exceptiongroup==1.2.2 + - geopandas==1.0.1 + - iniconfig==2.0.0 + - jmespath==1.0.1 + - pandas==2.2.3 + - pluggy==1.5.0 + - pyogrio==0.10.0 + - pyproj==3.7.0 + - pytest==8.3.4 + - s3transfer==0.11.1 + - tzdata==2024.2 diff --git a/pyproject.toml b/pyproject.toml index 66ff531..128465a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ find-scene = "sar_antarctica.nci.cli:find_scene_file" find-orbits = "sar_antarctica.nci.cli:find_orbits_for_scene" run-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:run_pyrosar_gamma_workflow" submit-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:submit_pyrosar_gamma_workflow" +upload-files-in-folder-to-s3 = "sar_antarctica.nci.cli:upload_files_in_folder_to_s3" [tool.pytest.ini_options] testpaths = ["tests/*"] diff --git a/sar_antarctica/nci/cli.py b/sar_antarctica/nci/cli.py index ef344a5..40ad304 100644 --- a/sar_antarctica/nci/cli.py +++ b/sar_antarctica/nci/cli.py @@ -1,6 +1,7 @@ import click from pathlib import Path import tomli +import logging from sar_antarctica.nci.filesystem import get_orbits_nci from sar_antarctica.nci.submission.pyrosar_gamma.prepare_input import ( @@ -18,7 +19,9 @@ run_pyrosar_gamma_geocode, ) from sar_antarctica.nci.submission.pyrosar_gamma.submit_job import submit_job +from sar_antarctica.nci.upload.push_folder_to_s3 import push_files_in_folder_to_s3 +logging.basicConfig(level=logging.INFO) @click.command() @click.argument("scene_name", type=str) @@ -172,3 +175,34 @@ def find_orbits_for_scene(scene: str): ) for orbit in relevant_res_paths: print(orbit["orbit"]) + + 
# ---------------------------------------------------------------------------
# sar_antarctica/nci/cli.py (appended command)
# ---------------------------------------------------------------------------
@click.command()
@click.argument('src_folder', type=click.Path(exists=True, file_okay=False))
@click.argument('s3_bucket')
@click.argument('s3_bucket_folder')
@click.option(
    '--upload-folder',
    default=False,
    is_flag=True,
    help="Upload the whole folder to specified s3_bucket_folder.",
)
@click.option(
    '--exclude-extensions',
    '-e',
    multiple=True,
    help="File extensions to exclude, e.g., '.txt', '.log'",
)
@click.option(
    '--exclude-files',
    '-f',
    multiple=True,
    help="Specific files to exclude, e.g., 'config.json'",
)
@click.option(
    '--region-name',
    default='ap-southeast-2',
    show_default=True,
    help="AWS region name",
)
def upload_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    upload_folder: bool,
    exclude_extensions: tuple[str, ...] = (),
    exclude_files: tuple[str, ...] = (),
    region_name: str = 'ap-southeast-2',
) -> None:
    """Upload the files in SRC_FOLDER to S3_BUCKET_FOLDER within S3_BUCKET.

    The subfolder structure of SRC_FOLDER is maintained in the bucket.
    """
    # click supplies the ``multiple=True`` options as tuples; the defaults
    # above are immutable so nothing is shared between direct Python calls.
    push_files_in_folder_to_s3(
        src_folder=src_folder,
        s3_bucket=s3_bucket,
        s3_bucket_folder=s3_bucket_folder,
        upload_folder=upload_folder,
        exclude_extensions=exclude_extensions,
        exclude_files=exclude_files,
        region_name=region_name,
    )


# ---------------------------------------------------------------------------
# sar_antarctica/nci/upload/push_folder_to_s3.py (new file)
# ---------------------------------------------------------------------------
import boto3
from collections.abc import Iterable
from pathlib import Path, PurePosixPath
import os
import click
import logging

logger = logging.getLogger(__name__)

# Environment variables checked before uploading; a warning is logged for
# each one that is missing.
_AWS_CREDENTIAL_ENV_VARS = ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')


def push_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    upload_folder: bool = False,
    exclude_extensions: Iterable[str] = (),
    exclude_files: Iterable[str] = (),
    region_name: str = 'ap-southeast-2',
) -> None:
    """Upload the files in a local folder to an S3 bucket.

    The subfolder structure in the specified folder is maintained in S3.

    Parameters
    ----------
    src_folder : str
        Source folder containing files of interest. It is walked
        recursively.
    s3_bucket : str
        S3 bucket to push to.
    s3_bucket_folder : str
        Folder (key prefix) within the bucket to push to.
    upload_folder : bool, optional
        Upload the entire folder to the s3_bucket_folder, by default False.
        If src_folder = my/local_folder/ and s3_bucket_folder = s3/s3_folder:
        when True, all files are uploaded to -> s3/s3_folder/local_folder/...
        when False, all files are uploaded to -> s3/s3_folder/...
    exclude_extensions : Iterable[str], optional
        File extensions to exclude, including the leading dot
        (e.g. ``'.txt'``), by default no extensions are excluded.
    exclude_files : Iterable[str], optional
        File names (without any directory part) to exclude, by default no
        files are excluded.
    region_name : str, optional
        AWS region name passed to the S3 client, by default 'ap-southeast-2'.

    Notes
    -----
    A warning is logged for each AWS credential environment variable that is
    not set; the upload is still attempted. Any exception raised by the S3
    client during an upload propagates to the caller.
    """
    # Warn (but do not fail) when credentials are missing from the
    # environment, naming the variable that is actually absent.
    for env_var in _AWS_CREDENTIAL_ENV_VARS:
        if os.environ.get(env_var) is None:
            logger.warning(
                '%s is not set in environment variables. '
                'Set if authentication required on bucket',
                env_var,
            )

    excluded_extensions = frozenset(exclude_extensions)
    excluded_files = frozenset(exclude_files)

    # S3 keys always use forward slashes, so the key is assembled as a pure
    # POSIX path regardless of the platform this runs on.
    key_prefix = PurePosixPath(str(s3_bucket_folder).replace('\\', '/'))
    if upload_folder:
        # abspath() normalises inputs such as '.' or 'data/..' so the real
        # folder name is used instead of an empty string.
        key_prefix = key_prefix / Path(os.path.abspath(src_folder)).name

    s3_client = boto3.client('s3', region_name=region_name)

    logger.info('Attempting to upload to S3 bucket : %s', s3_bucket)

    for root, _dirs, files in os.walk(src_folder):
        for file_name in files:
            if os.path.splitext(file_name)[1] in excluded_extensions:
                continue
            if file_name in excluded_files:
                continue
            local_path = Path(root) / file_name
            relative_path = Path(os.path.relpath(local_path, src_folder))
            s3_key = key_prefix.joinpath(*relative_path.parts).as_posix()
            s3_client.upload_file(str(local_path), str(s3_bucket), s3_key)
            logger.info(
                'Uploaded %s to s3://%s/%s', local_path, s3_bucket, s3_key
            )