Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push to S3 #17

Merged
merged 5 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,16 @@ dependencies:
- zstandard=0.23.0
- zstd=1.5.6
- pip:
- exceptiongroup==1.2.2
- geopandas==1.0.1
- iniconfig==2.0.0
- pandas==2.2.3
- pluggy==1.5.0
- pyogrio==0.10.0
- pyproj==3.7.0
- pytest==8.3.4
- tzdata==2024.2
- boto3==1.36.4
- botocore==1.36.4
- exceptiongroup==1.2.2
- geopandas==1.0.1
- iniconfig==2.0.0
- jmespath==1.0.1
- pandas==2.2.3
- pluggy==1.5.0
- pyogrio==0.10.0
- pyproj==3.7.0
- pytest==8.3.4
- s3transfer==0.11.1
- tzdata==2024.2
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ find-scene = "sar_antarctica.nci.cli:find_scene_file"
find-orbits = "sar_antarctica.nci.cli:find_orbits_for_scene"
run-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:run_pyrosar_gamma_workflow"
submit-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:submit_pyrosar_gamma_workflow"
upload-files-in-folder-to-s3 = "sar_antarctica.nci.cli:upload_files_in_folder_to_s3"

[tool.pytest.ini_options]
testpaths = ["tests/*"]
Expand Down
34 changes: 34 additions & 0 deletions sar_antarctica/nci/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import click
from pathlib import Path
import tomli
import logging

from sar_antarctica.nci.filesystem import get_orbits_nci
from sar_antarctica.nci.submission.pyrosar_gamma.prepare_input import (
Expand All @@ -18,7 +19,9 @@
run_pyrosar_gamma_geocode,
)
from sar_antarctica.nci.submission.pyrosar_gamma.submit_job import submit_job
from sar_antarctica.nci.upload.push_folder_to_s3 import push_files_in_folder_to_s3

logging.basicConfig(level=logging.INFO)

@click.command()
@click.argument("scene_name", type=str)
Expand Down Expand Up @@ -172,3 +175,34 @@ def find_orbits_for_scene(scene: str):
)
for orbit in relevant_res_paths:
print(orbit["orbit"])


@click.command()
@click.argument("src_folder", type=click.Path(exists=True, file_okay=False))
@click.argument("s3_bucket")
@click.argument("s3_bucket_folder")
@click.option(
    "--upload-folder",
    default=False,
    is_flag=True,
    help="Upload the whole folder to specified s3_bucket_folder.",
)
@click.option(
    "--exclude-extensions",
    "-e",
    multiple=True,
    help="File extensions to exclude, e.g., '.txt', '.log'",
)
@click.option(
    "--exclude-files",
    "-f",
    multiple=True,
    help="Specific files to exclude, e.g., 'config.json'",
)
@click.option(
    "--region-name",
    default="ap-southeast-2",
    show_default=True,
    help="AWS region name",
)
def upload_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    upload_folder: bool,
    exclude_extensions: tuple[str, ...] = (),
    exclude_files: tuple[str, ...] = (),
    region_name: str = "ap-southeast-2",
):
    """Upload the files in SRC_FOLDER to S3_BUCKET under S3_BUCKET_FOLDER.

    The subfolder structure of SRC_FOLDER is maintained in S3.
    """
    # NOTE: click renders the docstring above as the command's --help text,
    # so it is kept short and written for CLI users.
    #
    # Options declared with ``multiple=True`` arrive as tuples (empty when the
    # option is not given); they are converted to lists to match the
    # ``list[str]`` parameters declared by ``push_files_in_folder_to_s3``.
    push_files_in_folder_to_s3(
        src_folder=src_folder,
        s3_bucket=s3_bucket,
        s3_bucket_folder=s3_bucket_folder,
        upload_folder=upload_folder,
        exclude_extensions=list(exclude_extensions),
        exclude_files=list(exclude_files),
        region_name=region_name,
    )
72 changes: 72 additions & 0 deletions sar_antarctica/nci/upload/push_folder_to_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import boto3
from pathlib import Path
import os
import click
import logging

def push_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    upload_folder: bool = False,
    exclude_extensions: list[str] = [],
    exclude_files: list[str] = [],
    region_name: str = 'ap-southeast-2',
):
    """Upload the files in a local folder to an S3 bucket.

    The folder is walked recursively and the subfolder structure found in
    ``src_folder`` is maintained in S3.

    Parameters
    ----------
    src_folder : str
        Source folder containing files of interest
    s3_bucket : str
        S3 bucket to push to
    s3_bucket_folder : str
        Folder (key prefix) within the bucket to push to
    upload_folder : bool
        Upload the entire folder to the s3_bucket_folder.
        If src_folder = my/local_folder/ & s3_bucket_folder = s3/s3_folder:
        when True, all files uploaded to -> s3/s3_folder/local_folder/...
        when False, all files uploaded to -> s3/s3_folder/...
    exclude_extensions : list[str], optional
        File extensions to exclude, including the leading dot
        (e.g. '.txt'), by default []
    exclude_files : list[str], optional
        File names (without any directory part) to exclude, by default []
    region_name : str, optional
        AWS region name used to create the S3 client,
        by default 'ap-southeast-2'
    """
    # Function-scope import so the block does not depend on a change to the
    # module-level imports.
    from pathlib import PurePosixPath

    # Search for credentials in the environment and warn if they are missing.
    # Each warning names the variable that is actually missing.
    for env_var in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
        if os.environ.get(env_var) is None:
            logging.warning(
                f'{env_var} is not set in environment variables. '
                'Set if authentication required on bucket'
            )

    s3_client = boto3.client('s3', region_name=region_name)

    logging.info(f'Attempting to upload to S3 bucket : {s3_bucket}')

    # The key prefix is the same for every file, so build it once.
    prefix_parts = [s3_bucket_folder]
    if upload_folder:
        prefix_parts.append(Path(src_folder).name)

    # NOTE: the list defaults in the signature are never mutated here, only
    # read through membership tests; falsy values (including None) mean
    # "exclude nothing".
    for root, _dirs, files in os.walk(src_folder):
        for file in files:
            extension = os.path.splitext(file)[1]
            if exclude_extensions and extension in exclude_extensions:
                continue
            if exclude_files and file in exclude_files:
                continue

            local_path = Path(root) / file
            relative_path = os.path.relpath(local_path, src_folder)

            # S3 keys always use forward slashes. PurePosixPath (rather than
            # the platform-specific Path) keeps them that way on Windows too,
            # where str(Path(...)) would re-introduce backslashes.
            joined = os.path.join(*prefix_parts, relative_path)
            s3_key = PurePosixPath(joined.replace("\\", "/")).as_posix()

            s3_client.upload_file(str(local_path), str(s3_bucket), s3_key)
            logging.info(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}")
Loading