Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push to S3 #17

Merged
merged 5 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ find-scene = "sar_antarctica.nci.cli:find_scene_file"
find-orbits = "sar_antarctica.nci.cli:find_orbits_for_scene"
run-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:run_pyrosar_gamma_workflow"
submit-pyrosar-gamma-workflow = "sar_antarctica.nci.cli:submit_pyrosar_gamma_workflow"
push-folder-to-s3 = "sar_antarctica.nci.cli:push_folder_to_s3"

[tool.pytest.ini_options]
testpaths = ["tests/*"]
Expand Down
28 changes: 28 additions & 0 deletions sar_antarctica/nci/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import click
from pathlib import Path
import tomli
import logging

from sar_antarctica.nci.filesystem import get_orbits_nci
from sar_antarctica.nci.submission.pyrosar_gamma.prepare_input import (
Expand All @@ -18,7 +19,9 @@
run_pyrosar_gamma_geocode,
)
from sar_antarctica.nci.submission.pyrosar_gamma.submit_job import submit_job
from sar_antarctica.nci.upload.push_folder_to_s3 import push_files_in_folder_to_s3

# Configure the root logger at import time so that INFO-level messages
# (e.g. the per-file upload messages logged by the S3 push) are emitted
# when the CLI entry points run.
logging.basicConfig(level=logging.INFO)

@click.command()
@click.argument("scene_name", type=str)
Expand Down Expand Up @@ -172,3 +175,28 @@ def find_orbits_for_scene(scene: str):
)
for orbit in relevant_res_paths:
print(orbit["orbit"])


@click.command()
@click.argument('src_folder', type=click.Path(exists=True, file_okay=False))
@click.argument('s3_bucket')
@click.argument('s3_bucket_folder')
@click.option('--exclude-extensions', '-e', multiple=True, help="File extensions to exclude, e.g., '.txt', '.log'")
@click.option('--exclude-files', '-f', multiple=True, help="Specific files to exclude, e.g., 'config.json'")
@click.option('--region-name', default='ap-southeast-2', show_default=True, help="AWS region name")
def push_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    # click passes options declared with ``multiple=True`` as tuples, so the
    # annotations and (immutable) defaults reflect that.
    exclude_extensions: tuple[str, ...] = (),
    exclude_files: tuple[str, ...] = (),
    region_name: str = 'ap-southeast-2',
):
    """Upload the files in SRC_FOLDER to S3_BUCKET_FOLDER within S3_BUCKET.

    The -e and -f options may be repeated to exclude several file
    extensions or file names from the upload.
    """
    # Thin CLI wrapper: every argument is forwarded unchanged to the
    # library function that performs the upload.
    push_files_in_folder_to_s3(
        src_folder=src_folder,
        s3_bucket=s3_bucket,
        s3_bucket_folder=s3_bucket_folder,
        exclude_extensions=exclude_extensions,
        exclude_files=exclude_files,
        region_name=region_name,
    )
62 changes: 62 additions & 0 deletions sar_antarctica/nci/upload/push_folder_to_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import logging
import os
from collections.abc import Iterable
from pathlib import Path, PurePosixPath
from typing import Optional

import boto3
import click

def push_files_in_folder_to_s3(
    src_folder: str,
    s3_bucket: str,
    s3_bucket_folder: str,
    exclude_extensions: Optional[Iterable[str]] = None,
    exclude_files: Optional[Iterable[str]] = None,
    region_name: str = 'ap-southeast-2',
):
    """Upload the files in a local folder to an S3 bucket.

    The subfolder structure of the source folder is maintained in S3
    underneath ``s3_bucket_folder``.

    Parameters
    ----------
    src_folder : str
        Source folder containing files of interest. It is walked
        recursively.
    s3_bucket : str
        S3 bucket to push to.
    s3_bucket_folder : str
        Folder (key prefix) within the bucket to push to.
    exclude_extensions : iterable of str, optional
        File extensions to exclude, including the leading dot
        (e.g. ``'.txt'``). By default nothing is excluded.
    exclude_files : iterable of str, optional
        File names (without directory) to exclude. By default nothing is
        excluded.
    region_name : str, optional
        AWS region name used to create the S3 client, by default
        'ap-southeast-2'.
    """
    # Warn (rather than fail) when credentials are missing from the
    # environment: boto3 can resolve credentials from other sources too, and
    # authentication may not be required on the bucket at all.
    for env_var in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
        if os.environ.get(env_var) is None:
            logging.warning(
                f'{env_var} is not set in envrionment variables. '
                'Set if authenticaiton required on bucket'
            )

    s3_client = boto3.client('s3', region_name=region_name)

    logging.info(f'Attempting to upload to S3 bucket : {s3_bucket}')

    # Normalise the exclusions once so ``None``, lists and tuples are all
    # accepted and membership tests inside the loop are O(1).
    excluded_extensions = frozenset(exclude_extensions or ())
    excluded_files = frozenset(exclude_files or ())

    for root, _dirs, files in os.walk(src_folder):
        for file in files:
            if os.path.splitext(file)[1] in excluded_extensions:
                continue
            if file in excluded_files:
                continue

            local_path = Path(root) / file
            relative_path = os.path.relpath(local_path, src_folder)
            # S3 keys always use forward slashes. Build the key as a
            # PurePosixPath so converting it back to a string cannot
            # reintroduce backslashes on Windows.
            s3_key = str(
                PurePosixPath(
                    os.path.join(s3_bucket_folder, relative_path).replace("\\", "/")
                )
            )
            s3_client.upload_file(str(local_path), s3_bucket, s3_key)
            logging.info(f"Uploaded {local_path} to s3://{s3_bucket}/{s3_key}")
Loading