feat(exporter): add a safe_upload_single function #3199

Merged · 6 commits · Feb 28, 2025
Changes from 3 commits
4 changes: 2 additions & 2 deletions gcp/workers/exporter/export_runner.py
@@ -24,7 +24,7 @@

 from google.cloud import ndb, storage

-from exporter import upload_single
+from exporter import safe_upload_single
 import osv
 import osv.logs

@@ -103,7 +103,7 @@ def aggregate_all_vulnerabilities(work_dir: str, export_bucket: str):

   storage_client = storage.Client()
   bucket = storage_client.get_bucket(export_bucket)
-  upload_single(bucket, output_zip, zip_file_name)
+  safe_upload_single(bucket, output_zip, zip_file_name)
   logging.info('Unified all.zip uploaded successfully.')


40 changes: 40 additions & 0 deletions gcp/workers/exporter/exporter.py
@@ -35,6 +35,10 @@
 ECOSYSTEMS_FILE = 'ecosystems.txt'


+class Error(Exception):
+  """Base exception class."""
+
+
 class Exporter:
   """Exporter."""

@@ -130,6 +134,7 @@ def _export_to_file_and_zipfile(bug: osv.Bug):
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=_EXPORT_WORKERS) as executor:
       # Note: the individual ecosystem all.zip is included here
+      # TODO: use safe_upload_single() on the zip files.
       for filename in os.listdir(ecosystem_dir):
         executor.submit(upload_single, bucket,
                         os.path.join(ecosystem_dir, filename),
@@ -146,6 +151,41 @@ def upload_single(bucket: Bucket, source_path: str, target_path: str):
     logging.exception('Failed to export: %s', e)


+def safe_upload_single(bucket: Bucket,
+                       source_path: str,
+                       target_path: str,
+                       safe_delta_pct: int = 10):
+  """Upload a single file to a GCS bucket, with a size check.
+
+  This refuses to overwrite the existing GCS object when the new file's size
+  is below safe_delta_pct percent of the current object's size (10% by
+  default), guarding against replacing a good export with a truncated one.
+
+  NOTE: this intentionally only catches unexpectedly smaller files, not larger
+  ones.
+
+  Args:
+    bucket (Bucket): the GCS bucket object to upload to.
+    source_path (str): the source path of the file to upload.
+    target_path (str): the target path in the bucket to upload to.
+    safe_delta_pct (int): the percentage of the existing object's size below
+      which the upload is refused.
+
+  Raises:
+    Error: if the size check fails.
+  """
+
+  source_size = os.stat(source_path).st_size
+  logging.info('Uploading %s', target_path)
+  # bucket.blob() does not fetch object metadata (blob.size would be None),
+  # so use get_blob(), which returns None if the object does not exist yet.
+  blob = bucket.get_blob(target_path)
+  if blob is not None and blob.size:
+    if (source_size / blob.size) * 100 < safe_delta_pct:
+      raise Error(
+          f'Cowardly refusing to overwrite {blob.name} ({blob.size} bytes) '
+          f'with {source_path} ({source_size} bytes)')
+  try:
+    bucket.blob(target_path).upload_from_filename(
+        source_path, retry=retry.DEFAULT_RETRY)
+  except Exception as e:
+    logging.exception('Failed to export: %s', e)


 def main():
   parser = argparse.ArgumentParser(description='Exporter')
   parser.add_argument(
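To see the guard's arithmetic in action, here is a minimal sketch (not part of the PR) that drives safe_upload_single with FakeBucket and FakeBlob, hypothetical stand-ins for the google.cloud.storage objects. It assumes the version above, where the size check raises Error before any upload is attempted:

import tempfile

from exporter import Error, safe_upload_single


class FakeBlob:
  """Hypothetical stand-in for google.cloud.storage.Blob."""

  def __init__(self, name, size):
    self.name = name
    self.size = size

  def upload_from_filename(self, source_path, retry=None):
    print(f'uploaded {source_path} as {self.name}')


class FakeBucket:
  """Hypothetical stand-in for google.cloud.storage.Bucket."""

  def __init__(self, existing_blob):
    self._existing = existing_blob

  def get_blob(self, target_path):
    return self._existing

  def blob(self, target_path):
    return self._existing


# The existing all.zip is 10 MiB; a 100 KiB replacement is roughly 1% of
# that, well below the 10% default threshold, so the overwrite is refused.
with tempfile.NamedTemporaryFile() as tmp:
  tmp.write(b'x' * (100 * 1024))
  tmp.flush()
  bucket = FakeBucket(FakeBlob('all.zip', 10 * 1024 * 1024))
  try:
    safe_upload_single(bucket, tmp.name, 'all.zip')
  except Error as e:
    print(e)  # Cowardly refusing to overwrite all.zip (10485760 bytes) ...

Only the shrinking direction is checked because, per the NOTE in the docstring, larger files are deliberately allowed; presumably the vulnerability corpus normally grows, so a sharply smaller all.zip is the signature of a truncated or failed export.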