feat(exporter): add a safe_upload_single function
This adds a file size delta check to prevent the unintended
overwriting of a file with a significantly smaller one.

At present this is only used for the unified all.zip upload, as I'm not
sure how to identify when to use it for the ecosystem-specific zip
files, since they are uploaded alongside the individual record files.
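(Illustrative numbers: with the default 10% tolerance, replacing a
100 MB all.zip with a 95 MB one is permitted, but a 50 MB replacement
would be refused.)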
andrewpollock committed Feb 28, 2025
1 parent 93556bb commit 173276e
Showing 2 changed files with 42 additions and 2 deletions.
4 changes: 2 additions & 2 deletions gcp/workers/exporter/export_runner.py
@@ -24,7 +24,7 @@
 
 from google.cloud import ndb, storage
 
-from exporter import upload_single
+from exporter import safe_upload_single
 import osv
 import osv.logs
 
@@ -103,7 +103,7 @@ def aggregate_all_vulnerabilities(work_dir: str, export_bucket: str):
 
   storage_client = storage.Client()
   bucket = storage_client.get_bucket(export_bucket)
-  upload_single(bucket, output_zip, zip_file_name)
+  safe_upload_single(bucket, output_zip, zip_file_name)
   logging.info('Unified all.zip uploaded successfully.')


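The new safeguard also accepts an explicit tolerance. A hypothetical
call site that refuses anything more than 5% smaller (the parameter
value here is illustrative, not part of this commit) would look like:

```python
safe_upload_single(bucket, output_zip, zip_file_name, safe_delta_pct=5)
```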
40 changes: 40 additions & 0 deletions gcp/workers/exporter/exporter.py
@@ -35,6 +35,10 @@
 ECOSYSTEMS_FILE = 'ecosystems.txt'
 
 
+class Error(Exception):
+  """Base exception class."""
+
+
 class Exporter:
   """Exporter."""
 
@@ -130,6 +134,7 @@ def _export_to_file_and_zipfile(bug: osv.Bug):
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=_EXPORT_WORKERS) as executor:
       # Note: the individual ecosystem all.zip is included here
+      # TODO: use safe_upload_single() on the zip files.
       for filename in os.listdir(ecosystem_dir):
         executor.submit(upload_single, bucket,
                         os.path.join(ecosystem_dir, filename),
@@ -146,6 +151,41 @@ def upload_single(bucket: Bucket, source_path: str, target_path: str):
     logging.exception('Failed to export: %s', e)
 
 
+def safe_upload_single(bucket: Bucket,
+                       source_path: str,
+                       target_path: str,
+                       safe_delta_pct: int = 10):
+  """Upload a single file to a GCS bucket, with a size check.
+
+  This refuses to overwrite the existing GCS object if the new file is
+  more than safe_delta_pct percent smaller (10% by default).
+
+  NOTE: this intentionally only catches unexpectedly smaller files, not larger
+  ones.
+
+  Args:
+    bucket (Bucket): the GCS bucket object to upload to.
+    source_path (str): the source path to the file to upload.
+    target_path (str): the target path in the bucket to upload to.
+    safe_delta_pct (int): the shrinkage threshold (in percent) at which to
+      raise an exception.
+
+  Raises:
+    Error: if safe_delta_pct is exceeded.
+  """
+  source_size = os.stat(source_path).st_size
+  logging.info('Uploading %s', target_path)
+  try:
+    # get_blob() fetches the object's metadata (including size), returning
+    # None if the object doesn't exist yet, in which case any size is safe.
+    blob = bucket.get_blob(target_path)
+    if blob and (100 - (source_size / blob.size) * 100) > safe_delta_pct:
+      raise Error(
+          f'Cowardly refusing to overwrite {blob.name} ({blob.size} bytes) '
+          f'with {source_path} ({source_size} bytes)')
+    bucket.blob(target_path).upload_from_filename(
+        source_path, retry=retry.DEFAULT_RETRY)
+  except Error:
+    # Let the size-check refusal propagate, per the docstring.
+    raise
+  except Exception as e:
+    logging.exception('Failed to export: %s', e)
 
 
 def main():
   parser = argparse.ArgumentParser(description='Exporter')
   parser.add_argument(

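For a concrete sense of the check, here is a self-contained sketch of the
size-delta arithmetic (the helper name and sizes are hypothetical; the real
check lives inside safe_upload_single above):

```python
def would_refuse(existing_size: int,
                 new_size: int,
                 safe_delta_pct: int = 10) -> bool:
  """Mirrors safe_upload_single's guard: True means the overwrite is refused."""
  return (100 - (new_size / existing_size) * 100) > safe_delta_pct

assert not would_refuse(100_000_000, 95_000_000)   # 5% smaller: allowed
assert would_refuse(100_000_000, 50_000_000)       # 50% smaller: refused
assert not would_refuse(100_000_000, 120_000_000)  # larger is always allowed
```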