Skip to content

Commit

Permalink
Add filter_object_info function to dispatch Lambda
Browse files Browse the repository at this point in the history
  • Loading branch information
philerooski committed May 17, 2024
1 parent 43d6706 commit 22193c4
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 2 deletions.
53 changes: 51 additions & 2 deletions src/lambda_function/dispatch/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,57 @@
import logging
import os
import zipfile
from typing import Optional # use | for type hints in 3.10+
from urllib import parse

import boto3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

def filter_object_info(object_info: dict) -> Optional[dict]:
    """
    Filter out objects that should not be processed.

    Returns None for:
    - Records containing owner.txt
    - Records that don't contain a specific object key like <path_to_file>/<file_name>
    - Records that are missing the `Key` field.
    - Records that are missing the `Bucket` field.

    Args:
        object_info (dict): Object information from source S3 bucket
            as formatted by `get_object_info`.

    Returns:
        dict: `object_info` if it passes the filter criteria (i.e., acts as
        identity function) otherwise returns None.
    """
    # Use .get() so that records which are *missing* the "Key"/"Bucket"
    # fields entirely are filtered (as documented) instead of raising
    # KeyError. Falsy values (None, "") are filtered by the same check.
    key = object_info.get("Key")
    bucket = object_info.get("Bucket")
    if not key:
        logger.info(
            "This object_info record doesn't contain a source key "
            f"and can't be processed.\nMessage: {object_info}",
        )
        return None
    elif not bucket:
        logger.info(
            "This object_info record doesn't contain a source bucket "
            f"and can't be processed.\nMessage: {object_info}",
        )
        return None
    elif "owner.txt" in key:
        logger.info(
            f"This object_info record is an owner.txt and can't be processed.\nMessage: {object_info}"
        )
        return None
    elif key.endswith("/"):
        # A trailing slash means this is a "directory" marker, not a file.
        logger.info(
            f"This object_info record is a directory and can't be processed.\nMessage: {object_info}"
        )
        return None
    return object_info

def get_object_info(s3_event: dict) -> dict:
"""
Derive object info from an S3 event.
Expand Down Expand Up @@ -124,8 +168,13 @@ def main(
sns_notification = json.loads(sqs_record["body"])
sns_message = json.loads(sns_notification["Message"])
logger.info(f"Received SNS message: {sns_message}")
for s3_event in sns_message["Records"]:
object_info = get_object_info(s3_event)
all_object_info_list = map(get_object_info, sns_message["Records"])
valid_object_info_list = [
object_info
for object_info in all_object_info_list
if filter_object_info(object_info) is not None
]
for object_info in valid_object_info_list:
s3_client.download_file(Filename=temp_zip_path, **object_info)
logger.info(f"Getting archive contents for {object_info}")
archive_contents = get_archive_contents(
Expand Down
53 changes: 53 additions & 0 deletions tests/test_lambda_dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,59 @@ def test_get_object_info_unicode_characters_in_key(s3_event):
assert object_info["Key"] == \
"main/2023-09-26T00:06:39Z_d873eafb-554f-4f8a-9e61-cdbcb7de07eb"

@pytest.mark.parametrize(
    "object_info,expected",
    [
        pytest.param(
            {
                "Bucket": "recover-dev-input-data",
                "Key": "main/2023-01-12T22--02--17Z_77fefff8-b0e2-4c1b-b0c5-405554c92368",
            },
            {
                "Bucket": "recover-dev-input-data",
                "Key": "main/2023-01-12T22--02--17Z_77fefff8-b0e2-4c1b-b0c5-405554c92368",
            },
            id="correct_msg_format",
        ),
        pytest.param(
            {"Bucket": "recover-dev-input-data", "Key": "main/v1/owner.txt"},
            None,
            id="owner_txt",
        ),
        pytest.param(
            {"Bucket": "recover-dev-input-data", "Key": "main/adults_v2/"},
            None,
            id="directory",
        ),
        pytest.param(
            {"Bucket": "recover-dev-input-data", "Key": None},
            None,
            id="missing_key",
        ),
        pytest.param(
            {
                "Bucket": None,
                "Key": "main/2023-01-12T22--02--17Z_77fefff8-b0e2-4c1b-b0c5-405554c92368",
            },
            None,
            id="missing_bucket",
        ),
    ],
)
def test_that_filter_object_info_returns_expected_result(object_info, expected):
    # filter_object_info acts as the identity function on valid records
    # and returns None for records that must be skipped.
    assert app.filter_object_info(object_info) == expected

def test_get_archive_contents(archive_path, archive_json_paths):
dummy_bucket = "dummy_bucket"
dummy_key = "dummy_key"
Expand Down

0 comments on commit 22193c4

Please sign in to comment.