From 0ec4eecd1c5f06faf7feb5d748774d8c72c26a19 Mon Sep 17 00:00:00 2001
From: rxu17 <26471741+rxu17@users.noreply.github.com>
Date: Fri, 5 May 2023 10:24:58 -0700
Subject: [PATCH] [ETL-374] Fix failing test for setup_external_storage (#46)

* add command line arg for pytest for integration test, update readme

---------

Co-authored-by: Rixing Xu
---
 tests/Dockerfile                     |  2 +-
 tests/README.md                      | 17 ++++++++-
 tests/conftest.py                    | 15 ++++++++
 tests/test_setup_external_storage.py | 55 ++++++++--------------------
 4 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/tests/Dockerfile b/tests/Dockerfile
index e31fbd2e..1551e479 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -1,4 +1,4 @@
 FROM amazon/aws-glue-libs:glue_libs_4.0.0_image_01
 
-RUN pip3 install synapseclient~=2.7 pyarrow~=11.0 pytest-datadir
+RUN pip3 install moto~=4.1 synapseclient~=2.7 pyarrow~=11.0 datacompy~=0.8 pytest-datadir
 ENTRYPOINT ["bash", "-l"]
diff --git a/tests/README.md b/tests/README.md
index edf3fa49..66bdf08b 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -56,10 +56,23 @@ pytest with other tests because they have to be run in a Dockerfile:
 
 - test_s3_to_glue_lambda.py
 - test_setup_external_storage.py
-Example)
+
+#### Running tests for lambda
 Run the following command from the repo root to run tests for the lambda function (in develop).
-You can run this locally or inside the docker image.
 
 ```shell script
 python3 -m pytest tests/test_s3_to_glue_lambda.py -v
 ```
+
+#### Running tests for setup external storage
+Run the following command from the repo root to run the integration test for the setup external storage
+script, which checks that STS access has been set up for a given Synapse folder (in develop).
+
+This test takes two command line arguments:
+
+- `--test-synapse-folder-id` - Synapse ID of the folder to check STS access for
+- `--test-ssm-parameter` - SSM parameter holding the Synapse credentials; leave blank to pull credentials from the environment instead
+
+```shell script
+python3 -m pytest tests/test_setup_external_storage.py --test-synapse-folder-id <synapse_folder_id> --test-ssm-parameter <ssm_parameter>
+```
diff --git a/tests/conftest.py b/tests/conftest.py
index faf4bd3f..05cc2bfb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -256,3 +256,18 @@ def staging_dataset_with_empty_columns():
 @pytest.fixture()
 def staging_dataset_empty():
     return pd.DataFrame()
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--test-synapse-folder-id",
+        action="store",
+        default=None,
+        help="ID of the synapse folder to check STS access. Required.",
+    )
+    parser.addoption(
+        "--test-ssm-parameter",
+        action="store",
+        default=None,
+        help="The SSM parameter to use to check STS access. Optional.",
+    )
diff --git a/tests/test_setup_external_storage.py b/tests/test_setup_external_storage.py
index f134e721..1bc8b331 100644
--- a/tests/test_setup_external_storage.py
+++ b/tests/test_setup_external_storage.py
@@ -1,52 +1,29 @@
 import boto3
 import pytest
 import synapseclient
-from pyarrow import fs, parquet
+from pyarrow import fs
+from src.scripts.setup_external_storage import setup_external_storage
 
 
-@pytest.fixture
-def test_parquet_folder():
-    """TODO: Replace with production parquet folder synapse ID
-    or save as external environment variable"""
-    parquet_folder = "syn51079888"
-    return parquet_folder
+@pytest.fixture()
+def test_synapse_folder_id(pytestconfig):
+    yield pytestconfig.getoption("test_synapse_folder_id")
 
 
-@pytest.fixture
-def test_synapse_client():
-    """Returns a synapse client from credentials stored in SSM"""
-    aws_session = boto3.session.Session(profile_name="default", region_name="us-east-1")
-    ssm_parameter = "synapse-recover-auth"
-    if ssm_parameter is not None:
-        ssm_client = aws_session.client("ssm")
-        token = ssm_client.get_parameter(Name=ssm_parameter, WithDecryption=True)
-        test_synapse_client = synapseclient.Synapse()
-        test_synapse_client.login(authToken=token["Parameter"]["Value"])
-    else:  # try cached credentials
-        test_synapse_client = synapseclient.login()
-    return test_synapse_client
+@pytest.fixture()
+def test_ssm_parameter(pytestconfig):
+    yield pytestconfig.getoption("test_ssm_parameter")
 
 
-def test_setup_external_storage_success(test_parquet_folder, test_synapse_client):
+
+@pytest.mark.integration()
+def test_setup_external_storage_success(test_synapse_folder_id, test_ssm_parameter):
     """This test tests that it can get the STS token credentials and view and list the
-    parquet files in the S3 bucket location to verify that it has access"""
+    files in the S3 bucket location to verify that it has access"""
+    test_synapse_client = setup_external_storage.get_synapse_client(
+        ssm_parameter=test_ssm_parameter, aws_session=boto3
+    )
     # Get STS credentials
     token = test_synapse_client.get_sts_storage_token(
-        entity=test_parquet_folder, permission="read_only", output_format="json"
-    )
-
-    # Pass STS credentials to Arrow filesystem interface
-    s3 = fs.S3FileSystem(
-        access_key=token["accessKeyId"],
-        secret_key=token["secretAccessKey"],
-        session_token=token["sessionToken"],
-        region="us-east-1",
+        entity=test_synapse_folder_id, permission="read_only", output_format="json"
     )
-
-    # get file info
-    base_s3_uri = "{}/{}".format(token["bucket"], token["baseKey"])
-    parquet_datasets = s3.get_file_info(fs.FileSelector(base_s3_uri, recursive=False))
-
-    # list objects in bucket, if permissions exist, would work
-    conn = boto3.client("s3")  # again assumes boto.cfg setup, assume AWS S3
-    conn.list_objects(Bucket=token["bucket"])["Contents"]