diff --git a/.github/workflows/upload-and-deploy.yaml b/.github/workflows/upload-and-deploy.yaml index 48b9e0e3..dc5f076c 100755 --- a/.github/workflows/upload-and-deploy.yaml +++ b/.github/workflows/upload-and-deploy.yaml @@ -23,243 +23,243 @@ jobs: - uses: pre-commit/action@v3.0.0 - upload-files: - name: Upload files to S3 bucket in development - runs-on: ubuntu-latest - needs: pre-commit - # These permissions are needed to interact with GitHub's OIDC Token endpoint. - permissions: - id-token: write - contents: read - environment: develop - steps: - - - name: Setup code, pipenv, aws - uses: Sage-Bionetworks/action-pipenv-aws-setup@v3 - with: - role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} - role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }} - python_version: ${{ env.PYTHON_VERSION }} - - - name: Setup sam - uses: aws-actions/setup-sam@v2 - - - name: Set namespace for non-default branch - if: github.ref_name != 'main' - run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV - - - name: Copy files to templates bucket, use dev cloudformation bucket - run: > - python src/scripts/manage_artifacts/artifacts.py - --upload - --namespace $NAMESPACE - --cfn_bucket ${{ vars.CFN_BUCKET }} - - nonglue-unit-tests: - name: Runs unit tests that are not dependent on aws-glue package resources - runs-on: ubuntu-latest - needs: pre-commit - # These permissions are needed to interact with GitHub's OIDC Token endpoint. - permissions: - id-token: write - contents: read - environment: develop - steps: - - name: Setup code, pipenv, aws - uses: Sage-Bionetworks/action-pipenv-aws-setup@v3 - with: - role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} - role_session_name: ${{ github.event.repository.name }}-${{ github.run_id }}-nonglue-unit-tests - python_version: ${{ env.PYTHON_VERSION }} - - - name: Install additional python dependency - run: | - pipenv install ecs_logging~=2.0 - - - name: Test lambda scripts with pytest - run: | - pipenv run python -m pytest tests/test_s3_event_config_lambda.py -v - pipenv run python -m pytest tests/test_s3_to_glue_lambda.py -v - - - name: Test dev synapse folders for STS access with pytest - run: > - pipenv run python -m pytest tests/test_setup_external_storage.py - --test-bucket $DEV_INPUT_BUCKET - --test-synapse-folder-id syn51758510 - --namespace $NAMESPACE - --test-sts-permission read_only - -v - - pipenv run python -m pytest tests/test_setup_external_storage.py - --test-bucket $DEV_PROCESSED_BUCKET - --test-synapse-folder-id syn51084525 - --namespace $NAMESPACE/parquet - --test-sts-permission read_write - -v - - - pytest-docker: - name: Build and push testing docker images to the pytest ECR repository. - needs: pre-commit - runs-on: ubuntu-latest - # These permissions are needed to interact with GitHub's OIDC Token endpoint. - permissions: - id-token: write - contents: read - strategy: - matrix: - include: - - tag_name: aws_glue_3 - dockerfile: tests/Dockerfile.aws_glue_3 - - tag_name: aws_glue_4 - dockerfile: tests/Dockerfile.aws_glue_4 - environment: develop - steps: - - name: Assume AWS role - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} - aws-region: "us-east-1" - # unmasking of the AWS account ID allows the acct id to pass through outputs - mask-aws-account-id: "no" - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - - name: Get ECR secret names - id: ecr - run: | - usernameKey=docker_username_$(echo ${{ steps.login-ecr.outputs.registry }} | tr '.-' _) - echo "username-key=$usernameKey" >> $GITHUB_OUTPUT - passwordKey=docker_password_$(echo ${{ steps.login-ecr.outputs.registry }} | tr '.-' _) - echo "password-key=$passwordKey" >> $GITHUB_OUTPUT - - uses: actions/checkout@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Build and push to ECR - id: docker-build-push - uses: docker/build-push-action@v4 - with: - push: true - tags: ${{ steps.login-ecr.outputs.registry }}/pytest:${{ github.ref_name }}_${{ matrix.tag_name }} - file: ${{ matrix.dockerfile }} - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache - outputs: - ecr-registry: ${{ steps.login-ecr.outputs.registry }} - ecr-username: ${{ steps.login-ecr.outputs[steps.ecr.outputs.username-key] }} - ecr-password: ${{ steps.login-ecr.outputs[steps.ecr.outputs.password-key] }} - - - glue-unit-tests: - name: Run Pytest unit tests for AWS glue - needs: pytest-docker - environment: develop - runs-on: ubuntu-latest - # These permissions are needed to interact with GitHub's OIDC Token endpoint. - permissions: - id-token: write - contents: read - strategy: - matrix: - tag_name: - ["aws_glue_3", "aws_glue_4"] - container: - image: ${{ needs.pytest-docker.outputs.ecr-registry }}/pytest:${{ github.ref_name }}_${{ matrix.tag_name }} - credentials: - username: ${{ needs.pytest-docker.outputs.ecr-username }} - password: ${{ needs.pytest-docker.outputs.ecr-password }} - env: - DISABLE_SSL: true - options: "--user root" - steps: - - name: Assume AWS role - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} - aws-region: "us-east-1" - - uses: actions/checkout@v3 - - run: chown -R glue_user $GITHUB_WORKSPACE - - run: su - glue_user --command "aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID" - - run: su - glue_user --command "aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY" - - run: su - glue_user --command "aws configure set aws_session_token $AWS_SESSION_TOKEN" - - run: su - glue_user --command "aws configure set region $AWS_REGION" - - - name: Set namespace for non-default branch or for tag - if: github.ref_name != 'main' - run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV - - - name: Run Pytest unit tests under AWS 3.0 - if: matrix.tag_name == 'aws_glue_3' - run: | - su - glue_user --command "cd $GITHUB_WORKSPACE && python3 -m pytest tests/test_s3_to_json.py -v" - su - glue_user --command "cd $GITHUB_WORKSPACE && python3 -m pytest tests/test_compare_parquet_datasets.py -v" - - - name: Run Pytest unit tests under AWS 4.0 - if: matrix.tag_name == 'aws_glue_4' - run: > - su - glue_user --command "cd $GITHUB_WORKSPACE && - python3 -m pytest tests/test_json_to_parquet.py --namespace $NAMESPACE -v" - - - sceptre-deploy-develop: - name: Deploys branch using sceptre - runs-on: ubuntu-latest - needs: [upload-files, nonglue-unit-tests, glue-unit-tests] - environment: develop - # These permissions are needed to interact with GitHub's OIDC Token endpoint. - permissions: - id-token: write - contents: read - - steps: - - name: Setup code, pipenv, aws - uses: Sage-Bionetworks/action-pipenv-aws-setup@v3 - with: - role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} - role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }} - python_version: ${{ env.PYTHON_VERSION }} - - - name: Create directory for remote sceptre templates - run: mkdir -p templates/remote/ - - - name: Set namespace for non-default branch - if: github.ref_name != 'main' - run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV - - - name: "Deploy sceptre stacks to dev" - run: pipenv run sceptre --var "namespace=${{ env.NAMESPACE }}" launch develop --yes - - - name: Delete preexisting S3 event notification for this namespace - uses: gagoar/invoke-aws-lambda@v3 - with: - AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }} - AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }} - REGION: ${{ env.AWS_REGION }} - FunctionName: ${{ env.NAMESPACE }}-S3EventConfig - Payload: '{"RequestType": "Delete"}' - LogType: Tail - - - name: Create S3 event notification for this namespace - uses: gagoar/invoke-aws-lambda@v3 - with: - AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }} - AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }} - REGION: ${{ env.AWS_REGION }} - FunctionName: ${{ env.NAMESPACE }}-S3EventConfig - Payload: '{"RequestType": "Create"}' - LogType: Tail + #upload-files: + # name: Upload files to S3 bucket in development + # runs-on: ubuntu-latest + # needs: pre-commit + # # These permissions are needed to interact with GitHub's OIDC Token endpoint. + # permissions: + # id-token: write + # contents: read + # environment: develop + # steps: + + # - name: Setup code, pipenv, aws + # uses: Sage-Bionetworks/action-pipenv-aws-setup@v3 + # with: + # role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} + # role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }} + # python_version: ${{ env.PYTHON_VERSION }} + + # - name: Setup sam + # uses: aws-actions/setup-sam@v2 + + # - name: Set namespace for non-default branch + # if: github.ref_name != 'main' + # run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV + + # - name: Copy files to templates bucket, use dev cloudformation bucket + # run: > + # python src/scripts/manage_artifacts/artifacts.py + # --upload + # --namespace $NAMESPACE + # --cfn_bucket ${{ vars.CFN_BUCKET }} + + #nonglue-unit-tests: + # name: Runs unit tests that are not dependent on aws-glue package resources + # runs-on: ubuntu-latest + # needs: pre-commit + # # These permissions are needed to interact with GitHub's OIDC Token endpoint. + # permissions: + # id-token: write + # contents: read + # environment: develop + # steps: + # - name: Setup code, pipenv, aws + # uses: Sage-Bionetworks/action-pipenv-aws-setup@v3 + # with: + # role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} + # role_session_name: ${{ github.event.repository.name }}-${{ github.run_id }}-nonglue-unit-tests + # python_version: ${{ env.PYTHON_VERSION }} + + # - name: Install additional python dependency + # run: | + # pipenv install ecs_logging~=2.0 + + # - name: Test lambda scripts with pytest + # run: | + # pipenv run python -m pytest tests/test_s3_event_config_lambda.py -v + # pipenv run python -m pytest tests/test_s3_to_glue_lambda.py -v + + # - name: Test dev synapse folders for STS access with pytest + # run: > + # pipenv run python -m pytest tests/test_setup_external_storage.py + # --test-bucket $DEV_INPUT_BUCKET + # --test-synapse-folder-id syn51758510 + # --namespace $NAMESPACE + # --test-sts-permission read_only + # -v + + # pipenv run python -m pytest tests/test_setup_external_storage.py + # --test-bucket $DEV_PROCESSED_BUCKET + # --test-synapse-folder-id syn51084525 + # --namespace $NAMESPACE/parquet + # --test-sts-permission read_write + # -v + + + #pytest-docker: + # name: Build and push testing docker images to the pytest ECR repository. + # needs: pre-commit + # runs-on: ubuntu-latest + # # These permissions are needed to interact with GitHub's OIDC Token endpoint. + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # include: + # - tag_name: aws_glue_3 + # dockerfile: tests/Dockerfile.aws_glue_3 + # - tag_name: aws_glue_4 + # dockerfile: tests/Dockerfile.aws_glue_4 + # environment: develop + # steps: + # - name: Assume AWS role + # uses: aws-actions/configure-aws-credentials@v2 + # with: + # role-to-assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} + # aws-region: "us-east-1" + # # unmasking of the AWS account ID allows the acct id to pass through outputs + # mask-aws-account-id: "no" + + # - name: Login to Amazon ECR + # id: login-ecr + # uses: aws-actions/amazon-ecr-login@v1 + + # - name: Get ECR secret names + # id: ecr + # run: | + # usernameKey=docker_username_$(echo ${{ steps.login-ecr.outputs.registry }} | tr '.-' _) + # echo "username-key=$usernameKey" >> $GITHUB_OUTPUT + # passwordKey=docker_password_$(echo ${{ steps.login-ecr.outputs.registry }} | tr '.-' _) + # echo "password-key=$passwordKey" >> $GITHUB_OUTPUT + # - uses: actions/checkout@v3 + + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v2 + + # - name: Build and push to ECR + # id: docker-build-push + # uses: docker/build-push-action@v4 + # with: + # push: true + # tags: ${{ steps.login-ecr.outputs.registry }}/pytest:${{ github.ref_name }}_${{ matrix.tag_name }} + # file: ${{ matrix.dockerfile }} + # cache-from: type=local,src=/tmp/.buildx-cache + # cache-to: type=local,dest=/tmp/.buildx-cache + # outputs: + # ecr-registry: ${{ steps.login-ecr.outputs.registry }} + # ecr-username: ${{ steps.login-ecr.outputs[steps.ecr.outputs.username-key] }} + # ecr-password: ${{ steps.login-ecr.outputs[steps.ecr.outputs.password-key] }} + + + #glue-unit-tests: + # name: Run Pytest unit tests for AWS glue + # needs: pytest-docker + # environment: develop + # runs-on: ubuntu-latest + # # These permissions are needed to interact with GitHub's OIDC Token endpoint. + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # tag_name: + # ["aws_glue_3", "aws_glue_4"] + # container: + # image: ${{ needs.pytest-docker.outputs.ecr-registry }}/pytest:${{ github.ref_name }}_${{ matrix.tag_name }} + # credentials: + # username: ${{ needs.pytest-docker.outputs.ecr-username }} + # password: ${{ needs.pytest-docker.outputs.ecr-password }} + # env: + # DISABLE_SSL: true + # options: "--user root" + # steps: + # - name: Assume AWS role + # uses: aws-actions/configure-aws-credentials@v2 + # with: + # role-to-assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} + # aws-region: "us-east-1" + # - uses: actions/checkout@v3 + # - run: chown -R glue_user $GITHUB_WORKSPACE + # - run: su - glue_user --command "aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID" + # - run: su - glue_user --command "aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY" + # - run: su - glue_user --command "aws configure set aws_session_token $AWS_SESSION_TOKEN" + # - run: su - glue_user --command "aws configure set region $AWS_REGION" + + # - name: Set namespace for non-default branch or for tag + # if: github.ref_name != 'main' + # run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV + + # - name: Run Pytest unit tests under AWS 3.0 + # if: matrix.tag_name == 'aws_glue_3' + # run: | + # su - glue_user --command "cd $GITHUB_WORKSPACE && python3 -m pytest tests/test_s3_to_json.py -v" + # su - glue_user --command "cd $GITHUB_WORKSPACE && python3 -m pytest tests/test_compare_parquet_datasets.py -v" + + # - name: Run Pytest unit tests under AWS 4.0 + # if: matrix.tag_name == 'aws_glue_4' + # run: > + # su - glue_user --command "cd $GITHUB_WORKSPACE && + # python3 -m pytest tests/test_json_to_parquet.py --namespace $NAMESPACE -v" + + + #sceptre-deploy-develop: + # name: Deploys branch using sceptre + # runs-on: ubuntu-latest + # needs: [upload-files, nonglue-unit-tests, glue-unit-tests] + # environment: develop + # # These permissions are needed to interact with GitHub's OIDC Token endpoint. + # permissions: + # id-token: write + # contents: read + + # steps: + # - name: Setup code, pipenv, aws + # uses: Sage-Bionetworks/action-pipenv-aws-setup@v3 + # with: + # role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }} + # role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }} + # python_version: ${{ env.PYTHON_VERSION }} + + # - name: Create directory for remote sceptre templates + # run: mkdir -p templates/remote/ + + # - name: Set namespace for non-default branch + # if: github.ref_name != 'main' + # run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV + + # - name: "Deploy sceptre stacks to dev" + # run: pipenv run sceptre --var "namespace=${{ env.NAMESPACE }}" launch develop --yes + + # - name: Delete preexisting S3 event notification for this namespace + # uses: gagoar/invoke-aws-lambda@v3 + # with: + # AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }} + # AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }} + # REGION: ${{ env.AWS_REGION }} + # FunctionName: ${{ env.NAMESPACE }}-S3EventConfig + # Payload: '{"RequestType": "Delete"}' + # LogType: Tail + + # - name: Create S3 event notification for this namespace + # uses: gagoar/invoke-aws-lambda@v3 + # with: + # AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }} + # AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }} + # REGION: ${{ env.AWS_REGION }} + # FunctionName: ${{ env.NAMESPACE }}-S3EventConfig + # Payload: '{"RequestType": "Create"}' + # LogType: Tail integration-test-develop: name: Triggers ETL workflow with S3 test files runs-on: ubuntu-latest - needs: sceptre-deploy-develop + #needs: sceptre-deploy-develop environment: develop # These permissions are needed to interact with GitHub's OIDC Token endpoint. permissions: @@ -277,11 +277,23 @@ jobs: if: github.ref_name != 'main' run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV + - name: Fetch the most recent exports. + id: recent-exports + run: | + # Retrieve the last ~2 weeks of exports from each cohort + # Ignore files which end with "/" + echo "KEYS=$( + aws s3api list-objects-v2 \ + --bucket $DEV_INPUT_BUCKET \ + --prefix 'pilot-data/' \ + --query '((sort_by(Contents[? !ends_with(Key, `/`) && !contains(Key, `owner.txt`)], &LastModified)[::-1])[:28])[*].Key' + )" >> "$GITHUB_OUTPUT" + - name: Copies over test files from ingestion bucket run: > - aws s3 cp s3://recover-dev-ingestion/pilot-data/ s3://$DEV_INPUT_BUCKET/$NAMESPACE/ - --recursive - --exclude "owner.txt" + echo ${{ steps.recent-exports.outputs.KEYS }} | jq -r '.[]' | while read -r key; do + aws s3 cp "s3://$DEV_INPUT_BUCKET/$key" "s3://$DEV_INPUT_BUCKET/$NAMESPACE/$key" + done sceptre-deploy-staging: name: Deploys to staging of prod using sceptre