diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 53a4f10..4ca53ff 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -10,3 +10,4 @@ A short description of the changes in this PR. * [ ] Jira ticket acceptance criteria met. * [ ] Tests added/updated and passing. * [ ] Documentation updated (if needed). +* [ ] `version.txt` and `CHANGELOG.md` updated (if publishing a new release). diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml deleted file mode 100644 index b230f4c..0000000 --- a/.github/release-drafter.yml +++ /dev/null @@ -1,15 +0,0 @@ -version-resolver: - major: - labels: - - 'major' - minor: - labels: - - 'minor' - patch: - labels: - - 'patch' - default: patch -name-template: 'v$RESOLVED_VERSION' -tag-template: 'v$RESOLVED_VERSION' -template: | - $CHANGES diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml deleted file mode 100644 index d4219e9..0000000 --- a/.github/workflows/github-actions.yml +++ /dev/null @@ -1,66 +0,0 @@ -# This workflow will install python dependencies, run tests, -# and report test results and code coverage as artifacts. 
- -name: Github actions - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - build_and_test: - - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - python-version: [3.8, 3.9] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements/core.txt -r requirements/dev.txt - - - name: Run lint - run: | - flake8 --ignore=W503 harmony_netcdf_to_zarr - - - name: Run test - run: | - bin/test >& test_results-${{ matrix.python-version }}.txt - - - name: Generate coverage report - if: ${{ always() }} - run: | - coverage report -m >& coverage_report-${{ matrix.python-version }}.txt - coverage html --dir htmlcov-${{ matrix.python-version }} - - - name: Archive test results - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: test result (for python ${{ matrix.python-version }}) - path: test_results-${{ matrix.python-version }}.txt - - - name: Archive code coverage report (plain text) - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: code coverage report (plain text) (for python ${{ matrix.python-version }}) - path: coverage_report-${{ matrix.python-version }}.txt - - - name: Archive code coverage report (html) - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: code coverage report (html) (for python ${{ matrix.python-version }}) - path: htmlcov-${{ matrix.python-version }}/* diff --git a/.github/workflows/publish_docker_image.yml b/.github/workflows/publish_docker_image.yml new file mode 100644 index 0000000..8f6f1fb --- /dev/null +++ b/.github/workflows/publish_docker_image.yml @@ -0,0 +1,93 @@ +# This workflow will run when changes are detected in the `main` branch. 
It +# will first trigger the reusable workflow in `.github/workflows/run_tests.yml`, +# which sets up the local Python environment and runs the `unittest` suite. +# If that workflow is successful, a tag is added to the latest git commit, a +# GitHub release is created and the latest version of the service Docker image +# is pushed to ghcr.io. + +name: Publish Harmony NetCDF-to-Zarr Docker image to ghcr.io + +on: + push: + branches: [ main ] + paths: version.txt + +env: + IMAGE_NAME: ${{ github.repository }} + REGISTRY: ghcr.io + +jobs: + run_tests: + uses: ./.github/workflows/run_tests.yml + + create_github_release: + needs: run_tests + permissions: + # write permission is required to create a GitHub release + contents: write + runs-on: ubuntu-20.04 + + steps: + - name: Checkout harmony-netcdf-to-zarr repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Extract semantic version number + run: echo "semantic_version=$(cat version.txt)" >> $GITHUB_ENV + + - name: Extract markdown notes from CHANGELOG.md + run: | + python bin/extract_version_notes.py + + - name: Create Git tag + uses: mathieudutour/github-tag-action@v6.1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + custom_tag: ${{ env.semantic_version }} + + - name: Create GitHub release + uses: ncipollo/release-action@v1 + with: + bodyFile: "version_notes.md" + tag: ${{ env.semantic_version }} + + build_and_publish_image: + needs: run_tests + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + + steps: + - name: Checkout harmony-netcdf-to-zarr repository + uses: actions/checkout@v3 + + - name: Extract semantic version number + run: echo "semantic_version=$(cat version.txt)" >> $GITHUB_ENV + + - name: Log-in to ghcr.io registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Add tags to the Docker image + id: meta + 
uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ env.semantic_version }} + + - name: Push Docker image + uses: docker/build-push-action@v3 + with: + context: . + file: Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 0000000..76b4751 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,64 @@ +# This workflow will install Python dependencies, run tests, +# and report test results and code coverage as artifacts. It will +# be called by the workflow that runs tests against new PRs and as +# a first step in the workflow that publishes new Docker images. + +name: A reusable workflow to build and run the unit test suite + +on: + workflow_call + +jobs: + build_and_test: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + + steps: + - name: Retrieve harmony-netcdf-to-zarr repository + uses: actions/checkout@v3 + + - name: Set up Python 3.9 (version used by service) + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements/core.txt -r requirements/dev.txt + + - name: Run lint + run: | + flake8 --ignore=W503 harmony_netcdf_to_zarr + + - name: Run tests + run: | + bin/test >& test_results.txt + + - name: Generate coverage report + if: ${{ always() }} + run: | + coverage report -m >& coverage_report.txt + coverage html --dir htmlcov + + - name: Archive test results + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: test result (Python 3.9) + path: test_results.txt + + - name: Archive code coverage report (plain text) + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: code coverage report (plain text) + path: coverage_report.txt + + - name: Archive code 
coverage report (HTML) + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: code coverage report (HTML) + path: htmlcov/* diff --git a/.github/workflows/run_tests_on_pull_requests.yml b/.github/workflows/run_tests_on_pull_requests.yml new file mode 100644 index 0000000..d0673da --- /dev/null +++ b/.github/workflows/run_tests_on_pull_requests.yml @@ -0,0 +1,13 @@ +# This workflow will run when a PR is opened against the `main` branch. It will +# trigger the reusable workflow in `.github/workflows/run_tests.yml`, which +# sets up the local Python environment and runs the `unittest` suite. + +name: Build and test PR branches + +on: + pull_request: + branches: [ main ] + +jobs: + build_and_test: + uses: ./.github/workflows/run_tests.yml diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3ae4d6c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,44 @@ +## v1.0.3 +### 2022-12-13 + +* DAS-1713 - Publish Docker images to ghcr.io. +* DAS-1712 - Disable DockerHub publication actions. +* DAS-1695 - Ensure correct metadata is written to store. +* Give `compute_chunksize` a side effect to prevent pickling error in testing end to end. +* Check multiprocess exit codes. +* DAS-1685 - Ensure single granule requests don't attempt aggregation. +* DAS-1673 - Implement smart chunking. +* DAS-1536 - Ensure raised exception messages are correctly tested. +* HARMONY-1189 - Update harmony-service-lib dependency to 1.0.20. +* HARMONY-1178 - Handle paged STAC input. +* DAS-1438 - Ensure zero-dimensional variables, like a CRS, resolve full paths. +* DAS-1438 - Ensure Zarr groups always use a `ProcessSynchronizer`. +* DAS-1455 - Copy exact data from NetCDF-4 input to Zarr store, including scale and offset metadata. +* DAS-1379 - Only aggregate temporal dimensions. +* DAS-1432 - Ensure only the Zarr store is in the STAC. +* DAS-1379 - Write many input NetCDF-4 granules to single Zarr store. 
+* DAS-1414 - Check input granule is NetCDF-4 via media type or extension. +* DAS-1400 - Support bounds during dimension aggregation. +* DAS-1376 - Update `HarmonyAdapter` to perform many-to-one operations. +* DAS-1375 - Scale dimension values to integers before finding differences in resolution calculation. +* DAS-1375 - Add dimension aggregation to `DimensionsMapping` class. +* DAS-1374 - Add NetCDF-4 dimension parsing classes, ready for aggregation. + +## v1.0.2 +### 2021-11-29 + +* Add trigger on release publish. +* HARMONY-388 - Make publish-image consistent with service-example. +* Performance and chunk sizing improvements (HARMONY-953, HARMONY-953, HARMONY-992, HARMONY-877, HARMONY-855). + +## v1.0.1 +### 2021-06-17 + +* HARMONY-388 - Improve consistency across Python repositories. + +## v1.0.0 +### 2021-05-17 + +* HARMONY-817 - Change occurrences of "harmony" to "harmonyservices" for Docker images. +* HARMONY-817 - Publish to DockerHub on merge and release. +* HARMONY-816 - Add user agent logging. diff --git a/Dockerfile b/Dockerfile index 3a968e5..14bb115 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,15 +11,6 @@ RUN apt-get update && apt-get install -y build-essential git COPY requirements/core.txt requirements/core.txt RUN pip3 install -r requirements/core.txt -# This is below the preceding layer to prevent Docker from rebuilding the -# previous layer (forcing a reload of dependencies) whenever the -# status of a local service library changes -ARG service_lib_dir=NO_SUCH_DIR - -# Install a local harmony-service-lib-py if we have one -COPY deps ./deps/ -RUN if [ -d deps/${service_lib_dir} ]; then echo "Installing from local copy of harmony-service-lib"; pip install -e deps/${service_lib_dir}; fi - COPY . . 
ENTRYPOINT ["python3", "-m", "harmony_netcdf_to_zarr"] diff --git a/README.md b/README.md index bdf6af7..e904cc4 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,39 @@ # Harmony NetCDF4 to Zarr converter -A Harmony service to convert NetCDF4 files to Zarr files. Takes conventional Harmony messages and translates -their input granules to Zarr using xarray. +A Harmony service to convert NetCDF4 files to Zarr files. Takes conventional +Harmony messages and translates their input granules to Zarr using xarray. -This library intentionally does very little checking of the input files and file extensions. It is designed -to work on NetCDF granules. It ought to work with any other file type that can be opened with -[xarray.open_mfdataset](http://xarray.pydata.org/en/stable/generated/xarray.open_mfdataset.html) using the -`h5netcdf` driver. This includes some HDF5 EOSDIS datasets. Individual collections must be tested to ensure -compatibility. +This library intentionally does very little checking of the input files and +file extensions. It is designed to work on NetCDF granules. It ought to work +with any other file type that can be opened with +[xarray.open_mfdataset](http://xarray.pydata.org/en/stable/generated/xarray.open_mfdataset.html) +using the `h5netcdf` driver. This includes some HDF5 EOSDIS datasets. +Individual collections must be tested to ensure compatibility. ## Development +It is recommended that the NetCDF-to-Zarr service is tested and developed using +a local Harmony instance. This can be established following the instructions in +the [Harmony repository](https://github.com/nasa/harmony). + ### Setup #### Docker -It is possible to develop and run this service locally using only Docker. This is the recommended option -for validation and small changes. Install [Docker](https://www.docker.com/get-started) on your development -machine. +It is possible to develop and run this service locally using only Docker. 
This +is the recommended option for validation and small changes. Install +[Docker](https://www.docker.com/get-started) on your development machine. #### Environment file This service uses the [harmony-service-lib-py](https://github.com/nasa/harmony-service-lib-py), -and requires that certain environment variables be set, as shown in the Harmony Service Lib README. For example, -`STAGING_BUCKET` and `STAGING_PATH` are required, and `EDL_USERNAME` and `EDL_PASSWORD` are required for any -data behind Earthdata Login. For local testing (not integrated into Harmony in a dev environment or AWS -deployment), use the example `.env` file in this repo: +and requires that certain environment variables be set, as shown in the Harmony +Service Lib README. For example, `STAGING_BUCKET` and `STAGING_PATH` are +required, and `EDL_USERNAME` and `EDL_PASSWORD` are required for any +data behind Earthdata Login. For local testing (not integrated into Harmony in +a dev environment or AWS deployment), use the example `.env` file in this repo: $ cp example/dotenv .env @@ -90,6 +96,11 @@ Build the image: You can now run a workflow in your local Harmony stack and it will execute using this image. +Restart the services in your local Harmony instance (the script below is +contained in the Harmony repository): + + $ bin/restart-services + ### Development without Docker #### Testing & running the Service Independently @@ -118,3 +129,34 @@ pip install -e ../harmony-service-lib-py Now any changes made to that local repo will be visible in this project when you run tests, etc. Finally, you can test & run the service in Harmony just as shown in the `Development with Docker` section above. + +## Contributions: + +Developers working on the NetCDF-to-Zarr service will need to create a feature +branch for their work. The code in the repository has a `unittest` suite, which +should be updated when any code is added or updated within the repository. 
+ +When a feature branch is ready for review, a Pull Request (PR) should be opened +against the `main` branch. This will automatically trigger a GitHub workflow +that will run the `unittest` suite (see: +`.github/workflows/run_tests_on_pull_requests.yml`). + +When a PR is merged against the `main` branch, a different workflow will check +if there are updates to the `version.txt` file. This file should contain a +semantic version number (see: `.github/workflows/publish_docker_image.yml`). + +If there are updates to `version.txt`, the GitHub workflow will: + +* Extract the semantic version number from that file. +* Extract the latest release notes from `CHANGELOG.md`. +* Run the `unittest` suite. +* Tag the most recent commit on the `main` branch with the semantic version + number. +* Create a GitHub release using the release notes and semantic version number. +* Publish the NetCDF-to-Zarr service Docker image to ghcr.io. It will be tagged + with the semantic version number. + +For this reason, when releasing, please be sure to update both: + +* version.txt +* CHANGELOG.md diff --git a/bin/extract_version_notes.py b/bin/extract_version_notes.py new file mode 100644 index 0000000..7ef0df6 --- /dev/null +++ b/bin/extract_version_notes.py @@ -0,0 +1,22 @@ +""" A Python module that will read the contents of CHANGELOG.md and extract + the release notes for the most recent version. + + This module assumes: + + * The top entry in CHANGELOG.md is that of the release to be published. 
+ * Each entry is separated by a blank line and starts with a line: "## vX.Y.Z" + +""" +if __name__ == '__main__': + with open('CHANGELOG.md', 'r', encoding='utf-8') as file_handler: + changelog_lines = file_handler.readlines() + + split_index = next((line_index + for line_index, line + in enumerate(changelog_lines[:-2]) + if line == '\n' + and changelog_lines[line_index + 1].startswith('## v')), + None) + + with open('version_notes.md', 'w', encoding='utf-8') as file_handler: + file_handler.writelines(changelog_lines[:split_index]) diff --git a/version.txt b/version.txt new file mode 100644 index 0000000..21e8796 --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +1.0.3