From 35c82bb9687897446da2816ff3ccfc43c72234b1 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 15:10:53 +0100 Subject: [PATCH 01/12] add a flake8 linting action --- .github/workflows/lint.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..ccef985 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: flake8 Lint + +on: [push, pull_request] + +jobs: + flake8-lint: + runs-on: ubuntu-latest + name: Lint + steps: + - name: Check out source repository + uses: actions/checkout@v3 + - name: Set up Python environment + uses: actions/setup-python@v4 + with: + python-version: "3.9" + - name: flake8 Lint + uses: py-actions/flake8@v2 + with: + max-line-length: "120" + path: "cyto_ml" + plugins: "flake8-bugbear==22.1.11 flake8-black" \ No newline at end of file From 64c34eea49d61796be8769c143519a4eebdf975c Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 15:26:00 +0100 Subject: [PATCH 02/12] clear out linting issues --- cyto_ml/data/vectorstore.py | 12 ++++++++---- cyto_ml/models/scivision.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cyto_ml/data/vectorstore.py b/cyto_ml/data/vectorstore.py index d6e06c2..38d15c7 100644 --- a/cyto_ml/data/vectorstore.py +++ b/cyto_ml/data/vectorstore.py @@ -1,18 +1,22 @@ import chromadb -from chromadb.db.base import NotFoundError, UniqueConstraintError +from chromadb.db.base import UniqueConstraintError from typing import Optional +import logging +logging.basicConfig(level=logging.INFO) client = chromadb.PersistentClient(path="./vectors") -def vector_store(name: Optional[str] = 'test_collection'): + +def vector_store(name: Optional[str] = "test_collection"): """ Return a vector store specified by name, default test_collection """ try: collection = client.create_collection( - name=name, metadata={"hnsw:space": 
"cosine"} # l2 is the default + name=name, metadata={"hnsw:space": "cosine"} # default similarity ) except UniqueConstraintError as err: collection = client.get_collection(name) - + logging.info(err) + return collection diff --git a/cyto_ml/models/scivision.py b/cyto_ml/models/scivision.py index 1be163f..6f05cce 100644 --- a/cyto_ml/models/scivision.py +++ b/cyto_ml/models/scivision.py @@ -4,7 +4,7 @@ import torchvision from xarray import DataArray -SCIVISION_URL = "https://github.com/alan-turing-institute/plankton-cefas-scivision" +SCIVISION_URL = "https://github.com/alan-turing-institute/plankton-cefas-scivision" # noqa: E501 def load_model(url: str): From d89bf2854ad529f9f9d5715fb7e032d357d9ec9d Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 15:28:12 +0100 Subject: [PATCH 03/12] remember to name the project --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1c54f9d..135ccc9 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# [insert name here] +# Plankton ML -This repository contains code and configuration for processing and analysing images of plankton samples. +This repository contains code and configuration for processing and analysing images of plankton samples. It's experimental, serving as much as a proposed template for new projects than as a project in itself. -It's a sister project to an image annotation app that is not yet released, written by researchers and data scientists at the UK Centre for Ecology and Hydrology in the early stages of a collaborative project that wasn't taken forward. +It's a companion project to an R-shiny based image annotation app that is not yet released, written by researchers and data scientists at the UK Centre for Ecology and Hydrology in the early stages of a collaboration that was placed on hold. 
## Installation From 9f6df865c63aad940162599d7398a7d38f41fad0 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 15:30:06 +0100 Subject: [PATCH 04/12] format everything thru black, maybe too insistent --- cyto_ml/data/vectorstore.py | 1 + cyto_ml/models/scivision.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cyto_ml/data/vectorstore.py b/cyto_ml/data/vectorstore.py index 38d15c7..ed0e9bf 100644 --- a/cyto_ml/data/vectorstore.py +++ b/cyto_ml/data/vectorstore.py @@ -2,6 +2,7 @@ from chromadb.db.base import UniqueConstraintError from typing import Optional import logging + logging.basicConfig(level=logging.INFO) client = chromadb.PersistentClient(path="./vectors") diff --git a/cyto_ml/models/scivision.py b/cyto_ml/models/scivision.py index 6f05cce..3de3ea8 100644 --- a/cyto_ml/models/scivision.py +++ b/cyto_ml/models/scivision.py @@ -4,7 +4,9 @@ import torchvision from xarray import DataArray -SCIVISION_URL = "https://github.com/alan-turing-institute/plankton-cefas-scivision" # noqa: E501 +SCIVISION_URL = ( + "https://github.com/alan-turing-institute/plankton-cefas-scivision" # noqa: E501 +) def load_model(url: str): From 75466523581a14ce4d7a70739b1b43b1d39edbc0 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 16:22:03 +0100 Subject: [PATCH 05/12] save coverage as artefact, just making the syntax up at this point --- .github/workflows/conda.yml | 9 ++++++--- .github/workflows/coverage.yml | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 08484c4..844f0ff 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -25,6 +25,9 @@ jobs: environment-file: environment.yml python-version: ${{ matrix.python-version }} auto-activate-base: false - - run: | - pip install pytest-cov - python -m pytest --cov=cyto_ml --cov-report xml:coverage.xml tests/ + - run: pip install pytest-cov + - run: python -m pytest --cov=cyto_ml 
--cov-report xml:coverage.xml tests/ + - uses: actions/upload-artifact@v4 + with: + name: coverage.xml + path: coverage.xml diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 01344aa..129470a 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -7,6 +7,9 @@ jobs: coverage: runs-on: ubuntu-latest steps: + - uses: actions/download-artifact@v4 + with: + name: coverage.xml - name: Test coverage report uses: orgoro/coverage@v3.1 with: From c6225c6026bf6cd0747a0515124b9e89c1cac5c2 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 16:31:17 +0100 Subject: [PATCH 06/12] try again with the coverage step in single workflow --- .github/workflows/conda.yml | 12 +++++++----- .github/workflows/coverage.yml | 17 ----------------- 2 files changed, 7 insertions(+), 22 deletions(-) delete mode 100644 .github/workflows/coverage.yml diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 844f0ff..1cfe17c 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,7 +1,5 @@ on: - push: - branches: [ "main" ] pull_request: branches: [ "main" ] @@ -27,7 +25,11 @@ jobs: auto-activate-base: false - run: pip install pytest-cov - run: python -m pytest --cov=cyto_ml --cov-report xml:coverage.xml tests/ - - uses: actions/upload-artifact@v4 + coverage: + runs-on: ubuntu-latest + steps: + - name: Test coverage report + uses: orgoro/coverage@v3.1 with: - name: coverage.xml - path: coverage.xml + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index 129470a..0000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: 'coverage' -on: - pull_request: - branches: - - main -jobs: - coverage: - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v4 - with: - name: coverage.xml - - name: Test coverage report - uses: 
orgoro/coverage@v3.1 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} From a8c74b6b2c2950d8f6b4fb8e624480aecd7e0fdf Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Tue, 25 Jun 2024 16:44:00 +0100 Subject: [PATCH 07/12] Revert "try again with the coverage step in single workflow" This reverts commit c6225c6026bf6cd0747a0515124b9e89c1cac5c2. --- .github/workflows/conda.yml | 12 +++++------- .github/workflows/coverage.yml | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/coverage.yml diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 1cfe17c..844f0ff 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,5 +1,7 @@ on: + push: + branches: [ "main" ] pull_request: branches: [ "main" ] @@ -25,11 +27,7 @@ jobs: auto-activate-base: false - run: pip install pytest-cov - run: python -m pytest --cov=cyto_ml --cov-report xml:coverage.xml tests/ - coverage: - runs-on: ubuntu-latest - steps: - - name: Test coverage report - uses: orgoro/coverage@v3.1 + - uses: actions/upload-artifact@v4 with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} + name: coverage.xml + path: coverage.xml diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000..129470a --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,17 @@ +name: 'coverage' +on: + pull_request: + branches: + - main +jobs: + coverage: + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + name: coverage.xml + - name: Test coverage report + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} From 908effec5a0ff24ebacbf634868ea84f07a19e27 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Wed, 26 Jun 2024 09:19:14 +0100 Subject: [PATCH 08/12] try `needs` to make the coverage job run later --- .../workflows/{conda.yml => pytest_coverage.yml} | 14 
++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) rename .github/workflows/{conda.yml => pytest_coverage.yml} (72%) diff --git a/.github/workflows/conda.yml b/.github/workflows/pytest_coverage.yml similarity index 72% rename from .github/workflows/conda.yml rename to .github/workflows/pytest_coverage.yml index 844f0ff..f99692d 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/pytest_coverage.yml @@ -1,7 +1,5 @@ on: - push: - branches: [ "main" ] pull_request: branches: [ "main" ] @@ -31,3 +29,15 @@ jobs: with: name: coverage.xml path: coverage.xml + coverage: + needs: tests + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + name: coverage.xml + - name: Test coverage report + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} From 6e78733993e59a0e0b70d0fcc1356c98e9164033 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Wed, 26 Jun 2024 09:23:11 +0100 Subject: [PATCH 09/12] reference points to encourage healthy contribution from researchers --- CONTRIBUTING.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d1c28ce..bd9f539 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,4 +7,11 @@ This is a small prototype but we welcome contributions to it. * Please contribute a test that covers your changes * Please provide brief instructions to a reviewer about how to exercise your code +## TODO + +* add a friendly reference for relative beginners on pull requests / git workflow with a link to a Discussion where UKCEH people can come with queries +* add a reference on good style for git commit messages +* link to examples of projects with reasonable test coverage + + Thank you for thinking about contributing <3 From 782faa08c1c39962ea7356ab52acc99caeffd637 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Wed, 26 Jun 2024 09:26:02 +0100 Subject: [PATCH 10/12] delete the standalone coverage workflow. 
appears working --- .github/workflows/coverage.yml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/workflows/coverage.yml diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index 129470a..0000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: 'coverage' -on: - pull_request: - branches: - - main -jobs: - coverage: - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v4 - with: - name: coverage.xml - - name: Test coverage report - uses: orgoro/coverage@v3.1 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} From 5552bfdef7387f5eeb814ae42bb80276598b51dd Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Mon, 1 Jul 2024 09:03:36 +0100 Subject: [PATCH 11/12] Make CONTRIBUTING a bit more introductory --- CONTRIBUTING.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bd9f539..fb8c22a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,16 +2,17 @@ This is a small prototype but we welcome contributions to it. -* Please create a pull request with additions or changes +* Please [create a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) with additions or changes. * Please use a python formatter and linter to clean your code to PEP8 standard - we like `[black](https://pypi.org/project/black/)` as a formatting convention -* Please contribute a test that covers your changes -* Please provide brief instructions to a reviewer about how to exercise your code +* Please make sure you add a test for the code that you add. If you're not familiar with tests [this might be a helpful starting point](https://realpython.com/pytest-python-testing/) +* It's considerate to provide brief instructions to a reviewer about how to run your code and what to expect. 
+ +If you've got any questions about this, we're happy to help; please reach out to the [EDS RSE team](https://github.com/NERC-CEH/rse_group/discussions) ## TODO -* add a friendly reference for relative beginners on pull requests / git workflow with a link to a Discussion where UKCEH people can come with queries -* add a reference on good style for git commit messages -* link to examples of projects with reasonable test coverage +* Add links to git commit style guides, workflow outlines +* Add links to examples of projects with reasonable test coverage Thank you for thinking about contributing <3 From 8a324f4aa0e27e75c06efd02556c18e9f3e63701 Mon Sep 17 00:00:00 2001 From: Jo Walsh Date: Mon, 1 Jul 2024 09:41:05 +0100 Subject: [PATCH 12/12] Add the lines setuptools asks for on this branch --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index deb0e18..8510474 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,5 @@ version = "0.1" description = "This package supports the processing and analysis of plankton sample data" readme = "README.md" requires-python = "<3.10" +[tool.setuptools] +py-modules = []