diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d70a44..5ca558c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: "actions/checkout@v4" @@ -24,14 +24,19 @@ jobs: with: python-version: "${{ matrix.python-version }}" + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + cache-dependency-glob: "pyproject.toml" + - name: "Installs for ${{ matrix.python-version }}" run: | - python --version - pip install --upgrade pip wheel setuptools flit - pip install --upgrade nox + uv venv venv -p python${{ matrix.python-version }} + uv tool install -p venv nox - name: "Run nox for ${{ matrix.python-version }}" - run: "nox -s test_coveralls-${{ matrix.python-version }} -- --cov-report lcov:lcov-${{matrix.os}}-${{matrix.python-version}}.lcov --cov-report term --cov-append --cov diverse_seq" + run: "nox -db uv -s test_coveralls-${{ matrix.python-version }} -- --cov-report lcov:lcov-${{matrix.os}}-${{matrix.python-version}}.lcov --cov-report term --cov-append --cov diverse_seq" - name: Coveralls Parallel uses: coverallsapp/github-action@v2 diff --git a/noxfile.py b/noxfile.py index 7fef8ff..647d0c9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,6 +1,6 @@ import nox -_py_versions = range(10, 13) +_py_versions = range(10, 14) nox.options.sessions = ["test", "testcov"] @@ -16,7 +16,7 @@ def test(session): ) -@nox.session(python=["3.12"]) +@nox.session(python=["3.13"]) def testcov(session): session.install(".[test]") session.chdir("tests") diff --git a/pyproject.toml b/pyproject.toml index d60b161..e96f587 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ keywords = ["biology", "genomics", "statistics", "phylogeny", "evolution", "bioinformatics"] readme = "README.md" license = { file = 
"LICENSE" } -requires-python = ">=3.10,<3.13" +requires-python = ">=3.10,<3.14" dependencies = [ "attrs", "click", @@ -32,6 +32,8 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] # the following are inferred from the source code dynamic = ["version", "description"] @@ -50,7 +52,7 @@ test = [ "pytest", "pytest-cov", "pytest-xdist", - "ruff==0.9.1", + "ruff==0.9.3", ] dev = [ "cogapp", @@ -60,7 +62,7 @@ dev = [ "pytest", "pytest-cov", "pytest-xdist", - "ruff==0.9.1", + "ruff==0.9.3", ] doc = ["click", "ipykernel", @@ -136,7 +138,11 @@ target-version = "py310" # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or # McCabe complexity (`C901`) by default. select = ["ALL"] -ignore = ["EXE002", "FA100", "E501", "D"] +# turn off warnings for: +# - import numpy as np, we prefer explicit and 3 extra characters is no burden +# - strict camel case class names, we name classes with a __call__ method like +# functions since their instance behaves like a function +ignore = ["EXE002", "FA100", "E501", "D", "ICN001", "N801"] # Allow fix for all enabled rules (when `--fix`) is provided. 
fixable = ["ALL"] diff --git a/src/diverse_seq/__init__.py b/src/diverse_seq/__init__.py index 32eb48b..6bbfae2 100644 --- a/src/diverse_seq/__init__.py +++ b/src/diverse_seq/__init__.py @@ -4,4 +4,4 @@ # found by h5py import hdf5plugin # noqa -__version__ = "2024.12.26a1" +__version__ = "2024.12.26a2" diff --git a/src/diverse_seq/record.py b/src/diverse_seq/record.py index f0b42c5..b98086e 100644 --- a/src/diverse_seq/record.py +++ b/src/diverse_seq/record.py @@ -4,9 +4,9 @@ import functools from collections.abc import Iterator from math import fabs, isclose -from typing import Union import numba +import typing_extensions from attrs import asdict, define, field, validators from cogent3 import get_moltype from cogent3.app import composable @@ -16,6 +16,7 @@ from cogent3.core import sequence as c3_seq from numpy import ( array, + dtype, errstate, log2, min_scalar_type, @@ -29,7 +30,7 @@ from diverse_seq import util as dvs_utils -NumType = Union[float, int] +NumType = float | int PosDictType = dict[int, NumType] @@ -37,8 +38,9 @@ def _gettype(name) -> type: try: return name.type - except AttributeError: - raise TypeError(f"type {type(name)} not supported") + except AttributeError as e: + msg = f"type {type(name)} not supported" + raise TypeError(msg) from e @_gettype.register @@ -63,10 +65,14 @@ class lazy_kmers: num_states: int = dataclasses.field(init=False) moltype: dataclasses.InitVar[str] = dataclasses.field(default="dna") - def __post_init__(self, moltype: str): + def __post_init__(self, moltype: str) -> None: self.num_states = len(_get_canonical_states(moltype)) - def __array__(self): + def __array__( + self, + dtype: dtype | None = None, + copy: bool | None = None, + ) -> ndarray[int]: data = self.data if isinstance(self.data, ndarray) else self.data.read() return kmer_counts(data, self.num_states, self.k, dtype=self.dtype) @@ -120,7 +126,7 @@ def __init__( dtype: type = float, source: str = None, name: str = None, - ): + ) -> None: """ Parameters ---------- 
@@ -140,7 +146,7 @@ def __init__( self.source = source self.name = name - def __setitem__(self, index: int, value: NumType): + def __setitem__(self, index: int, value: NumType) -> None: self.data[index] = value def __getitem__(self, index: int) -> NumType: @@ -152,15 +158,15 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[NumType]: yield from self.data - def __getstate__(self): + def __getstate__(self) -> dict: return asdict(self) - def __setstate__(self, data): + def __setstate__(self, data: dict) -> typing_extensions.Self: for k, v in data.items(): setattr(self, k, v) return self - def __sub__(self, other): + def __sub__(self, other: typing_extensions.Self) -> typing_extensions.Self: data = self.data - other return self.__class__( data=data, @@ -168,11 +174,11 @@ def __sub__(self, other): dtype=self.dtype, ) - def __isub__(self, other): + def __isub__(self, other: typing_extensions.Self) -> typing_extensions.Self: self.data -= other return self - def __add__(self, other): + def __add__(self, other: typing_extensions.Self) -> typing_extensions.Self: # we are creating a new instance data = self.data + other return self.__class__( @@ -181,37 +187,41 @@ def __add__(self, other): dtype=self.dtype, ) - def __iadd__(self, other): + def __iadd__(self, other: typing_extensions.Self) -> typing_extensions.Self: self.data += other return self - def __truediv__(self, other): + def __truediv__(self, other: typing_extensions.Self) -> typing_extensions.Self: # we are creating a new instance with errstate(divide="ignore", invalid="ignore"): data = nan_to_num(self.data / other, nan=0.0, copy=False) return self.__class__(data=data, vector_length=self.vector_length, dtype=float) - def __itruediv__(self, other): + def __itruediv__(self, other: typing_extensions.Self) -> typing_extensions.Self: with errstate(divide="ignore", invalid="ignore"): data = nan_to_num(self.data / other, nan=0.0, copy=False) self.dtype = float self.data = data return self - def sum(self): + def 
sum(self) -> NumType: return self.data.sum() def iter_nonzero(self) -> Iterator[NumType]: yield from (v for v in self.data if v) @property - def entropy(self): + def entropy(self) -> float: non_zero = self.data[self.data > 0] kfreqs = non_zero if self.dtype == float else non_zero / non_zero.sum() # taking absolute value due to precision issues return fabs(-(kfreqs * log2(kfreqs)).sum()) - def __array__(self): + def __array__( + self, + dtype: dtype | None = None, + copy: bool | None = None, + ) -> ndarray[int]: if not isinstance(self.data, ndarray): self.data = array(self.data) return self.data @@ -292,7 +302,8 @@ def indices_to_bytes( coord = index_to_coord(index, coeffs) for j in range(k): if coord[j] >= num_states: - raise IndexError("index out of character range") + msg = "index out of character range" + raise IndexError(msg) result[i][j] = states[coord[j]] return result @@ -303,7 +314,7 @@ def kmer_counts( seq: ndarray, num_states: int, k: int, - dtype=uint64, + dtype: dtype = uint64, ) -> ndarray: # pragma: no cover """return freqs of valid k-mers using 1D indices @@ -346,12 +357,14 @@ def kmer_counts( def _gt_zero(instance, attribute, value): if value <= 0: - raise ValueError(f"must be > 0, not {value}") + msg = f"must be > 0, not {value}" + raise ValueError(msg) @functools.singledispatch def _make_kcounts(data) -> vector: - raise TypeError(f"type {type(data)} not supported") + msg = f"type {type(data)} not supported" + raise TypeError(msg) @_make_kcounts.register @@ -380,7 +393,7 @@ class SeqArray: moltype: str source: str = None - def __len__(self): + def __len__(self) -> int: return len(self.data) @@ -398,15 +411,15 @@ class KmerSeq: ) @property - def size(self): + def size(self) -> int: return self.kcounts.vector_length @functools.cached_property - def entropy(self): + def entropy(self) -> float: return self.kfreqs.entropy @functools.cached_property - def kfreqs(self): + def kfreqs(self) -> vector: kcounts = array(self.kcounts) kcounts = 
kcounts.astype(float) kfreqs = kcounts / kcounts.sum() @@ -424,7 +437,7 @@ class seq_to_seqarray: def __init__( self, moltype: str = "dna", - ): + ) -> None: self.moltype = moltype self.str2arr = dvs_utils.str2arr(moltype=self.moltype) @@ -438,25 +451,26 @@ def main(self, seq: c3_types.SeqType) -> SeqArray: @functools.singledispatch -def make_kmerseq(data, *, dtype, k, moltype) -> KmerSeq: - raise TypeError(f"type {type(data)} not supported") +def make_kmerseq(data, *, dtype: dtype, k: int, moltype: str) -> KmerSeq: + msg = f"type {type(data)} not supported" + raise TypeError(msg) @make_kmerseq.register -def _(data: SeqArray, *, dtype, k, moltype) -> KmerSeq: +def _(data: SeqArray, *, dtype: dtype, k: int, moltype: str) -> KmerSeq: vec = lazy_kmers( data=data.data, k=k, moltype=moltype, dtype=dtype, ) - kwargs = dict( - vector_length=vec.num_states, - dtype=dtype, - source=data.source, - name=data.seqid, - data=vec, - ) + kwargs = { + "vector_length": vec.num_states, + "dtype": dtype, + "source": data.source, + "name": data.seqid, + "data": vec, + } return KmerSeq( kcounts=vector(**kwargs), @@ -465,20 +479,20 @@ def _(data: SeqArray, *, dtype, k, moltype) -> KmerSeq: @make_kmerseq.register -def _(data: c3_data_store.DataMember, *, dtype, k, moltype) -> KmerSeq: +def _(data: c3_data_store.DataMember, *, dtype: dtype, k: int, moltype: str) -> KmerSeq: vec = lazy_kmers( data=data, k=k, moltype=moltype, dtype=dtype, ) - kwargs = dict( - vector_length=vec.num_states, - dtype=dtype, - source=data.data_store.source, - name=data.unique_id, - data=vec, - ) + kwargs = { + "vector_length": vec.num_states, + "dtype": dtype, + "source": data.data_store.source, + "name": data.unique_id, + "data": vec, + } return KmerSeq( kcounts=vector(**kwargs), @@ -487,7 +501,7 @@ def _(data: c3_data_store.DataMember, *, dtype, k, moltype) -> KmerSeq: @make_kmerseq.register -def _(data: c3_seq.Sequence, *, dtype, k, moltype) -> KmerSeq: +def _(data: c3_seq.Sequence, *, dtype: dtype, k: int, 
moltype: str) -> KmerSeq: cnvrt = dvs_utils.str2arr(moltype=moltype) vec = lazy_kmers( data=cnvrt(str(data)), # pylint: disable=not-callable @@ -495,13 +509,13 @@ def _(data: c3_seq.Sequence, *, dtype, k, moltype) -> KmerSeq: moltype=moltype, dtype=dtype, ) - kwargs = dict( - vector_length=vec.num_states, - dtype=dtype, - source=data.info.source, - name=data.name, - data=vec, - ) + kwargs = { + "vector_length": vec.num_states, + "dtype": dtype, + "source": data.info.source, + "name": data.name, + "data": vec, + } return KmerSeq( kcounts=vector(**kwargs), @@ -510,7 +524,7 @@ def _(data: c3_seq.Sequence, *, dtype, k, moltype) -> KmerSeq: @make_kmerseq.register -def _(data: c3_new_seq.Sequence, *, dtype, k, moltype) -> KmerSeq: +def _(data: c3_new_seq.Sequence, *, dtype: dtype, k: int, moltype: str) -> KmerSeq: cnvrt = dvs_utils.str2arr(moltype=moltype) vec = lazy_kmers( data=cnvrt(str(data)), # pylint: disable=not-callable @@ -518,13 +532,13 @@ def _(data: c3_new_seq.Sequence, *, dtype, k, moltype) -> KmerSeq: moltype=moltype, dtype=dtype, ) - kwargs = dict( - vector_length=vec.num_states, - dtype=dtype, - source=data.info.source, - name=data.name, - data=vec, - ) + kwargs = { + "vector_length": vec.num_states, + "dtype": dtype, + "source": data.info.source, + "name": data.name, + "data": vec, + } return KmerSeq( kcounts=vector(**kwargs), @@ -533,7 +547,7 @@ def _(data: c3_new_seq.Sequence, *, dtype, k, moltype) -> KmerSeq: class _make_kmerseq_init: - def __init__(self, k: int, moltype: str): + def __init__(self, k: int, moltype: str) -> None: """ Parameters ---------- @@ -570,5 +584,6 @@ def _get_canonical_states(moltype: str) -> bytes: canonical = list(moltype.alphabet) v = moltype.alphabet.to_indices(canonical) if not (0 <= min(v) < max(v) < len(canonical)): - raise ValueError(f"indices of canonical states {canonical} not sequential {v}") + msg = f"indices of canonical states {canonical} not sequential {v}" + raise ValueError(msg) return 
"".join(canonical).encode("utf8")