diff --git a/CHANGELOG.md b/CHANGELOG.md index f80f758..9fa2d96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ All notable changes, updates, and fixes to pod5 will be documented here The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.3.11] + +## Added + +- Typechecking on `Writer.add_reads` to inform users incorrectly passing `ReadRecords` + +## Fixed + +- `DatasetReader` correctly handles string paths + ## [0.3.10] ## Added @@ -100,7 +110,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removed python3.7 support - ## [0.2.9] 2023-11-02 ### Fixed @@ -119,7 +128,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove exposed artifactory URL env var from gitlab ci config. - `convert to_fast5` writes byte encoded read_ids to match Minkow (was `str`) - ## [0.2.7] 2023-09-11 ### Added diff --git a/docs/requirements.txt b/docs/requirements.txt index fbf4061..155a39a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,4 +2,4 @@ sphinx-rtd-theme sphinx==v5.3.0 myst-parser # Paths are relative to project root for ReadTheDocs and docs/Makefile -pod5==0.3.10 +pod5==0.3.11 diff --git a/python/pod5/pyproject.toml b/python/pod5/pyproject.toml index e00f9f9..0a62428 100644 --- a/python/pod5/pyproject.toml +++ b/python/pod5/pyproject.toml @@ -22,7 +22,7 @@ classifiers=[ ] dependencies = [ - "lib_pod5 == 0.3.10", + "lib_pod5 == 0.3.11", "iso8601", 'importlib-metadata; python_version<"3.8"', "more_itertools", diff --git a/python/pod5/src/pod5/dataset.py b/python/pod5/src/pod5/dataset.py index aa244a4..35e945f 100644 --- a/python/pod5/src/pod5/dataset.py +++ b/python/pod5/src/pod5/dataset.py @@ -290,13 +290,18 @@ def _collect_dataset( threads: int, ) -> Set[Path]: if isinstance(paths, (str, Path, os.PathLike)): - paths = [Path(paths)] + paths = [paths] - _paths = list(paths) + if not isinstance(paths, Collection): + raise TypeError( + f"paths must be a Collection[PathOrStr] but found {type(paths)=}" + ) + + paths = [Path(p) for p in paths] collected: Set[Path] = set() with ThreadPoolExecutor(max_workers=threads) as executor: search = partial(search_path, recursive=recursive, patterns=[pattern]) - for coll in executor.map(search, _paths): + for coll in executor.map(search, paths): collected.update(coll) return collected diff --git a/python/pod5/src/pod5/writer.py b/python/pod5/src/pod5/writer.py index 3552790..477864b 100644 --- a/python/pod5/src/pod5/writer.py +++ b/python/pod5/src/pod5/writer.py @@ -1,6 +1,7 @@ """ Tools for writing POD5 data """ + import datetime import itertools from pathlib import Path @@ -19,6 +20,7 @@ import lib_pod5 as p5b import numpy as np +from pod5.reader import ReadRecord import pytz from pod5.api_utils import Pod5ApiException, safe_close @@ -296,6 +298,11 @@ def add_reads(self, reads: Sequence[Union[Read, CompressedRead]]) -> None: np.concatenate(signal_chunk_lengths).astype(np.uint32), # type: ignore [no-untyped-call] signal_chunk_counts, ) + elif isinstance(reads[0], ReadRecord): + raise TypeError( + "Writer.add_reads(reads) does not take ReadRecords - see ReadRecord.to_read()" + ) + raise TypeError(f"Writer.add_reads(reads) - unexpected type: {type(reads[0])=}") def _prepare_add_reads_args(self, reads: Sequence[BaseRead]) -> List[Any]: """ diff --git a/python/pod5/src/tests/test_dataset.py b/python/pod5/src/tests/test_dataset.py index d3a0813..d25be8e 100644 --- a/python/pod5/src/tests/test_dataset.py +++ b/python/pod5/src/tests/test_dataset.py @@ -393,3 +393,15 @@ def test_get_path(self, nested_dataset: Path) -> None: for path in dataset.paths: read_id = dataset.get_reader(path).read_ids[0] assert dataset.get_path(read_id) == path + + def test_collect_paths(self, nested_dataset: Path) -> None: + """Pass various inputs to DatasetReader._collect_dataset""" + collect = p5.DatasetReader._collect_dataset + + expected = {nested_dataset / "root_10.pod5"} + kwargs = dict(recursive=False, pattern="*.pod5", threads=1) + assert expected == collect(nested_dataset, **kwargs) # type: ignore + assert expected == collect(str(nested_dataset), **kwargs) # type: ignore + + with pytest.raises(TypeError, match="paths must be a Collection"): + collect(1, **kwargs) # type: ignore diff --git a/python/pod5/src/tests/test_writer.py b/python/pod5/src/tests/test_writer.py index e2ce801..fe8dea3 100644 --- a/python/pod5/src/tests/test_writer.py +++ b/python/pod5/src/tests/test_writer.py @@ -1,6 +1,7 @@ """ Testing Pod5Writer """ + import math import lib_pod5 as p5b import numpy as np @@ -113,3 +114,17 @@ def test_read_copy(self, reader: p5.Reader, writer: p5.Writer) -> None: assert before.has_cached_signal == after.has_cached_signal assert np.array_equal(before.signal, after.signal) assert np.array_equal(before.signal_pa, after.signal_pa) + + def test_read_record_type_check(self, reader: p5.Reader, writer: p5.Writer) -> None: + """Check type errors raised when passing ReadRecords to writer""" + with pytest.raises(TypeError, match="ReadRecord.to_read"): + for record in reader: + writer.add_read(record) # type: ignore + + with pytest.raises(TypeError, match="ReadRecord.to_read"): + writer.add_reads([r for r in reader]) # type: ignore + + with pytest.raises(TypeError, match="unexpected type"): + writer.add_read([1]) # type: ignore + + writer.close()