Skip to content

Commit

Permalink
DatasetReader str paths and Writer.add_reads type checking
Browse files Browse the repository at this point in the history
  • Loading branch information
HalfPhoton committed Apr 2, 2024
1 parent a491558 commit 4be3964
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 7 deletions.
12 changes: 10 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ All notable changes, updates, and fixes to pod5 will be documented here
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.11]

### Added

- Type checking in `Writer.add_reads` raises a `TypeError` when `ReadRecords` are passed by mistake (see `ReadRecord.to_read()`)

### Fixed

- `DatasetReader` correctly handles string paths

## [0.3.10]

### Added
Expand Down Expand Up @@ -100,7 +110,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Removed python3.7 support


## [0.2.9] 2023-11-02

### Fixed
Expand All @@ -119,7 +128,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Remove exposed artifactory URL env var from gitlab ci config.
- `convert to_fast5` writes byte encoded read_ids to match MinKNOW (was `str`)


## [0.2.7] 2023-09-11

### Added
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ sphinx-rtd-theme
sphinx==v5.3.0
myst-parser
# Paths are relative to project root for ReadTheDocs and docs/Makefile
pod5==0.3.10
pod5==0.3.11
2 changes: 1 addition & 1 deletion python/pod5/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers=[
]

dependencies = [
"lib_pod5 == 0.3.10",
"lib_pod5 == 0.3.11",
"iso8601",
'importlib-metadata; python_version<"3.8"',
"more_itertools",
Expand Down
11 changes: 8 additions & 3 deletions python/pod5/src/pod5/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,18 @@ def _collect_dataset(
threads: int,
) -> Set[Path]:
if isinstance(paths, (str, Path, os.PathLike)):
paths = [Path(paths)]
paths = [paths]

_paths = list(paths)
if not isinstance(paths, Collection):
raise TypeError(
f"paths must be a Collection[PathOrStr] but found {type(paths)=}"
)

paths = [Path(p) for p in paths]
collected: Set[Path] = set()
with ThreadPoolExecutor(max_workers=threads) as executor:
search = partial(search_path, recursive=recursive, patterns=[pattern])
for coll in executor.map(search, _paths):
for coll in executor.map(search, paths):
collected.update(coll)
return collected

Expand Down
7 changes: 7 additions & 0 deletions python/pod5/src/pod5/writer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Tools for writing POD5 data
"""

import datetime
import itertools
from pathlib import Path
Expand All @@ -19,6 +20,7 @@

import lib_pod5 as p5b
import numpy as np
from pod5.reader import ReadRecord
import pytz

from pod5.api_utils import Pod5ApiException, safe_close
Expand Down Expand Up @@ -296,6 +298,11 @@ def add_reads(self, reads: Sequence[Union[Read, CompressedRead]]) -> None:
np.concatenate(signal_chunk_lengths).astype(np.uint32), # type: ignore [no-untyped-call]
signal_chunk_counts,
)
elif isinstance(reads[0], ReadRecord):
raise TypeError(
"Writer.add_reads(reads) does not take ReadRecords - see ReadRecord.to_read()"
)
raise TypeError(f"Writer.add_reads(reads) - unexpected type: {type(reads[0])=}")

def _prepare_add_reads_args(self, reads: Sequence[BaseRead]) -> List[Any]:
"""
Expand Down
12 changes: 12 additions & 0 deletions python/pod5/src/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,15 @@ def test_get_path(self, nested_dataset: Path) -> None:
for path in dataset.paths:
read_id = dataset.get_reader(path).read_ids[0]
assert dataset.get_path(read_id) == path

def test_collect_paths(self, nested_dataset: Path) -> None:
    """Check DatasetReader._collect_dataset with Path, str and invalid inputs"""
    collect_dataset = p5.DatasetReader._collect_dataset
    options = dict(recursive=False, pattern="*.pod5", threads=1)
    expected_paths = {nested_dataset / "root_10.pod5"}

    # The same location given as a Path or as a plain string must yield
    # the same collected set.
    for source in (nested_dataset, str(nested_dataset)):
        assert expected_paths == collect_dataset(source, **options)  # type: ignore

    # An int is rejected with a TypeError.
    with pytest.raises(TypeError, match="paths must be a Collection"):
        collect_dataset(1, **options)  # type: ignore
15 changes: 15 additions & 0 deletions python/pod5/src/tests/test_writer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Testing Pod5Writer
"""

import math
import lib_pod5 as p5b
import numpy as np
Expand Down Expand Up @@ -113,3 +114,17 @@ def test_read_copy(self, reader: p5.Reader, writer: p5.Writer) -> None:
assert before.has_cached_signal == after.has_cached_signal
assert np.array_equal(before.signal, after.signal)
assert np.array_equal(before.signal_pa, after.signal_pa)

def test_read_record_type_check(self, reader: p5.Reader, writer: p5.Writer) -> None:
    """Writer must raise TypeError for ReadRecords and other unexpected types"""
    # Records handed over one at a time via add_read
    with pytest.raises(TypeError, match="ReadRecord.to_read"):
        for read_record in reader:
            writer.add_read(read_record)  # type: ignore

    # Records handed over as a single batch via add_reads
    with pytest.raises(TypeError, match="ReadRecord.to_read"):
        batch = [read_record for read_record in reader]
        writer.add_reads(batch)  # type: ignore

    # A list of ints passed to add_read must report an unexpected type
    with pytest.raises(TypeError, match="unexpected type"):
        writer.add_read([1])  # type: ignore

    writer.close()

0 comments on commit 4be3964

Please sign in to comment.