Skip to content

Commit

Permalink
Merge branch 'subsample_qfilt' into 'dev'
Browse files Browse the repository at this point in the history
subsample_qfilt: enable filtering for proportional subsampling and change...

See merge request research/pomoxis!163
  • Loading branch information
ftostevin-ont committed Feb 13, 2023
2 parents 13f5d8e + 503c586 commit f9e47f7
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.3.12] - 2023-02-09
### Changed
- `subsample_bam`: `--quality` filtering now compares against the Q-score of the read's mean per-base error probability, rather than the arithmetic mean of its per-base quality scores as previously.
- `subsample_bam`: read filtering (`--quality`, `--coverage`, `--accuracy`) is now enabled for proportional subsampling.
## [v0.3.11] - 2022-11-16
### Fixed
- Fix crashes in `subsample_bam` with alignment filtering and `common_errors_from_bam`
Expand Down
2 changes: 1 addition & 1 deletion Makefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ $(BINCACHEDIR)/bedtools: | $(BINCACHEDIR) $(BINBUILDDIR)
venv: venv/bin/activate
IN_VENV=. ./venv/bin/activate
venv/bin/activate:
test -d venv || $(PYTHON) -m venv venv --prompt '(pomoxis) '
test -d venv || $(PYTHON) -m venv venv --prompt 'pomoxis'
${IN_VENV} && pip install pip --upgrade
${IN_VENV} && pip install -r requirements.txt

Expand Down
2 changes: 1 addition & 1 deletion pomoxis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.3.11'
__version__ = '0.3.12'

import argparse
import os
Expand Down
9 changes: 4 additions & 5 deletions pomoxis/subsample_bam.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ def main():


def subsample_region_proportionally(region, args):
if args.quality is not None or args.coverage is not None or args.accuracy is not None:
raise NotImplemented('Read filtering is not currently supported for proportion subsampling')

logger = logging.getLogger(region.ref_name)
coverage_summary = coverage_summary_of_region(region, args.bam, args.stride)
col = 'depth_{}'.format(args.orientation) if args.orientation is not None else 'depth'
Expand Down Expand Up @@ -139,6 +136,8 @@ def _read_iter():
return found_enough_depth


# Lookup table: entry q holds the error probability 10 ** (-q / 10) for an
# integer quality score q in 0..99, so an array of per-base scores can be
# converted to probabilities with a single fancy-indexing operation.
QSCORES_TO_PROBS = np.power(10, -0.1 * np.arange(100))

def filter_read(r, bam, args, logger):
"""Decide whether a read should be filtered out, returning a bool"""

Expand All @@ -153,9 +152,9 @@ def filter_read(r, bam, args, logger):

# filter quality
if args.quality is not None:
mean_q = np.mean(r.query_qualities)
mean_q = -10 * np.log10(np.mean(QSCORES_TO_PROBS[r.query_qualities]))
if mean_q < args.quality:
logger.debug("Filtering {} by quality ({:.2f}).".format(r.query_name, mean_q))
logger.debug(f"Filtering {r.query_name} with quality {mean_q:.2f}")
return True

# filter accuracy or alignment coverage
Expand Down

0 comments on commit f9e47f7

Please sign in to comment.