diff --git a/src/vrsix/cli.py b/src/vrsix/cli.py index 65205ee..923b668 100644 --- a/src/vrsix/cli.py +++ b/src/vrsix/cli.py @@ -7,9 +7,6 @@ import click from vrsix import load as load_vcf -from vrsix.fetch import fetch_by_pos_range as vcf_fetch_by_pos_range -from vrsix.fetch import fetch_by_vrs_ids -from vrsix.output import generate_csv _logger = logging.getLogger(__name__) @@ -56,73 +53,3 @@ def load(vcfs: tuple[Path], db_location: Path | None) -> None: load_vcf.load_vcf(vcf, db_location) end = timer() _logger.info("Processed `%s` in %s seconds", vcf, end - start) - - -@cli.command() -@click.argument( - "vrs-ids", - nargs=-1, -) -@click.option( - "--db-location", - type=click.Path( - file_okay=True, dir_okay=True, readable=True, writable=True, path_type=Path - ), -) -@click.option( - "-o", - "--output", - type=click.Path(readable=True, writable=True, path_type=Path), -) -def fetch_by_id( - vrs_ids: list[str], db_location: Path | None, output: Path | None -) -> None: - """Fetch VCF positions by VRS ID""" - if not vrs_ids: - return - rows = fetch_by_vrs_ids(vrs_ids, db_location) - if output: - generate_csv(rows, output) - else: - for row in rows: - click.echo(",".join(row)) - - -@cli.command() -@click.argument("chrom", required=True) -@click.argument( - "start", - type=click.INT, - required=True, -) -@click.argument( - "end", - type=click.INT, - required=True, -) -@click.option( - "--db-location", - type=click.Path( - file_okay=True, dir_okay=True, readable=True, writable=True, path_type=Path - ), -) -@click.option( - "-o", - "--output", - type=click.Path(readable=True, writable=True, path_type=Path), -) -def fetch_by_range( - chrom: str, start: int, end: int, db_location: Path | None, output: Path | None -) -> None: - """Fetch VCF rows by position range. - - :param chrom: chromosome - :param start: starting position - :param end: ending position - """ - rows = vcf_fetch_by_pos_range(chrom, start, end, db_location) - if output: - generate_csv(rows, output) - else: - for row in rows: - click.echo(",".join(row)) diff --git a/src/vrsix/fetch.py b/src/vrsix/fetch.py deleted file mode 100644 index 5e683c4..0000000 --- a/src/vrsix/fetch.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Fetch data from a SQLite index to support tabix-based lookups.""" - -import sqlite3 -from pathlib import Path - -from vrsix.sqlite import DEFAULT_SQLITE_LOCATION - - -def _get_connection(db_location: Path | None) -> sqlite3.Connection: - if not db_location: - db_location = DEFAULT_SQLITE_LOCATION - return sqlite3.connect(db_location) - - -def fetch_by_vrs_ids( - vrs_ids: list[str], db_location: Path | None = None -) -> list[tuple]: - """Access index by VRS ID. - - :param vrs_id: VRS ID or allele hash - :param db_location: path to sqlite file (assumed to exist) - :return: location description tuple if available - """ - vrs_ids = [ - vrs_id[9:] if vrs_id.startswith("ga4gh:VA.") else vrs_id for vrs_id in vrs_ids - ] - conn = _get_connection(db_location) - # have to manually make placeholders for python sqlite API -- - # should still be safe against injection by using parameterized query - placeholders = ",".join("?" for _ in vrs_ids) - result = conn.cursor().execute( - f"SELECT vrs_id, chr, pos FROM vrs_locations WHERE vrs_id IN ({placeholders})", # noqa: S608 - vrs_ids, - ) - data = result.fetchall() - conn.close() - return [(f"ga4gh:VA.{row[0]}", row[1], row[2]) for row in data] - - -def fetch_by_pos_range( - chrom: str, start: int, end: int, db_location: Path | None = None -) -> list[tuple]: - """Access index by location range. - - :param chrom: chromosome name - :param start: start of range - :param end: end of range - :param db_location: path to sqlite file (assumed to exist) - """ - conn = _get_connection(db_location) - result = conn.cursor().execute( - "SELECT vrs_id, chr, pos FROM vrs_locations WHERE chr = ? AND pos BETWEEN ? AND ?", - (chrom, start, end), - ) - data = result.fetchall() - conn.close() - return [(f"ga4gh:VA.{row[0]}", row[1], row[2]) for row in data] diff --git a/src/vrsix/output.py b/src/vrsix/output.py deleted file mode 100644 index ae0c142..0000000 --- a/src/vrsix/output.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Provide utilities for generating data output in response to user queries.""" - -import csv -import datetime -from pathlib import Path - - -def generate_csv(rows: list[tuple], output_location: Path) -> None: - """Generate CSV at indicated location with provided data. - - :param rows: rows returned from sqlite index - :param output_location: location to save output to - """ - if output_location.is_dir(): - output_location = ( - output_location - / f"vrs_vcf_index_results_{datetime.datetime.now(tz=datetime.timezone.utc)}.csv" - ) - with output_location.open("w") as f: - writer = csv.writer(f) - for row in rows: - writer.writerow(row) diff --git a/tests/test_fetch.py b/tests/test_fetch.py deleted file mode 100644 index 6a4331b..0000000 --- a/tests/test_fetch.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path - -import pytest - -from vrsix import fetch - - -@pytest.fixture() -def db_fixture(fixture_dir: Path) -> Path: - return fixture_dir / "index.db" - - -def test_fetch_by_id(db_fixture: Path): - result = fetch.fetch_by_vrs_ids( - ["ga4gh:VA.eXPR_T0angq2prNwkqkRQr800N1mRE7J"], db_fixture - ) - assert result == [("ga4gh:VA.eXPR_T0angq2prNwkqkRQr800N1mRE7J", "Y", 2781674)] - - result = fetch.fetch_by_vrs_ids( - [ - "ga4gh:VA.eXPR_T0angq2prNwkqkRQr800N1mRE7J", - "nSND_n_mYcrCnOTxJyWJ3OzRdkwND4rT", - ], - db_fixture, - ) - result.sort() - assert result == [ - ("ga4gh:VA.eXPR_T0angq2prNwkqkRQr800N1mRE7J", "Y", 2781674), - ("ga4gh:VA.nSND_n_mYcrCnOTxJyWJ3OzRdkwND4rT", "3", 10180), - ] - - result = fetch.fetch_by_vrs_ids(["sdfljksdfaj;kl"], db_fixture) - assert result == [] - - -def test_fetch_by_range(db_fixture: Path): - result = fetch.fetch_by_pos_range("Y", 2781670, 2781675, db_location=db_fixture) - result.sort() - assert result == [ - ("ga4gh:VA.6flVGYer2yZRLjSfuJm_tpSAoT_ttTaF", "Y", 2781674), - ("ga4gh:VA.eXPR_T0angq2prNwkqkRQr800N1mRE7J", "Y", 2781674), - ] - - result = fetch.fetch_by_pos_range("Y", 2781670, 2781671, db_location=db_fixture) - assert result == [] - - result = fetch.fetch_by_pos_range("3", 10180, 10181, db_location=db_fixture) - result.sort() - assert result == [ - ("ga4gh:VA.aPDv4__2RdPqXPinSRQMKNQsaGw2eak7", "3", 10181), - ("ga4gh:VA.nSND_n_mYcrCnOTxJyWJ3OzRdkwND4rT", "3", 10180), - ]