From 0a5139cb3b63a7ff50284b34f6d5d7833ff51910 Mon Sep 17 00:00:00 2001
From: jgould
Date: Thu, 7 Nov 2019 14:27:03 -0500
Subject: [PATCH] read gzipped gtf files

---
 velocyto/counter.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/velocyto/counter.py b/velocyto/counter.py
index a56823a..de4ceca 100644
--- a/velocyto/counter.py
+++ b/velocyto/counter.py
@@ -337,7 +337,7 @@ def read_repeats(self, gtf_file: str, tolerance: int=5) -> Dict[str, List[vcy.Fe
         repeat_ivls_list: List[vcy.Feature] = []
 
         # fin = open(gtf_file)
-        gtf_lines = [line for line in open(gtf_file) if not line.startswith('#')]
+        gtf_lines = read_gtf_lines(gtf_file)
 
         def sorting_key(entry: str) -> Tuple[str, bool, int, str]:
             """This sorting strategy is equivalent to sort -k1,1 -k7,7 -k4,4n"""
@@ -460,7 +460,7 @@ def read_transcriptmodels(self, gtf_file: str) -> Dict[str, Dict[str, vcy.Transc
         # Initialize containers
         # headerlines: List[str] = []
 
-        gtf_lines = [line for line in open(gtf_file) if not line.startswith('#')]
+        gtf_lines = read_gtf_lines(gtf_file)
 
         def sorting_key(entry: str) -> Tuple[str, bool, int, str]:
             """This sorting strategy is equivalent to sort -k1,1 -k7,7 -k4,4n"""
@@ -1265,6 +1265,15 @@ def pcount_cell_batch(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[st
         raise NotImplementedError("This will be a used by .pcount")
 
 
+def read_gtf_lines(gtf_file: str) -> List[str]:
+    """Return the non-comment lines of a GTF file; a file name ending in '.gz' is read through gzip"""
+    import gzip  # local import: this patch does not show whether counter.py already imports gzip
+    opener = gzip.open if gtf_file.endswith('.gz') else open
+    # Text mode is required: gzip.open defaults to 'rb' and yields bytes, so startswith('#') would raise TypeError
+    with opener(gtf_file, "rt") as fin:
+        return [line for line in fin if not line.startswith('#')]
+
+
 def reverse(strand: str) -> str:
     if strand == "+":
         return "-"