From 54af2a0f6564a56b84cf90ca5b3cc69cbe134afa Mon Sep 17 00:00:00 2001 From: Daniel Burkhardt Date: Mon, 30 Jan 2023 14:37:11 -0500 Subject: [PATCH 1/5] polars version requirement --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index fda8337..4a3756f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ numba numpy pyopenssl pandas>=1.2.0,!=1.3.0 -polars +polars>=0.11.0 pybedtools pyfasta pyranges From 970ef10d831063951e44fb35311d826611ec4ac8 Mon Sep 17 00:00:00 2001 From: Daniel Burkhardt Date: Mon, 30 Jan 2023 14:38:24 -0500 Subject: [PATCH 2/5] Fix #57 - update pl.read_csv --- pycisTopic/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycisTopic/utils.py b/pycisTopic/utils.py index a7c0e80..3b07051 100644 --- a/pycisTopic/utils.py +++ b/pycisTopic/utils.py @@ -394,7 +394,7 @@ def read_fragments_from_file( df = ( pl.read_csv( fragments_bed_filename, - has_headers=False, + has_header=False, skip_rows=skip_rows, sep="\t", use_pyarrow=True, From e6b75c13a547291e4fbc05b7070508840d1f6ec7 Mon Sep 17 00:00:00 2001 From: Daniel Burkhardt Date: Mon, 30 Jan 2023 18:11:22 -0500 Subject: [PATCH 3/5] Fix string formatting error --- pycisTopic/pseudobulk_peak_calling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycisTopic/pseudobulk_peak_calling.py b/pycisTopic/pseudobulk_peak_calling.py index b3a0cfa..e74a089 100644 --- a/pycisTopic/pseudobulk_peak_calling.py +++ b/pycisTopic/pseudobulk_peak_calling.py @@ -691,7 +691,7 @@ def call_peak(self): self.ext_size, self.keep_dup, ) - log.info("Calling peaks for " + self.name + " with %s", cmd) + log.info(f"Calling peaks for {self.name} with {cmd}") try: subprocess.check_output(args=cmd, shell=True, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: From 82fa2b3950274d21abf7c872c9d801009fdf31cd Mon Sep 17 00:00:00 2001 From: dburkhardt Date: Mon, 30 Jan 2023 23:15:04 +0000 Subject: [PATCH 4/5] Update doc string for export_pseudobulk --- pycisTopic/pseudobulk_peak_calling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pycisTopic/pseudobulk_peak_calling.py b/pycisTopic/pseudobulk_peak_calling.py index e74a089..5234a55 100644 --- a/pycisTopic/pseudobulk_peak_calling.py +++ b/pycisTopic/pseudobulk_peak_calling.py @@ -64,7 +64,8 @@ def export_pseudobulk( remove_duplicates: bool, optional Whether duplicates should be removed before converting the data to bigwig. split_pattern: str, optional - Pattern to split cell barcode from sample id. Default: ___ . + Pattern to split cell barcode from sample id. Default: '___'. Note, if `split_pattern` is not None, then `export_pseudobulk` will + attempt to infer `sample_id` from the index of `input_data` and ignore `sample_id_col`. use_polars: bool, optional Whether to use polars to read fragments files. Default: True. **kwargs From cf4ec85099dcf8a15cf95e3d2a72a78f5e2ce703 Mon Sep 17 00:00:00 2001 From: dburkhardt Date: Mon, 30 Jan 2023 23:32:38 +0000 Subject: [PATCH 5/5] Fix #61 --- pycisTopic/pseudobulk_peak_calling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycisTopic/pseudobulk_peak_calling.py b/pycisTopic/pseudobulk_peak_calling.py index 5234a55..7d6fdd1 100644 --- a/pycisTopic/pseudobulk_peak_calling.py +++ b/pycisTopic/pseudobulk_peak_calling.py @@ -522,7 +522,7 @@ def macs_call_peak( q_value=q_value, nolambda=nolambda, ) - log.info(name + " done!") + log.info(f"{name} done!") return MACS_peak_calling @ray.remote @@ -646,7 +646,7 @@ def __init__( ): self.macs_path = macs_path self.treatment = bed_path - self.name = name + self.name = str(name) self.outdir = outdir self.input_format = input_format self.gsize = genome_size @@ -708,7 +708,7 @@ def load_narrow_peak(self): Load MACS2 narrow peak files as :class:`pr.PyRanges`. """ narrow_peak = pd.read_csv( - os.path.join(self.outdir, self.name + "_peaks.narrowPeak"), + os.path.join(self.outdir, f"{self.name}_peaks.narrowPeak"), sep="\t", header=None, )