Skip to content

Commit

Permalink
- new tests and controls CI
Browse files Browse the repository at this point in the history
  • Loading branch information
emattei committed Dec 19, 2024
1 parent 3c6b36e commit cdcabcc
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 43 deletions.
2 changes: 0 additions & 2 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
test_data/genome/IGVFFI7254HLWI_chr19.fasta.gz filter=lfs diff=lfs merge=lfs -text
test_data/gtf/IGVFFI5842FWGQ_chr19.gtf.gz filter=lfs diff=lfs merge=lfs -text
12 changes: 7 additions & 5 deletions .github/workflows/build-docker-kallisto-bustools.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@

name: IGVF kallisto-bustools CI

on:
push:
branches: [ "dev", "main" ]
paths:
- 'modules/igvf-kallisto-bustools/**'
- 'modules/igvf-kallisto-bustools/**'
pull_request:
branches: [ "dev", "main" ]
paths:
- 'modules/igvf-kallisto-bustools/**'
- 'modules/igvf-kallisto-bustools/**'
workflow_dispatch:
inputs:
image_tag:
Expand All @@ -26,7 +25,7 @@ env:

jobs:

build-for-dockerhub:
build-and-test:

runs-on: ubuntu-latest
defaults:
Expand All @@ -42,5 +41,8 @@ jobs:
ls -lht
- name: Build the Docker image
run: docker build . --file docker_builder.dockerfile --tag ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_PATH }}:${{ env.TAG }}
- name: Run tests in Docker container
run: |
docker run --init --rm -v ${{ github.workspace }}/test_data:/software/test_data ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_PATH }}:${{ env.TAG }} pytest /software/tests/test_run_kallisto.py
- name: Push image
run: "docker push ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_PATH }}:${{ env.TAG }}"
run: docker push ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_PATH }}:${{ env.TAG }}
2 changes: 1 addition & 1 deletion modules/igvf-kallisto-bustools/docker_builder.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Based on Python
############################################################

FROM python@sha256:fd0fa50d997eb56ce560c6e5ca6a1f5cf8fdff87572a16ac07fb1f5ca01eb608
FROM --platform="linux/amd64" python:3.10-slim

LABEL maintainer="Eugenio Mattei"
LABEL software="IGVF single-cell pipeline"
Expand Down
2 changes: 1 addition & 1 deletion modules/igvf-kallisto-bustools/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "igvf-kallisto-bustools"
version = "1.0.0"
description = "Align scRNA using kallisto-bustools"
requires-python = "==3.10.15"
requires-python = "==3.10.16"
dependencies = [
"click>=8.1.7",
"kb-python==0.29.1",
Expand Down
25 changes: 25 additions & 0 deletions modules/igvf-kallisto-bustools/run_kallisto.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
import click
import gzip
import logging
import shutil
import sys
import subprocess

# Configure logging
logging.basicConfig(stream=sys.stderr, level=logging.INFO)


def check_and_unzip(file_path):
    """
    Decompress a ``.gz`` file next to itself and return the resulting path.

    The decision is based only on the file name: a path ending in ``.gz`` is
    decompressed with gzip into a sibling file with the ``.gz`` suffix
    removed (an existing file of that name is overwritten). Any other path
    is returned without touching the file system. The file content itself
    is not inspected.

    Parameters:
        file_path (str | os.PathLike): Path to the file to check and unzip.
    Returns:
        str: Path to the unzipped file, or the input path when it does not
        end in ``.gz``.
    """
    # Local import: the module-level import list does not include ``os``.
    import os

    # Normalise path-like objects (e.g. pathlib.Path) to str so the suffix
    # check and the slicing below work for them too.
    path = os.fspath(file_path)
    if not path.endswith('.gz'):
        return path

    unzipped_file_path = path[:-3]  # Remove the .gz extension
    # Stream the decompressed bytes so large genome files are never held
    # in memory in full.
    with gzip.open(path, 'rb') as f_in, open(unzipped_file_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    return unzipped_file_path


@click.group()
@click.version_option(package_name="igvf-kallisto-bustools")
def cli():
Expand Down Expand Up @@ -39,6 +60,8 @@ def index_standard(output_dir, genome_fasta, gtf):
{output_dir}.tar.gz: A tarball of the output directory containing all the indexes.
"""
logging.info(f"Creating standard kallisto index in {output_dir}.")
genome_fasta = check_and_unzip(genome_fasta)
gtf = check_and_unzip(gtf)
# Create the command line string and run it using subprocess
cmd = f"kb ref -i {output_dir}/index.idx -g {output_dir}/t2g.txt -f1 {output_dir}/transcriptome.fa {genome_fasta} {gtf}"
logging.info(f"Running command: {cmd}")
Expand Down Expand Up @@ -75,6 +98,8 @@ def index_nac(output_dir, genome_fasta, gtf):
{output_dir}.tar.gz (File): A tarball of the output directory containing all the indexes.
"""
logging.info(f"Creating nac kallisto index in {output_dir}.")
genome_fasta = check_and_unzip(genome_fasta)
gtf = check_and_unzip(gtf)
# Create the command line string and run it using subprocess
cmd = f"kb ref --workflow=nac -i {output_dir}/index.idx -g {output_dir}/t2g.txt -c1 ~{output_dir}/cdna.txt -c2 ~{output_dir}/nascent.txt -f1 ~{output_dir}/cdna.fasta -f2 ~{output_dir}/nascent.fasta ~{genome_fasta} {gtf}"
logging.info(f"Running command: {cmd}")
Expand Down
53 changes: 19 additions & 34 deletions modules/igvf-kallisto-bustools/tests/test_run_kallisto.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,76 +4,61 @@
from click.testing import CliRunner
from run_kallisto import cli


@pytest.fixture
def runner():
return CliRunner()


@patch('subprocess.run')
def test_index_standard(mock_subprocess_run, runner):
mock_subprocess_run.return_value = subprocess.CompletedProcess(args=['kb ref'], returncode=0, stdout='Success', stderr='')

result = runner.invoke(cli, [
'index', 'standard',
'--output_dir', 'test_output',
'--genome-fasta', 'test_genome.fa',
'--gtf', 'test_annotations.gtf'
'--output_dir', 'tests/data/expected_output',
'--genome-fasta', '/software/test_data/genome/IGVFFI7254HLWI_chr19.fasta.gz',
'--gtf', '/software/test_data/gtf/IGVFFI5842FWGQ_chr19.gtf.gz'
])

assert result.exit_code == 0
assert 'Creating standard kallisto index in test_output.' in result.output
assert 'Running command: kb ref -i test_output/index.idx -g test_output/t2g.txt -f1 test_output/transcriptome.fa test_genome.fa test_annotations.gtf' in result.output
assert 'Creating standard kallisto index in tests/data/expected_output.' in result.output
assert 'Running command: kb ref -i tests/data/expected_output/index.idx -g tests/data/expected_output/t2g.txt -f1 tests/data/expected_output/transcriptome.fa /software/test_data/genome/IGVFFI7254HLWI_chr19.fasta.gz /software/test_data/gtf/IGVFFI5842FWGQ_chr19.gtf.gz' in result.output
assert 'Success' in result.output


@patch('subprocess.run')
def test_index_nac(mock_subprocess_run, runner):
mock_subprocess_run.return_value = subprocess.CompletedProcess(args=['kb ref'], returncode=0, stdout='Success', stderr='')

result = runner.invoke(cli, [
'index', 'nac',
'--output_dir', 'test_output',
'--genome-fasta', 'test_genome.fa',
'--gtf', 'test_annotations.gtf'
'--output_dir', 'tests/data/expected_output',
'--genome-fasta', '/software/test_data/genome/IGVFFI7254HLWI_chr19.fasta.gz',
'--gtf', '/software/test_data/gtf/IGVFFI5842FWGQ_chr19.gtf.gz'
])

assert result.exit_code == 0
assert 'Creating nac kallisto index in test_output.' in result.output
assert 'Running command: kb ref --workflow=nac -i test_output/index.idx -g test_output/t2g.txt -c1 ~test_output/cdna.txt -c2 ~test_output/nascent.txt -f1 ~test_output/cdna.fasta -f2 ~test_output/nascent.fasta ~test_genome.fa test_annotations.gtf' in result.output
assert 'Creating nac kallisto index in tests/data/expected_output.' in result.output
assert 'Running command: kb ref --workflow=nac -i tests/data/expected_output/index.idx -g tests/data/expected_output/t2g.txt -c1 ~tests/data/expected_output/cdna.txt -c2 ~tests/data/expected_output/nascent.txt -f1 ~tests/data/expected_output/cdna.fasta -f2 ~tests/data/expected_output/nascent.fasta /software/test_data/genome/IGVFFI7254HLWI_chr19.fasta.gz /software/test_data/gtf/IGVFFI5842FWGQ_chr19.gtf.gz' in result.output
assert 'Success' in result.output


@patch('subprocess.run')
def test_quant_standard(mock_subprocess_run, runner):
mock_subprocess_run.return_value = subprocess.CompletedProcess(args=['kb count'], returncode=0, stdout='Success', stderr='')

result = runner.invoke(cli, [
'quant', 'standard',
'--index_dir', 'test_index',
'--read_format', 'test_format',
'--output_dir', 'test_output',
'--strand', 'test_strand',
'--threads', '4',
'--barcode_onlist', 'test_barcode_onlist',
'test_fastq1', 'test_fastq2'
])

assert result.exit_code == 0
assert 'Running command: kb count --workflow standard -i test_index -o test_output -x test_format -t 4 --barcode test_barcode_onlist --strand test_strand test_fastq1 test_fastq2' in result.output
assert 'Success' in result.output

@patch('subprocess.run')
def test_quant_nac(mock_subprocess_run, runner):
mock_subprocess_run.return_value = subprocess.CompletedProcess(args=['kb count'], returncode=0, stdout='Success', stderr='')

result = runner.invoke(cli, [
'quant', 'nac',
'--index_dir', 'test_index',
'--index_dir', 'tests/data/expected_output',
'--read_format', 'test_format',
'--output_dir', 'test_output',
'--output_dir', 'tests/data/expected_output',
'--strand', 'test_strand',
'--threads', '4',
'--barcode_onlist', 'test_barcode_onlist',
'test_fastq1', 'test_fastq2'
'--barcode_onlist', 'tests/data/test_barcode_onlist',
'tests/data/test_fastq1', 'tests/data/test_fastq2'
])

assert result.exit_code == 0
assert 'Running command: kb count --workflow nac -i test_index -o test_output -x test_format -t 4 --barcode test_barcode_onlist --strand test_strand test_fastq1 test_fastq2' in result.output
assert 'Running command: kb count --workflow standard -i tests/data/expected_output -o tests/data/expected_output -x test_format -t 4 --barcode tests/data/test_barcode_onlist --strand test_strand tests/data/test_fastq1 tests/data/test_fastq2' in result.output
assert 'Success' in result.output

0 comments on commit cdcabcc

Please sign in to comment.