nf-core · alanmmobbs93 · Jan 7, 2025 · Dec 10, 2024 · Dec 11, 2024 · Dec 11, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add nf-test to local module: `STARFUSION_DETECT`. [#586](https://github.com/nf-core/rnafusion/pull/586)
 - Added a new module `CTATSPLICING_STARTOCANCERINTRONS` and a new parameter `--ctatsplicing`. This option creates reports on cancer splicing aberrations and requires one or both of `--arriba` and `--starfusion` to be given. [#587](https://github.com/nf-core/rnafusion/pull/587)
 - Add parameter `--references_only` when no data should be analysed, but only the references should be built [#505](https://github.com/nf-core/rnafusion/pull/505)
+- Add nf-test to local subworkflow: `FUSIONCATCHER_WORKFLOW` [#591](https://github.com/nf-core/rnafusion/pull/591)
 - Add nf-test to local subworkflow: `STARFUSION_WORKFLOW`. [#597](https://github.com/nf-core/rnafusion/pull/597)
 
 ### Changed

diff --git a/modules/local/fusioncatcher/build/environment.yml b/modules/local/fusioncatcher/build/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::fusioncatcher=1.33  # same 1.33 release as the fusioncatcher container tag in this module's main.nf
diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf
@@ -2,21 +2,25 @@ process FUSIONCATCHER_BUILD {
     tag "fusioncatcher_build"
     label 'process_medium'
 
-    container "docker.io/rannickscilifelab/fusioncatcher:1.34"
+    conda "${projectDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d5/d53f36e9e01d14a0ae8e15f8046f52b2883c970c27fe43fdfbd9440a55f5403f/data' :
+        'community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f' }"
 
     input:
     val genome_gencode_version
 
     output:
-    path "human_v${genome_gencode_version}"  , emit: reference
-    path "versions.yml"                      , emit: versions
+    tuple env(meta), path("human_v${genome_gencode_version}"), emit: reference
+    path "versions.yml"                                      , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
 
     def args = task.ext.args ?: ''
+    meta = [ id: "human_v${genome_gencode_version}" ]
     """
     fusioncatcher-build.py \\
         -g homo_sapiens \\
@@ -25,7 +29,7 @@ process FUSIONCATCHER_BUILD {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1))
+        fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')"
     END_VERSIONS
     """
 
@@ -35,7 +39,7 @@ process FUSIONCATCHER_BUILD {
     touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1))
+        fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')"
     END_VERSIONS
     """
 }
diff --git a/modules/local/fusioncatcher/detect/environment.yml b/modules/local/fusioncatcher/detect/environment.yml
@@ -1,4 +1,5 @@
 channels:
+  - conda-forge
   - bioconda
 dependencies:
   - bioconda::fusioncatcher=1.33
diff --git a/modules/local/fusioncatcher/detect/main.nf b/modules/local/fusioncatcher/detect/main.nf
@@ -1,44 +1,46 @@
-process FUSIONCATCHER {
+process FUSIONCATCHER_DETECT {
     tag "$meta.id"
     label 'process_high'
 
-    container "docker.io/rannickscilifelab/fusioncatcher:1.34"
+    conda "${projectDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d5/d53f36e9e01d14a0ae8e15f8046f52b2883c970c27fe43fdfbd9440a55f5403f/data' :
+        'community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f' }"
 
     input:
-    tuple val(meta), path(fasta)
-    path reference
+    tuple val(meta), path(fastqs, stageAs: "input/*")
+    tuple val(meta2), path(reference)
 
     output:
-    tuple val(meta), path("*.fusioncatcher.fusion-genes.txt")   , optional:true  , emit: fusions
-    tuple val(meta), path("*.fusioncatcher.summary.txt")        , optional:true  , emit: summary
-    tuple val(meta), path("*.fusioncatcher.log")                                 , emit: log
-    path "versions.yml"                                                          , emit: versions
+    tuple val(meta), path("*.fusioncatcher.fusion-genes.txt"), emit: fusions, optional: true
+    tuple val(meta), path("*.fusioncatcher.summary.txt")     , emit: summary, optional: true
+    tuple val(meta), path("*.fusioncatcher.log")             , emit: log
+    path "versions.yml"                                      , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def reads = fasta.toString().replace(" ", ",")
+    def args       = task.ext.args ?: ''
+    def prefix     = task.ext.prefix ?: "${meta.id}"
     def single_end = meta.single_end ? "--single-end" : ""
     """
     fusioncatcher.py \\
-        -d $reference \\
-        -i $reads \\
-        -p $task.cpus \\
+        -d ${reference} \\
+        -i input \\
+        -p ${task.cpus} \\
         -o . \\
         --skip-blat \\
-        $single_end \\
-        $args
+        ${single_end} \\
+        ${args}
 
     mv final-list_candidate-fusion-genes.txt ${prefix}.fusioncatcher.fusion-genes.txt
     mv summary_candidate_fusions.txt ${prefix}.fusioncatcher.summary.txt
     mv fusioncatcher.log ${prefix}.fusioncatcher.log
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fusioncatcher: \$(echo \$(fusioncatcher.py --version 2>&1)| sed 's/fusioncatcher.py //')
+        fusioncatcher: "\$(fusioncatcher.py --version 2>&1 | awk '{print \$2}')"
     END_VERSIONS
     """
 
@@ -51,7 +53,7 @@ process FUSIONCATCHER {
     touch ${prefix}.fusioncatcher.log
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fusioncatcher: \$(echo \$(fusioncatcher.py --version 2>&1)| sed 's/fusioncatcher.py //')
+        fusioncatcher: "\$(fusioncatcher.py --version 2>&1 | awk '{print \$2}')"
     END_VERSIONS
     """
 }
diff --git a/modules/local/fusioncatcher/download/environment.yml b/modules/local/fusioncatcher/download/environment.yml
@@ -1,4 +1,5 @@
 channels:
+  - conda-forge
   - bioconda
 dependencies:
   - bioconda::fusioncatcher=1.33
diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf
@@ -2,26 +2,25 @@ process FUSIONCATCHER_DOWNLOAD {
     tag "fusioncatcher_download"
     label 'process_medium'
 
-    container "docker.io/rannickscilifelab/fusioncatcher:1.34"
-
+    conda "${projectDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d5/d53f36e9e01d14a0ae8e15f8046f52b2883c970c27fe43fdfbd9440a55f5403f/data' :
+        'community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f' }"
 
     input:
     val genome_gencode_version
 
-
     output:
-    path "human_v${genome_gencode_version}"                , emit: reference
-    path "versions.yml"     , emit: versions
+    tuple env(meta), path("*"), emit: reference
+    path "versions.yml"       , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
 
     def args = task.ext.args ?: ''
-        // TODO: move to S3
-
-    // def url =
+    meta = [ id: "human_v${genome_gencode_version}" ]
     """
     wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.aa
     wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ab
@@ -32,7 +31,7 @@ process FUSIONCATCHER_DOWNLOAD {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1))
+        fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')"
     END_VERSIONS
     """
 
@@ -42,7 +41,7 @@ process FUSIONCATCHER_DOWNLOAD {
     touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1))
+        fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')"
     END_VERSIONS
     """
 }
diff --git a/subworkflows/local/fusioncatcher_workflow.nf b/subworkflows/local/fusioncatcher_workflow.nf
diff --git a/subworkflows/local/fusioncatcher_workflow/main.nf b/subworkflows/local/fusioncatcher_workflow/main.nf
@@ -0,0 +1,45 @@
+include { FUSIONCATCHER_DETECT } from '../../../modules/local/fusioncatcher/detect/main'
+
+// TODO: Remove fusioncatcher_fusions as parameter.
+// TODO: remove dummy file. Work with Channel.empty()
+// TODO: if the files were already produced and the user wants to skip the module because of this, they should be taken from the sample sheet
+// TODO: harmonize `run_fusioncatcher` and `fusioncatcher_only` parameters at main workflow level to activate/skip this one.
+
+// Provide a per-sample FusionCatcher fusions channel: either run FUSIONCATCHER_DETECT, reuse a
+// user-supplied fusions file, or fall back to the bundled dummy placeholder when the step is off.
+workflow FUSIONCATCHER_WORKFLOW {
+    take:
+        reads                   // channel [ meta, [ fastqs ] ]
+        fusioncatcher_ref       // channel [ meta, path       ]
+        run_fusioncatcher       // boolean
+        all                     // boolean
+        fusioninspector_only    // boolean
+        fusioncatcher_fusions   // path, string
+
+    main:
+        ch_versions   = Channel.empty()
+        // `projectDir` replaces the deprecated `baseDir`; existence is verified once, up front.
+        ch_dummy_file = file("${projectDir}/assets/dummy_file_fusioncatcher.txt", checkIfExists: true)
+
+        // Active when `run_fusioncatcher` or `all` is set, unless `fusioninspector_only` is set.
+        if ((run_fusioncatcher || all) && !fusioninspector_only) {
+            if (fusioncatcher_fusions) {
+                // Precomputed results supplied: attach the same file to every sample, skip detection.
+                def fusions_file = file(fusioncatcher_fusions, checkIfExists: true)
+                ch_fusioncatcher_fusions = reads.map { meta, _fastqs -> [ meta, fusions_file ] }
+            } else {
+                // No precomputed results: run detection on the reads against the reference.
+                FUSIONCATCHER_DETECT(reads, fusioncatcher_ref)
+                ch_fusioncatcher_fusions = FUSIONCATCHER_DETECT.out.fusions
+                ch_versions              = ch_versions.mix(FUSIONCATCHER_DETECT.out.versions)
+            }
+        } else {
+            // Step disabled: every sample still gets an entry, pointing at the dummy placeholder.
+            ch_fusioncatcher_fusions = reads.map { meta, _fastqs -> [ meta, ch_dummy_file ] }
+        }
+
+    emit:
+        fusions  = ch_fusioncatcher_fusions     // channel [ meta, fusions ]
+        versions = ch_versions                  // channel [ versions      ]
+}
+
diff --git a/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test b/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test
@@ -0,0 +1,66 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FUSIONCATCHER_WORKFLOW"
+    script "../main.nf"
+    workflow "FUSIONCATCHER_WORKFLOW"
+    tag "subworkflow"
+    tag "fusioncatcher"
+    tag "fusioncatcher/build"
+    tag "fusioncatcher/detect"
+
+    // Runs the subworkflow with detection enabled on paired-end test reads and snapshots all workflow outputs
+    test("FUSIONCATCHER_WORKFLOW - Homo sapiens - FASTQs chr4") {
+
+        setup {
+            // Produce the fusioncatcher reference with FUSIONCATCHER_BUILD (input '46') before the workflow under test runs
+            run("FUSIONCATCHER_BUILD") {
+                script "../../../../modules/local/fusioncatcher/build/main.nf"
+                process {
+                """
+                input[0] = Channel.value('46')
+                """
+                }
+            }
+        }
+
+        // TODO: get smaller reference files for fusioncatcher
+        when {
+            workflow {
+                """
+                // ch_reads
+                input[0] = Channel.of(
+                    [
+                        [ id: "test_fastqs", single_end: false ],
+                        [
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true),
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true)
+                        ]
+                    ] )
+
+                // ch_references
+                input[1] = FUSIONCATCHER_BUILD.out.reference
+
+                // fusioncatcher (boolean)
+                input[2] = true
+
+                // all (boolean)
+                input[3] = true
+
+                // fusioninspector_only (boolean)
+                input[4] = false
+
+                // fusioncatcher_fusions (string path)
+                input[5] = null
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test.snap b/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test.snap
@@ -0,0 +1,37 @@
+{
+    "FUSIONCATCHER_WORKFLOW - Homo sapiens - FASTQs chr4": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_fastqs",
+                            "single_end": false
+                        },
+                        "test_fastqs.fusioncatcher.fusion-genes.txt:md5,c826a24c49abfcec8164c478e1e74892"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,05bd93a243728a293211ce52e5f97282"
+                ],
+                "fusions": [
+                    [
+                        {
+                            "id": "test_fastqs",
+                            "single_end": false
+                        },
+                        "test_fastqs.fusioncatcher.fusion-genes.txt:md5,c826a24c49abfcec8164c478e1e74892"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,05bd93a243728a293211ce52e5f97282"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.3"
+        },
+        "timestamp": "2025-01-03T19:29:54.767628"
+    }
+}
diff --git a/tests/nextflow.config b/tests/nextflow.config
@@ -3,6 +3,6 @@ process {
     resourceLimits = [
         cpus: 4,
         memory: '15.GB',
-        time: '1.h'
+        time: '4.h'
     ]
 }