diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index b10a5a24..26829c55 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -26,7 +26,7 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests -You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command, BUT DON'T FORGET TO ADD THE PARAMETERS cosmic_username AND cosmic_passwd in tests/main.nf.test. ```bash nf-test test --profile debug,test,docker --verbose diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 2281992a..4534f339 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -45,9 +45,6 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -57,10 +54,13 @@ jobs: parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ 
secrets.cosmic_passwd }}", + "all": true, } - profiles: test_full - + profiles: test_full,aws_tower - uses: actions/upload-artifact@v4 with: name: Seqera Platform debug log file diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index d3448f1b..977adb49 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -21,13 +21,17 @@ jobs: workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-test-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", + "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", + "cosmic_username": "${{ secrets.cosmic_username }}", + "cosmic_passwd": "${{ secrets.cosmic_passwd }}", + "all": true, + "stub": true } - profiles: test - + profiles: test,aws_tower - uses: actions/upload-artifact@v4 with: - name: Seqera Platform debug log file + name: Tower debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78fb3b57..0dcb8fdb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,11 +5,18 @@ on: branches: - dev pull_request: + branches: + - dev + - master release: types: [published] workflow_dispatch: env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2" + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity @@ -20,29 +27,31 @@ concurrency: jobs: test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" + name: "${{ matrix.NXF_VER }} | ${{ matrix.test_profile }} | ${{ matrix.compute_profile }}" # Only run on push if this is the 
nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/rnafusion') }}" runs-on: ubuntu-latest strategy: + fail-fast: false matrix: NXF_VER: - "24.04.2" - - "latest-everything" - profile: - - "conda" + - "latest-stable" + test_profile: + - "test_stub" + - "test_build" + compute_profile: - "docker" - "singularity" - test_name: - - "test" + - "conda" isMaster: - ${{ github.base_ref == 'master' }} # Exclude conda and singularity on dev exclude: - isMaster: false - profile: "conda" + compute_profile: "conda" - isMaster: false - profile: "singularity" + compute_profile: "singularity" steps: - name: Check out pipeline code uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 @@ -55,17 +64,17 @@ jobs: version: "${{ matrix.NXF_VER }}" - name: Set up Apptainer - if: matrix.profile == 'singularity' + if: matrix.compute_profile == 'singularity' uses: eWaterCycle/setup-apptainer@main - name: Set up Singularity - if: matrix.profile == 'singularity' + if: matrix.compute_profile == 'singularity' run: | mkdir -p $NXF_SINGULARITY_CACHEDIR mkdir -p $NXF_SINGULARITY_LIBRARYDIR - name: Set up Miniconda - if: matrix.profile == 'conda' + if: matrix.compute_profile == 'conda' uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 with: miniconda-version: "latest" @@ -74,7 +83,7 @@ jobs: channels: conda-forge,bioconda - name: Set up Conda - if: matrix.profile == 'conda' + if: matrix.compute_profile == 'conda' run: | echo $(realpath $CONDA)/condabin >> $GITHUB_PATH echo $(realpath python) >> $GITHUB_PATH @@ -82,6 +91,32 @@ jobs: - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install pdiff to see 
diff between nf-test snapshots run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results + python -m pip install --upgrade pip + pip install pdiff + + - uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + + - name: Run Tests (${{matrix.NXF_VER}} | ${{matrix.test_profile}} | ${{matrix.compute_profile}}) + run: | + nf-test test \ + --ci \ + --tag ${{matrix.test_profile}} \ + --profile "+${{ matrix.compute_profile }}" \ + --junitxml=test.xml \ + --debug --verbose + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: test.xml + annotate_only: true diff --git a/.gitignore b/.gitignore index a42ce016..23b0c7de 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc null/ +.nf-test* diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b6d5e6a..cdc774e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,535 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v4.0.0dev - [date] +### Added + +- Normalized gene expression calculated [#488](https://github.com/nf-core/rnafusion/pull/488) +- Primary assembly now used as main reference genome FASTA file, as recommended by the STAR manual [#488](https://github.com/nf-core/rnafusion/pull/488) +- Use of only ensembl GTF file, not chr.gtf file as GTF reference file [#488](https://github.com/nf-core/rnafusion/pull/488) +- Add nf-test to local module: `ENSEMBL_DOWNLOAD` [#539](https://github.com/nf-core/rnafusion/pull/539) +- Add nf-test to local module: `HGNC_DOWNLOAD` [#540](https://github.com/nf-core/rnafusion/pull/540) +- Add nf-test to local subworkflow: `STRINGTIE_WORKFLOW` [#541](https://github.com/nf-core/rnafusion/pull/541) +- Option to avoid using COSMIC (for example in the case of clinical use) [#547](https://github.com/nf-core/rnafusion/pull/547) +- Add nf-test to nf-core module: 
`PICARD_COLLECTRNASEQMETRICS` and update module [#551](https://github.com/nf-core/rnafusion/pull/551) +- Add `--skip_vcf` boolean parameter to skip vcf file generation [#554](https://github.com/nf-core/rnafusion/pull/554) +- Add nf-test to local module: `FUSIONREPORT_DOWNLOAD` [#560](https://github.com/nf-core/rnafusion/pull/560) +- Add nf-test to local subworkflow: `QC_WORKFLOW` [#568](https://github.com/nf-core/rnafusion/pull/568) +- Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572) +- Add nf-test to local module: `FUSIONREPORT_DETECT`. Improve `FUSIONREPORT_DOWNLOAD` module [#577](https://github.com/nf-core/rnafusion/pull/577) +- Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) +- Add nf-test to local module: `STARFUSION_BUILD`. [#585](https://github.com/nf-core/rnafusion/pull/585) +- Add nf-test to local module: `STARFUSION_DETECT`. [#586](https://github.com/nf-core/rnafusion/pull/586) +- Added a new module `CTATSPLICING_STARTOCANCERINTRONS` and a new parameter `--ctatsplicing`. This option creates reports on cancer splicing aberrations and requires one or both of `--arriba` and `--starfusion` to be given. [#587](https://github.com/nf-core/rnafusion/pull/587) +- Add parameter `--references_only` when no data should be analysed, but only the references should be built [#505](https://github.com/nf-core/rnafusion/pull/505) +- Add nf-test to local subworkflow: `FUSIONCATCHER_WORKFLOW` [#591](https://github.com/nf-core/rnafusion/pull/591) +- Add nf-test to local subworkflow: `STARFUSION_WORKFLOW`. [#597](https://github.com/nf-core/rnafusion/pull/597) +- Add nf-test to local module: `FUSIONINSPECTOR`. [#601](https://github.com/nf-core/rnafusion/pull/601) +- Add nf-test to local subworkflow: `FUSIONREPORT_WORKFLOW`.
[#607](https://github.com/nf-core/rnafusion/pull/607) + +### Changed + +- Updated modules and migrated non-specific modules to nf-core/modules [#484](https://github.com/nf-core/rnafusion/pull/484) +- Updated to nf-core/tools 3.0.2 [#504](https://github.com/nf-core/rnafusion/pull/504) +- Remove local module `RRNA_TRANSCRIPTS` (replaced by nf-core module) [#541](https://github.com/nf-core/rnafusion/pull/541) +- Allow fastq files without a dot before .fn(.gz)/.fastq(.gz) files [#548](https://github.com/nf-core/rnafusion/pull/548) +- Remove double nested folder introduced in [#577](https://github.com/nf-core/rnafusion/pull/577), [#581](https://github.com/nf-core/rnafusion/pull/581) +- Use docker.io and galaxy containers for fusioncatcher and starfusion (incl. fusioninspector) instead of wave as they are not functional on wave [#588](https://github.com/nf-core/rnafusion/pull/588) +- Update STAR-Fusion to 1.14 [#588](https://github.com/nf-core/rnafusion/pull/588) +- Use "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" (to mimic gms/tomte) for GTF_TO_REFFLAT [#505](https://github.com/nf-core/rnafusion/pull/505) +- Integrate reference building in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) +- Move from ensembl to gencode base [#505](https://github.com/nf-core/rnafusion/pull/505) +- Update from ensembl 102 to gencode 46 default references [#505](https://github.com/nf-core/rnafusion/pull/505) +- Update`FUSIONINSPECTOR` to v2.10.0. 
[#601](https://github.com/nf-core/rnafusion/pull/601) +- Remove local module `STARFUSION_DOWNLOAD` [#598](https://github.com/nf-core/rnafusion/pull/598) +- Fix error message when parameter outdir is missing [#611](https://github.com/nf-core/rnafusion/pull/611) + +### Fixed + +- Fixed some Nextflow run-commands in the docs [#491](https://github.com/nf-core/rnafusion/pull/491) +- Fixed bug when trying to build indices behind a proxy and wget was unable to download arriba indices [#495](https://github.com/nf-core/rnafusion/issues/495) +- Fixed bug in `FUSIONREPORT_DOWNLOAD` when building references with `--no_cosmic parameter` [#555](https://github.com/nf-core/rnafusion/issues/555) +- Refactor structure in `FUSIONREPORT_DOWNLOAD` to use cosmic credentials in `ext.args` [#556](https://github.com/nf-core/rnafusion/issues/556) +- Fixed bug in nf-core `RRNATRANSCRIPTS` module [#563](https://github.com/nf-core/rnafusion/issues/563) +- Fixed bug in `GFFREAD` that caused output `gffread_fasta` not being produced [#565](https://github.com/nf-core/rnafusion/issues/565) +- Fixed bug in `FUSIONCATCHER_DOWNLOAD` that caused an error when running with singularity profile [#573](https://github.com/nf-core/rnafusion/issues/573) +- Fixed missing script `gtf2bed` which caused local module `GET_RRNA_TRANSCRIPTS` to fail [#602](https://github.com/nf-core/rnafusion/issues/602) + +### Removed + +- Remove fusionGDB from documentation and fusion-report download stubs [#503](https://github.com/nf-core/rnafusion/pull/503) +- Removed test-build as reference building gets integrated in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) +- Removed parameter `--build_references` + +### Parameters + +| Old parameter | New parameter | +| -------------------- | ------------------- | +| | `--no_cosmic` | +| `--build_references` | `--references_only` | + +## v3.0.2 - [2024-04-10] + +### Added + +### Changed + +- Update to nf-tools 2.11.1 [#457] 
(https://github.com/nf-core/rnafusion/pull/457) +- Update picard collectrnaseqmetrics memory requirements to 0.8x what is provided [#474](https://github.com/nf-core/rnafusion/pull/474) + +### Fixed + +- Fix bug when using parameter "whitelist" [#466](https://github.com/nf-core/rnafusion/pull/466) +- Fix VCF_COLLECT handling when a tool is absent from FUSIONREPORT report [#458](https://github.com/nf-core/rnafusion/pull/458) +- Fix VCF_COLLECT when fusioninspector output is empty but fusionreport is not [#465](https://github.com/nf-core/rnafusion/pull/465) +- Fix VCF_COLLECT bug [#481](https://github.com/nf-core/rnafusion/pull/481) +- Fix conda package for starfusion/detect[#482](https://github.com/nf-core/rnafusion/pull/482) +- Fix logical gate so when stringtie should run but not starfusion, starfusion will not run[#482](https://github.com/nf-core/rnafusion/pull/482) + +### Removed + +## v3.0.1 - [2023-11-29] + +### Added + +### Changed + +- Python3 explicit in vcf_collect [#452](https://github.com/nf-core/rnafusion/pull/452) + +### Fixed + +- software-version.yml and in general version track-keeping was incomplete [#451](https://github.com/nf-core/rnafusion/pull/451) + +### Removed + +## v3.0.0 - [2023-11-27] + +### Added + +- Add picard CollectInsertSizeMetrics to QC workflow [#408](https://github.com/nf-core/rnafusion/pull/408) +- Build CRAM index in the same directory as CRAM files for Arriba and STAR-Fusion [#427](https://github.com/nf-core/rnafusion/pull/427) + +### Changed + +- Replace PICARD_MARKDUPLICATES with GATK4_MARKDUPLICATES [#409](https://github.com/nf-core/rnafusion/pull/409) +- Removed `--fusioninspector_filter` and `--fusionreport_filter` in favor of `--tools_cutoff` (default = 1, no filters applied) [#389](https://github.com/nf-core/rnafusion/pull/389) +- Now publishing convert2bed output to convert2bed to keep the output file [#420](https://github.com/nf-core/rnafusion/pull/420) +- No more checks for existence of samplesheet, which made 
building references fail (building references uses a fake sample sheet if none is provided) [#420](https://github.com/nf-core/rnafusion/pull/420) +- `--annotate --examine_coding_effect` to collect more data from fusioninspector [#426](https://github.com/nf-core/rnafusion/pull/426) +- Update vcf creation to get positions/chromosomes and strands even when fusions are filtered out by fusioninspector, using the csv output from fusion-report [#443](https://github.com/nf-core/rnafusion/pull/443) +- `Arriba` updated to 2.4.0 [#429](https://github.com/nf-core/rnafusion/pull/429) +- Change megafusion into vcf_collect, taking into account e.g. the annotation and coding effects outputs from fusioninspector, HGNC ids, frame status... [#414](https://github.com/nf-core/rnafusion/pull/414) +- CI tests on `--all` instead of each tool separately, and include trimmed/not trimmed matrix tests [#430](https://github.com/nf-core/rnafusion/pull/430) +- AWS tests on `--all` instead of each tool separately, and include trimmed/not trimmed matrix tests [#433](https://github.com/nf-core/rnafusion/pull/433) +- Update `fusion-report` to 2.1.8, updated COSMIC database to fix 404 error, fix download of references via proxy and removing FusionGDB database [#445](https://github.com/nf-core/rnafusion/pull/445) +- Update documentation [#446](https://github.com/nf-core/rnafusion/pull/446) + +### Fixed + +- Fix channel i/o issue in StringTie workflow and add StringTie in github CI tests [#416](https://github.com/nf-core/rnafusion/pull/416) +- Update modules, and make sure MultiQC displays the QC results properly [#440](https://github.com/nf-core/rnafusion/pull/440) +- Add 'when' condition to run CollectInsertSizeMetrics only when STAR-fusion bam files are available [#444](https://github.com/nf-core/rnafusion/pull/444) + +### Removed + +- Remove `squid` and `pizzly` fusion detection tools [#406](https://github.com/nf-core/rnafusion/pull/406) +- Remove harsh trimming option `--trim` 
[#413](https://github.com/nf-core/rnafusion/pull/413) +- Remove qualimap rna_seq [#407](https://github.com/nf-core/rnafusion/pull/407) + +## v2.4.0 - [2023/09/22] + +### Added + +### Changed + +- Use institutional configs by default [#381](https://github.com/nf-core/rnafusion/pull/381) +- Remove redundant indexing in starfusion and qc workflows [#387](https://github.com/nf-core/rnafusion/pull/387) +- Output bai files in same directory as bam files [#387](https://github.com/nf-core/rnafusion/pull/387) +- Update and review documentation [#396](https://github.com/nf-core/rnafusion/pull/396) +- Update picard container for `PICARD_COLLECTRNASEQMETRICS` to 3.0.0 [#395](https://github.com/nf-core/rnafusion/pull/395) +- Renamed output files [#395](https://github.com/nf-core/rnafusion/pull/395) + - `Arriba` visualisation pdf from meta.id to meta.id_combined_fusions_arriba_visualisation + - cram file from output bam of `STAR_FOR_ARRIBA`: meta.id to meta.id_star_for_arriba + - cram file from output bam of `STAR_FOR_STARFUSION`: meta.id to meta.id.star_for_starfusion.Aligned.sortedByCoord.out + - `fusion-report` index.html file to meta.id_fusionreport_index.html + - meta.id.vcf output from `MEGAFUSION` to meta.id_fusion_data.vcf + - Update metro map [#428](https://github.com/nf-core/rnafusion/pull/428) + +### Fixed + +- Tail trimming for reverse reads [#379](https://github.com/nf-core/rnafusion/pull/379) +- Set html files as optional in fusionreport [#380](https://github.com/nf-core/rnafusion/pull/380) +- Provide gene count file by default when running STAR_FOR_STARFUSION [#385](https://github.com/nf-core/rnafusion/pull/385) +- Fix fusion-report issue with MACOSX directories [#386](https://github.com/nf-core/rnafusion/pull/386) +- The fusion list is updated to contain two branches, one in case no fusions are detected and one for if fusions are detected, that will be used to feed to fusioninspector, megafusion, arriba visualisation
[#388](https://github.com/nf-core/rnafusion/pull/388) +- Update fusionreport to 2.1.5p4 to fix 403 error in downloading databases [#403](https://github.com/nf-core/rnafusion/pull/403) + +### Removed + +- `samtools sort` and `samtools index` for `arriba` workflow were dispensable and were removed [#395](https://github.com/nf-core/rnafusion/pull/395) +- Removed trimmed fastqc report from multiqc [#394](https://github.com/nf-core/rnafusion/pull/394) + +## v2.3.0 - [2023/04/24] + +### Added + +- Shell specification to bash +- COSMIC password put into quotes +- Trimmed reads QC in MultiQC +- Add `ARRIBA_VISUALISATION` to processes affected by `--skip_vis` +- Option `fusionreport_filter` to in/activate fusionreport displaying of fusions detected by 2 or more tools + +### Changed + +- `Arriba` visualisation now runs for FusionInspector (combined tools) results, not only `Arriba` results +- Updated metro map with trimming options and placed `Arriba` visualisation after `FusionInspector` +- Exit with error when using squid in combination with any ensembl version different from 102 + +### Fixed + +- Channel issue with indexing of files when using `--cram squid` +- `Arriba` references published in the correct folder + +### Removed + +## v2.2.0 - [2023/03/13] + +### Added + +- exitStatus 140 now part of the retry strategy +- stubs to all local modules +- `--stringtie` option added with StringTie v2.2.1 to detect splicing events. Not included in `fusion-report` or `fusionInspector` summaries.
Included in the `--all` workflow +- Generation of ribosomal RNA interval list with build_references and use it in picard CollectRnaMetrics +- Add csv output to fusionreport +- Trimming workflow using `fastp`: use trimmed reads for all tools +- `whitelist` parameter to add custom fusions to the detected ones and consider the whole for the `fusionInspector` analysis +- Compression to CRAM files for arriba, squid and starfusion workflows (fusioncatcher and pizzly do not produce SAM/BAM files, fusioninspector BAM files are too small to benefit from compression) +- `--qiagen` option to download from QIAGEN instead of COSMIC (use QIAGEN user and password for `cosmic_username` and `cosmic_passwd`) +- Bumped `STAR genomegenerate` time request for building as it was always crashing for most users +- Fixed issue with arriba visualisation parameters [#326](https://github.com/nf-core/rnafusion/issues/326) + +### Changed + +- Test profiles unified under 'test' but if the references do not all need to be downloaded, run with `-stub` +- Update CUSTOM_DUMPSOFTWAREVERSIONS to use multiqc version 1.13 +- Updated to nf-core template 2.7.2, with all module updates +- `MultiQC` updated to 1.13a in process dumpsoftwareversion +- Patch fusion-report version with fixed mittelman DB and DB extraction date written into software_version.yaml +- `Arriba` references back to downloading with `build_references` instead of taking from container +- `Arriba` visualisation now running with `Arriba` v2.3.0 +- Updated `STAR-Fusion` to 1.12.0 + +### Fixed + +- AWS megatest to display on nf-core website +- `arriba` visualisation references updated to 2.3.0 +- Removed issue with multiple outputs in samtools view for squid + +### Removed + +- FUSIONINSPECTOR_DEV process as the option fusioninspector_limitSjdbInsertNsj is part of the main starfusion release + +## v2.1.0 - [2022/07/12] + +### Added + +- `FusionCatcher` single_end support for single reads ABOVE 130 bp +- `--fusioninspector_only` parameter to 
run FusionInspector standalone feeding gene list manually with parameter `--fusioninspector_fusions PATH` +- `--fusioncatcher_limitSjdbInsertNsj` parameter to feed --limitSjdbInsertNsj to FusionCatcher +- `--fusioninspector_limitSjdbInsertNsj` parameter to feed --limitSjdbInsertNsj to FusionInspector !!Any other value than default will use the dev version of FusionInspector!! +- OPTIONAL trimming option `--trim` for hard trimming to 75 bp in case of high read-through. Only fusioncatcher uses trimmed reads as STAR-based fusion detection tools are less sensitive to read-through +- `picard` metrics, STAR final log, and QualiMap output included in `MultiQC` report + +### Changed + +- `seq_platform` and `seq_center` changed from boolean to string +- `seq_platform` set to an empty string and `seq_center` set to an empty string if not existing +- Arriba use ensembl references-built starindex independently of `starfusion_build` parameter +- ftp to http protocol for STARFUSION_BUILD process `Pfam-A.hmm.gz` download as ftp causes issues on some servers +- Updated README and usage documentation with more detailed information and metro map +- Arriba use ensembl references-built starindex independently of starfusion_build parameter +- Update of the single-end reads support table in README, added recommendation to use single-end reads only in last resort +- STAR updated to 2.7.10a +- Arriba updated to 2.3.0, references for blacklist and protein domains changed to 2.3.0 from singularity/docker container -> arriba download of references not necessary any more +- multiQC updated to 1.13a +- picard updated to 2.27.4 +- dumpsoftwareversions module updated to use multiqc=1.12 containers + +### Fixed + +- FusionInspector does not mix sample reads with fusion lists and meta information from other samples anymore +- Arriba visualisation does not mix sample reads with fusion lists and meta information from other samples anymore +- logging of STAR-fusion and fusionreport version + +### 
Removed + +## v2.0.0 - [2022/05/19] + +Update to DSL2 and newer software/reference versions + +### Added + +- Added `qualimap/rnaseq v2.2.2d` from nf-core modules +- Added UCSC `gtfToGenePred v377` +- Added `picard CollectRnaSeqMetrics v2.26.10` +- Added `picard MarkDuplicates v2.26.10` from nf-core modules +- Added `cat/fastqc` from nf-core modules +- Added possibility for manually feeding the results of fusions from different tools to speed-up reruns +- STAR-Fusion references can be downloaded or built but downloaded references are NOT RECOMMENDED as not thoroughly tested (--starfusion_build parameter is true by default, use --starfusion_build false to use downloaded STAR-Fusion references). + +### Changed + +- Upgrade default ensembl version to `102` +- Upgrade to `nf-core/tools v2.3.2` +- Upgrade `Arriba v1.2.0` to `Arriba v2.2.1` +- Upgrade `FusionCatcher v1.20` to `FusionCatcher v1.33` +- Upgrade `STAR-fusion v1.8.1` to `STAR-fusion v1.10.1` +- Upgrade `STAR v2.7.1` to `STAR v2.7.9` +- Upgrade `fusion-report v2.1.3` to `fusion-report v2.1.5` +- Upgrade `kallisto v0.44.0` to `kallisto v0.46.2` +- Upgrade `fastqc v0.11.8` to `fastqc v0.11.9` +- Upgrade `samtools v1.9` to `samtools v1.15.1` +- Upgrade `arriba` references from `v1.2.0` to `v2.1.0` +- Upgrade `fusioncatcher` references from `v98` to `v102` +- Use `arriba` (detect only), `kallisto` and `STAR` from nf-core modules +- Instead of separate script to build the references, added `--build_references` argument in the main +- `--fasta` argument is not required with `--build_references` and set by default to the ensembl references built in the detection workflow +- CI test done on stubs of reference building for subprocesses ensembl and arriba + +Parameters for `STAR` for `arriba` changed from: + +```bash +--readFilesCommand zcat \\ + --outSAMtype BAM Unsorted \\ +--outStd BAM_Unsorted \\ +--outSAMunmapped Within \\ +--outBAMcompression 0 \\ +--outFilterMultimapNmax 1 \\ +--outFilterMismatchNmax 3 \\ 
+--chimSegmentMin 10 \\ +--chimOutType WithinBAM SoftClip \\ +--chimJunctionOverhangMin 10 \\ +--chimScoreMin 1 \\ +--chimScoreDropMax 30 \\ +--chimScoreJunctionNonGTAG 0 \\ +--chimScoreSeparation 1 \\ +--alignSJstitchMismatchNmax 5 -1 5 5 \\ +--chimSegmentReadGapMax 3 \\ +--sjdbOverhang ${params.read_length - 1} +``` + +to + +```bash +--readFilesCommand zcat \ +--outSAMtype BAM Unsorted \ +--outSAMunmapped Within \ +--outBAMcompression 0 \ +--outFilterMultimapNmax 50 \ +--peOverlapNbasesMin 10 \ +--alignSplicedMateMapLminOverLmate 0.5 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimSegmentMin 10 \ +--chimOutType WithinBAM HardClip \ +--chimJunctionOverhangMin 10 \ +--chimScoreDropMax 30 \ +--chimScoreJunctionNonGTAG 0 \ +--chimScoreSeparation 1 \ +--chimSegmentReadGapMax 3 \ +--chimMultimapNmax 50 +``` + +As recommended [here](https://arriba.readthedocs.io/en/latest/workflow/). + +Parameters for `STAR` for `STAR-fusion` changed from: + +```bash +--twopassMode Basic \\ +--outReadsUnmapped None \\ +--chimSegmentMin 12 \\ +--chimJunctionOverhangMin 12 \\ +--alignSJDBoverhangMin 10 \\ +--alignMatesGapMax 100000 \\ +--alignIntronMax 100000 \\ +--chimSegmentReadGapMax 3 \\ +--alignSJstitchMismatchNmax 5 -1 5 5 \\ +--runThreadN ${task.cpus} \\ +--outSAMstrandField intronMotif ${avail_mem} \\ +--outSAMunmapped Within \\ +--outSAMtype BAM Unsorted \\ +--outSAMattrRGline ID:GRPundef \\ +--chimMultimapScoreRange 10 \\ +--chimMultimapNmax 10 \\ +--chimNonchimScoreDropMin 10 \\ +--peOverlapNbasesMin 12 \\ +--peOverlapMMp 0.1 \\ +--readFilesCommand zcat \\ +--sjdbOverhang ${params.read_length - 1} \\ +--chimOutJunctionFormat 1 +``` + +to + +```bash +--outReadsUnmapped None \ +--readFilesCommand zcat \ +--outSAMtype BAM SortedByCoordinate \ +--outSAMstrandField intronMotif \ +--outSAMunmapped Within \ +--chimSegmentMin 12 \ +--chimJunctionOverhangMin 8 \ +--chimOutJunctionFormat 1 \ +--alignSJDBoverhangMin 10 \ +--alignMatesGapMax 100000 \ +--alignIntronMax 100000 \ 
+--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimMultimapScoreRange 3 \ +--chimScoreJunctionNonGTAG -4 \ +--chimMultimapNmax 20 \ +--chimNonchimScoreDropMin 10 \ +--peOverlapNbasesMin 12 \ +--peOverlapMMp 0.1 \ +--alignInsertionFlush Right \ +--alignSplicedMateMapLminOverLmate 0 \ +--alignSplicedMateMapLmin 30 \ +--chimOutType Junctions +``` + +`Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz` used for squid and arriba, `Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz` used for STAR-fusion and the quality control as the quality control is based on the STAR-fusion alignment. + +### Fixed + +### Removed + +- Ericscript tool +- GRCh37 support. Subdirectory with params.genome are removed +- Running with conda + +## v1.3.0 - [2020/07/15] + +- Using official STAR-Fusion container [#160](https://github.com/nf-core/rnafusion/issues/160) + +### Added + +- Added social preview image [#107](https://github.com/nf-core/rnafusion/issues/107) +- Added support for GRCh37 genome assembly [#77](https://github.com/nf-core/rnafusion/issues/77) + +### Changed + +- Upgrade `fusion-report v2.1.2` to `fusion-report v2.1.3` +- Upgrade `fusion-report v2.1.1` to `fusion-report v2.1.2` +- Upgrade `fusion-report v2.1.0` to `fusion-report v2.1.1` +- Upgrade `Arriba v1.1.0` to `Arriba v1.2.0` +- Upgrade `fusion-report v2.0.2` to `fusion-report v2.1.0` + +### Fixed + +- Missing `strip-components` in `download-references.nf/star-fusion` [#148](https://github.com/nf-core/rnafusion/issues/148) +- Missing version prefix for cdna [#143](https://github.com/nf-core/rnafusion/issues/143) +- `samtools` missing header in empty file for FusionInspector [ref](https://github.com/STAR-Fusion/STAR-Fusion/issues/191) +- Removed `profile` from helper scripts [#139](https://github.com/nf-core/rnafusion/issues/139) +- Wrong url path for `Pfam-A.hmm.gz` [#140](https://github.com/nf-core/rnafusion/issues/140) + +### Removed + +- Removed `scripts/download-singularity-img.sh` and 
`download-singularity-img.nf` as they are not necessary any more + +--- + +## v1.1.0 - [2020/02/10] + +- Fusion gene detection tools: + - `Arriba v1.1.0` + - `Ericscript v0.5.5` + - `Fusioncatcher v1.20` + - `Pizzly v0.37.3` + - `Squid v1.5` + - `STAR-Fusion v1.6.0` +- Visualization tools: + - `Arriba v1.1.0` + - `FusionInspector v1.3.1` +- Other tools: + - `fusion-report v2.0.1` + - `FastQC v0.11.8` + - `MultiQC v1.7` + - `STAR aligner v2.7.0f` + +### Added + +- Added `Arriba 1.1.0` [#63](https://github.com/nf-core/rnafusion/issues/63) +- Added Batch mode [#54](https://github.com/nf-core/rnafusion/issues/54) + +### Changed + +- Updated examples and configurations +- Upgraded `fusion-report v1.0.0` to `fusion-report v2.0.1` +- Divided `running_tools` into fusion and visualization tools +- Updated `STAR` in `Squid`, `Fusion-Inspector` version to `2.7.0f` +- Upgraded `STAR-Fusion v1.5.0` to `STAR-Fusion v1.6.0` [#83](https://github.com/nf-core/rnafusion/issues/83) +- Parameter `igenomesIgnore` renamed to `igenome` [#81](https://github.com/nf-core/rnafusion/issues/81) +- Finished STAR-Fusion file renaming [#18](https://github.com/nf-core/rnafusion/issues/18) +- Updated logos +- Updated to nf-core `1.8` TEMPLATE + +### Fixed + +- iGenomes optional, but not really [#91](https://github.com/nf-core/rnafusion/issues/91) +- Updated `fusioncatcher` to latest `1.20` version also solving [#95](https://github.com/nf-core/rnafusion/issues/95) + +### Removed + +- Variables `pizzly_fasta` and `pizzly_gtf` have been removed and replaced with `transcript` and `gtf` +- `Jenkinsfile`, test configuration, pylintrc configuration +- Removed `igenomes.config` because the pipeline only supports `Ensembl` version + +--- + +## v1.0.2 - [2019/05/13] + +### Changed + +- Bumped nf-core template to 1.6 [#69](https://github.com/nf-core/rnafusion/pull/69) + +### Fixed + +- Fixed COSMIC parameters not wrapped in quotes [#75](https://github.com/nf-core/rnafusion/issues/75) +- Implemented output
for fusion tools [#72](https://github.com/nf-core/rnafusion/issues/72) +- Fixed reference download link for STAR-Fusion [#71](https://github.com/nf-core/rnafusion/issues/71) + +--- + +## v1.0.1 - [2019/04/06] + +### Added + +- Added support for extra parameters for tools STAR-Fusion, FusionCatcher and fusion-report +- Added example configuration for `singularity` and `docker` +- Added [fusion-report](https://github.com/matq007/fusion-report) into the stack [#62](https://github.com/nf-core/rnafusion/issues/62), [#55](https://github.com/nf-core/rnafusion/issues/55), [#53](https://github.com/nf-core/rnafusion/issues/53), [#51](https://github.com/nf-core/rnafusion/issues/51) +- Added nextflow helper script `download-singularity-img.nf` +- Added nextflow helper script `download-references.nf` +- Added `Jenkinsfile` for in-house testing + +### Changed + +- Updated installation of `FusionCatcher` (available now on bioconda) + +### Fixed + +- Fixed empty symlinks (`input.X`) in fusion-report [#68](https://github.com/nf-core/rnafusion/issues/68) +- Fixed FASTA issues [#60](https://github.com/nf-core/rnafusion/issues/60) +- Fixed centralized nf-core/config [#64](https://github.com/nf-core/rnafusion/issues/64) +- Fixed `scrape_software_versions.py` to parse tools versions correctly [#65](https://github.com/nf-core/rnafusion/issues/65) + +### Removed + +- Removed `Singularity` + +--- + +## v1.0 - [2018/02/14] + +Version 1.0 marks the first production release of this pipeline under the nf-core flag. +The pipeline includes additional help scripts to download references for fusion tools and Singularity images. + Initial release of nf-core/rnafusion, created with the [nf-core](https://nf-co.re/) template. 
### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 0fc81801..95bc5d98 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,14 +10,63 @@ ## Pipeline tools +- [Arriba](https://github.com/suhrig/arriba) + + > Uhrig S, Ellermann J, Walther T, Burkhardt P, Fröhlich M, Hutter B, Toprak UH, Neumann O, Stenzinger A, Scholl C, Fröhling S, Brors B. Accurate and efficient detection of gene fusions from RNA sequencing data. Genome Research. 2021 Mar 31;448-460. doi: 10.1101/gr.257246.119. Epub 2021 Jan 13. PubMed PMID: 33441414. + +- [BEDOPS](https://bedops.readthedocs.io/en/latest/index.html) - convert2bed + + > Neph S, Scott Kuehn M, Reynolds AP, Haugen E, Thurman RE, Johnson AK, Rynes E, Maurano MT, Vierstra J, Thomas S, Sandstrom R, Humbert R, Stamatoyannopoulos JA. BEDOPS: high-performance genomic feature operations. Bioinformatics. 2012 May, 28 (14): 1919-1920. doi: 10.1093/bioinformatics/bts277, PubMed PMID: PMID: 22576172. + +- [FastP](https://academic.oup.com/bioinformatics/article/34/17/i884/5093234) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sept 34:17 (i884–i890), doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086. PubMed Central PMCID: PMC6129281 + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) +- [FusionCatcher](https://github.com/ndaniel/fusioncatcher) + + > Nicorici D, Satalan M, Edgren H, Kangaspeska S, Murumagi A, Kallioniemi O, Virtanen S, Kilkku O. FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. BioRxiv, 2014 Nov. doi: 10.1101/011650. + +- [FusionInspector](https://github.com/FusionInspector/FusionInspector) + + > Haas BJ, Dobin A, Ghandi M, Van Arsdale A, Tickle T, Robinson JT, Gillani R, Kasif S, Regev A. 
Targeted in silico characterization of fusion transcripts in tumor and normal tissues via FusionInspector. Cell Reports Methods. 2023 May 3:5, doi: 10.1016/j.crmeth.2023.100467, PMID: 37323575 + +- [Fusion-report](https://github.com/matq007/fusion-report) + + > Proks M, Genomic Profiling of a Comprehensive Nation-wide Collection of Childhood Solid Tumors, Master Thesis, Supervisors: Grøntved L, Díaz de Ståhl T, Nistér M, Ewels P, Garcia MU, Juhos S, University of Southern Denmark, 2019, unpublished. + +- [GATK4](https://gatk.broadinstitute.org/hc/en-us) + + > Van der Auwera GA. Somatic variation discovery with GATK4. Proceedings of the American Association for Cancer Research Annual Meeting 2017. 2017 Apr 1-5. Cancer Res 2017;77(13 Suppl) doi:10.1158/1538-7445.AM2017-3590 + +- [MegaFusion](https://github.com/J35P312/MegaFusion) + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [picard-tools](http://broadinstitute.github.io/picard) + +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. 
+ +- [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) + + > Haas BJ, Dobin A, Li B, Stransky N, Pochet N, Regev A. Accuracy assessment of fusion transcript detection via read-mapping and de novo fusion transcript assembly-based methods. Genome Biology 2019 Oct;20,213. doi: 10.1186/s13059-019-1842-9 + +- [StringTie](https://ccb.jhu.edu/software/stringtie/index.shtml) + > Shumate A, Wong B, Pertea G, Pertea M. Improved transcriptome assembly using a hybrid of long and short reads with StringTie. PLOS Computational Biology 18, 6 (2022), doi.org/10.1371/journal.pcbi.1009730 + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index f40e75a6..534c8d41 100644 --- a/README.md +++ b/README.md @@ -19,53 +19,88 @@ ## Introduction -**nf-core/rnafusion** is a bioinformatics pipeline that ... - - - - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +**nf-core/rnafusion** is a bioinformatics best-practice analysis pipeline for RNA sequencing consisting of several tools designed for detecting and visualizing fusion genes. Results from up to 5 fusion caller tools are created, and are also aggregated, most notably in a pdf visualisation document, a vcf data collection file, and html and tsv reports. + +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnafusion/results). + +In rnafusion the full-sized test includes reference building and fusion detection. 
The test dataset is taken from [here](https://github.com/nf-core/test-datasets/tree/rnafusion/testdata/human). + +## Pipeline summary + +![nf-core/rnafusion metro map](docs/images/nf-core-rnafusion_metro_map.png) + +### Build references + +`--references_only` triggers a workflow to ONLY build references, otherwise the references are built when the analysis is run: + +1. Download gencode fasta and gtf files +2. Create [STAR](https://github.com/alexdobin/STAR) index +3. Download [Arriba](https://github.com/suhrig/arriba) references +4. Download [FusionCatcher](https://github.com/ndaniel/fusioncatcher) references +5. Download and build [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) references +6. Download [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) DBs + +### Main workflow + +1. Input samplesheet check +2. Concatenate fastq files per sample ([cat](http://www.linfo.org/cat.html)) +3. Reads quality control ([FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +4. Optional trimming with [fastp](https://github.com/OpenGene/fastp) +5. Arriba subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [Arriba](https://github.com/suhrig/arriba) fusion detection +6. STAR-fusion subworkflow + - [STAR](https://github.com/alexdobin/STAR) alignment + - [STAR-Fusion](https://github.com/STAR-Fusion/STAR-Fusion) fusion detection +7. Fusioncatcher subworkflow + - [FusionCatcher](https://github.com/ndaniel/fusioncatcher) fusion detection +8. StringTie subworkflow + - [StringTie](https://ccb.jhu.edu/software/stringtie/) +9. Fusion-report + - Merge all fusions detected by the selected tools with [Fusion-report](https://github.com/Clinical-Genomics/fusion-report) +10. 
Post-processing and analysis of data + - [FusionInspector](https://github.com/FusionInspector/FusionInspector) + - [Arriba](https://github.com/suhrig/arriba) visualisation + - Collect metrics ([`picard CollectRnaSeqMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037057492-CollectRnaSeqMetrics-Picard-), [`picard CollectInsertSizeMetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360037055772-CollectInsertSizeMetrics-Picard-) and [`picard MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-)) +11. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +12. Compress bam files to cram with [samtools view](http://www.htslib.org/) ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - -Now, you can run the pipeline using: - - +Then perform the analysis: ```bash nextflow run nf-core/rnafusion \ - -profile \ - --input samplesheet.csv \ - --outdir + -profile test, \ + --outdir \ + -stub ``` > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). +> **Notes:** +> +> - Conda is not currently supported; run with singularity or docker. +> - Paths need to be absolute. +> - GRCh38 is the only supported reference. +> - Single-end reads are to be used as a last resort. Paired-end reads are recommended. FusionCatcher cannot be used with single-end reads shorter than 130 bp. 
+ For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnafusion/usage) and the [parameter documentation](https://nf-co.re/rnafusion/parameters). ## Pipeline output @@ -76,11 +111,14 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/rnafusion was originally written by Martin Proks, Annick Renevey. +nf-core/rnafusion was written by Martin Proks ([@matq007](https://github.com/matq007)), Maxime Garcia ([@maxulysse](https://github.com/maxulysse)) and Annick Renevey ([@rannick](https://github.com/rannick)). -We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people for their help in the development of this pipeline: - +- [Phil Ewels](https://github.com/ewels) +- [Rickard Hammarén](https://github.com/Hammarn) +- [Alexander Peltzer](https://github.com/apeltzer) +- [Praveen Raj](https://github.com/praveenraj2018) ## Contributions and Support @@ -90,10 +128,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - - +If you use nf-core/rnafusion for your analysis, please cite it using the following DOI: [10.5281/zenodo.3946477](https://doi.org/10.5281/zenodo.3946477) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
diff --git a/adapter_fasta_test b/adapter_fasta_test new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_arriba.txt b/assets/dummy_file_arriba.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_fusioncatcher.txt b/assets/dummy_file_fusioncatcher.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_pizzly.txt b/assets/dummy_file_pizzly.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_squid.txt b/assets/dummy_file_squid.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/dummy_file_starfusion.txt b/assets/dummy_file_starfusion.txt new file mode 100644 index 00000000..e69de29b diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 62a8ff4e..80452425 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/rnafusion Methods Description" section_href: "https://github.com/nf-core/rnafusion" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using nf-core/rnafusion v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index c070b89c..963555cf 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -2,14 +2,37 @@ report_comment: > This report has been generated by the nf-core/rnafusion analysis pipeline. For information about how to interpret these results, please see the documentation. + report_section_order: - "nf-core-rnafusion-methods-description": + nf-core-rnafusion-methods-description: order: -1000 software_versions: order: -1001 - "nf-core-rnafusion-summary": + nf-core-rnafusion-summary: order: -1002 export_plots: true - disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - fastp + - star + - samtools + - picard + - arriba + +module_order: + - fastp + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "*.zip" + - fastqc: + name: "FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming." 
+ path_filters: + - "*_trimmed*.zip" diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index 5f653ab7..00000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 4f1f739a..a5500810 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,17 +17,22 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "pattern": "^\\S+f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "pattern": "^\\S+f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "strandedness": { + "type": "string", + "enum": ["forward", "reverse", "unstranded", "unknown"], + "errorMessage": "Strandedness has to be forward, reverse, unstranded or unknown" } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1", "fastq_2", "strandedness"] } } diff --git a/bin/get_rrna_transcripts.py b/bin/get_rrna_transcripts.py new file mode 100755 index 00000000..670d5f06 --- /dev/null +++ b/bin/get_rrna_transcripts.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import sys +from pathlib import Path + + +def get_rrna_intervals(file_in, file_out): + """ + Get lines containing ``#`` or ``gene_type rRNA`` or ```` or ``gene_type rRNA_pseudogene`` or ``gene_type MT_rRNA`` + Create output file + + Args: + file_in (pathlib.Path): The given GTF file. 
+ file_out (pathlib.Path): Where the ribosomal RNA GTF file should + be created; always in GTF format. + """ + + patterns = { + "#", + 'transcript_biotype "Mt_rRNA"', + 'transcript_biotype "rRNA"', + 'transcript_biotype "rRNA_pseudogene"', + } + line_starts = {"MT", "1", "2", "3", "4", "5", "6", "7", "8", "9"} + out_lines = [] + with file_in.open() as f: + data = f.readlines() + for line in data: + for pattern in patterns: + if pattern in line: + for line_start in line_starts: + if line.startswith(line_start): + out_lines.append(line) + + with file_out.open(mode="w") as out_file: + out_file.writelines(out_lines) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Extract ribosomal RNA intervals from a gtf file.", + epilog="Example: python get_rrna_transcripts.py ", + ) + parser.add_argument( + "file_in", + metavar="FILE_IN", + type=Path, + help="Input in GTF format.", + ) + parser.add_argument( + "file_out", + metavar="FILE_OUT", + type=Path, + help="Transformed output intervals in GTF format.", + ) + parser.add_argument( + "-l", + "--log-level", + help="The desired log level (default WARNING).", + choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), + default="WARNING", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") + if not args.file_in.is_file(): + logger.error(f"The given input file {args.file_in} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True) + get_rrna_intervals(args.file_in, args.file_out) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/gtf2bed b/bin/gtf2bed new file mode 100755 index 00000000..cfa91cf7 --- /dev/null +++ b/bin/gtf2bed @@ -0,0 +1,123 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty 
(erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. + +use Getopt::Long; + +my $extended; +GetOptions("x"=>\$extended); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? "unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while () { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! 
$f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? + $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + if ($extended) { + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + $extend="\t$gene_id"; + } + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + + +close IN; diff --git a/bin/vcf_collect.py b/bin/vcf_collect.py new file mode 100755 index 00000000..1decbe90 --- /dev/null +++ b/bin/vcf_collect.py @@ -0,0 +1,612 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import sys +from pathlib import Path +import pandas as pd +import ast +import numpy 
as np +import csv + +logger = logging.getLogger() + + +def vcf_collect( + fusioninspector_in_file: str, + fusionreport_in_file: str, + gtf: str, + fusionreport_csv: str, + hgnc: str, + sample: str, + out_file, +) -> None: + """ + Process FusionInspector and FusionReport data, + merge with GTF from FusionInspector and HGNC database, + and write a VCF file. + + Args: + fusioninspector_in_file (str): Path to FusionInspector input file. + fusionreport_in_file (str): Path to Fusion-report input file. + sample (str): Sample name for the header. + hgnc (str): Path to HGNC file. + gtf (str): Path to output GTF file from FusionInspector in TSV format. + fusionreport_csv (str): Path to Fusion-report CSV output file. + out (str): Output VCF file path. + + Adapted from: https://github.com/J35P312/MegaFusion + """ + merged_df = ( + build_fusioninspector_dataframe(fusioninspector_in_file) + .join(read_build_fusionreport(fusionreport_in_file), how="outer", on="FUSION") + .reset_index() + ) + hgnc_df = build_hgnc_dataframe(hgnc) + df_symbol = merged_df[merged_df["Left_ensembl_gene_id"].isna()] + df_not_symbol = merged_df[merged_df["Left_ensembl_gene_id"].notna()] + + df_not_symbol = hgnc_df.merge( + df_not_symbol, + how="right", + left_on="ensembl_gene_id", + right_on="Left_ensembl_gene_id", + ) + df_symbol = hgnc_df.merge( + df_symbol, how="right", left_on="symbol", right_on="GeneA" + ) + df = pd.concat([df_not_symbol, df_symbol]) + df = df.rename(columns={"hgnc_id": "Left_hgnc_id"}) + + df_symbol = df[df["Right_ensembl_gene_id"].isna()] + df_not_symbol = df[df["Right_ensembl_gene_id"].notna()] + + df_not_symbol = hgnc_df.merge( + df_not_symbol, + how="right", + left_on="ensembl_gene_id", + right_on="Right_ensembl_gene_id", + ) + df_symbol = hgnc_df.merge( + df_symbol, how="right", left_on="symbol", right_on="GeneB" + ) + df = pd.concat([df_not_symbol, df_symbol]) + df = df.rename(columns={"hgnc_id": "Right_hgnc_id"}) + + gtf_df = build_gtf_dataframe(gtf) + all_df = df.merge( + 
gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id" + ) + all_df[["PosA", "orig_start", "orig_end"]] = ( + all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int) + ) + + all_df = all_df[ + ( + (all_df["PosA"] >= all_df["orig_start"]) + & (all_df["PosA"] <= all_df["orig_end"]) + ) + | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + ] + + all_df.replace("", np.nan, inplace=True) + all_df = all_df.drop_duplicates() + + all_df[["exon_number", "transcript_version"]] = all_df[ + ["exon_number", "transcript_version"] + ].replace(0, np.nan) + # Fill non-empty values within each group for 'exon_number' and 'transcript_version' + all_df["exon_number"] = all_df.groupby("PosA")["exon_number"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + all_df["transcript_version"] = all_df.groupby("PosA")[ + "transcript_version" + ].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill")) + + all_df = all_df.rename(columns={"transcript_version": "Left_transcript_version"}) + all_df = all_df.rename(columns={"exon_number": "Left_exon_number"}) + all_df = all_df[ + [ + "FUSION", + "GeneA", + "GeneB", + "PosA", + "PosB", + "ChromosomeA", + "ChromosomeB", + "TOOLS_HITS", + "SCORE", + "FOUND_DB", + "FOUND_IN", + "JunctionReadCount", + "SpanningFragCount", + "FFPM", + "PROT_FUSION_TYPE", + "CDS_LEFT_ID", + "CDS_RIGHT_ID", + "Left_transcript_version", + "Left_exon_number", + "Left_hgnc_id", + "Right_hgnc_id", + "Strand1", + "Strand2", + "annots", + ] + ].drop_duplicates() + all_df["CDS_RIGHT_ID"] = all_df["CDS_RIGHT_ID"].astype("str") + all_df = all_df.merge( + gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id" + ) + all_df[["PosB", "orig_start", "orig_end"]] = all_df[ + ["PosB", "orig_start", "orig_end"] + ].fillna(0) + all_df[["PosB", "orig_start", "orig_end"]] = all_df[ + ["PosB", "orig_start", "orig_end"] + ].astype(int) + all_df = all_df[ + ( + (all_df["PosB"] >= all_df["orig_start"]) + & 
(all_df["PosB"] <= all_df["orig_end"]) + ) + | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0)) + ] + + all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan) + all_df = all_df.replace("", np.nan) + + all_df[["exon_number", "transcript_version"]] = all_df[ + ["exon_number", "transcript_version"] + ].replace(0, np.nan) + # Fill non-empty values within each group for 'exon_number' and 'transcript_version' + all_df["exon_number"] = all_df.groupby("PosB")["exon_number"].transform( + lambda x: x.fillna(method="ffill").fillna(method="bfill") + ) + all_df["transcript_version"] = all_df.groupby("PosB")[ + "transcript_version" + ].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill")) + + all_df = all_df.rename(columns={"transcript_version": "Right_transcript_version"}) + all_df = all_df.rename(columns={"exon_number": "Right_exon_number"}) + + all_df = all_df[ + [ + "FUSION", + "GeneA", + "GeneB", + "PosA", + "PosB", + "ChromosomeA", + "ChromosomeB", + "TOOLS_HITS", + "SCORE", + "FOUND_DB", + "FOUND_IN", + "JunctionReadCount", + "SpanningFragCount", + "FFPM", + "PROT_FUSION_TYPE", + "CDS_LEFT_ID", + "CDS_RIGHT_ID", + "Left_transcript_version", + "Left_exon_number", + "Left_hgnc_id", + "Right_transcript_version", + "Right_exon_number", + "Right_hgnc_id", + "Strand1", + "Strand2", + "annots", + ] + ].drop_duplicates() + all_df = all_df.rename(columns={"FUSION": "Fusion"}) + all_df = all_df.set_index("Fusion") + + all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv)) + + return write_vcf(column_manipulation(all_df), header_def(sample), out_file) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "--fusioninspector", + metavar="FUSIONINSPECTOR", + type=Path, + 
help="FusionInspector output in TSV format.", + ) + parser.add_argument( + "--fusionreport", + metavar="FUSIONREPORT", + type=Path, + help="Fusionreport output in index/html format.", + ) + parser.add_argument( + "--fusionreport_csv", + metavar="FUSIONREPORT_CSV", + type=Path, + help="Fusionreport output in CSV format.", + ) + parser.add_argument( + "--fusioninspector_gtf", + metavar="GTF", + type=Path, + help="FusionInspector GTF output.", + ) + parser.add_argument( + "--hgnc", + metavar="HGNC", + type=Path, + help="HGNC database.", + ) + parser.add_argument( + "--sample", metavar="SAMPLE", type=Path, help="Sample name.", default="Sample" + ) + parser.add_argument( + "--out", + metavar="OUT", + type=Path, + help="VCF output path.", + ) + return parser.parse_args(argv) + + +def header_def(sample: str) -> str: + """ + Define the header of the VCF file + """ + return '##fileformat=VCFv4.1\n\ +##ALT=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##INFO=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +##FORMAT=\n\ +#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format( + sample + ) + + +def convert_to_list(annots_str: str) -> list: + try: + return ast.literal_eval(annots_str) + except (SyntaxError, ValueError): + return np.nan + + +def build_fusioninspector_dataframe(file: str) -> pd.DataFrame: + """ + Read FusionInspector output from a CSV file, preprocess the data, and set 'FUSION' as the index. 
+ """ + df = pd.read_csv(file, sep="\t") + df = df.rename(columns={"#FusionName": "FUSION"}) + if not (df.empty): + df[["ChromosomeA", "PosA", "Strand1"]] = df["LeftBreakpoint"].str.split( + ":", expand=True + ) + df[["ChromosomeB", "PosB", "Strand2"]] = df["RightBreakpoint"].str.split( + ":", expand=True + ) + df[["LeftGeneName", "Left_ensembl_gene_id"]] = df["LeftGene"].str.split( + "^", expand=True + ) + df[["RightGeneName", "Right_ensembl_gene_id"]] = df["RightGene"].str.split( + "^", expand=True + ) + df["annots"] = ( + df["annots"] + .apply(convert_to_list) + .apply( + lambda x: ( + ",".join(map(str, x)) + if isinstance(x, list) + else str(x) if pd.notna(x) else "" + ) + ) + ) + else: + for i in [ + "ChromosomeA", + "Strand1", + "ChromosomeB", + "Strand2", + "LeftGeneName", + "Left_ensembl_gene_id", + "RightGeneName", + "Right_ensembl_gene_id", + "annots", + ]: + df[i] = "" + for j in [ + "PosA", + "PosB", + ]: + df[j] = np.nan + + return df.set_index(["FUSION"]) + + +def replace_value_with_column_name( + row: pd.Series, value_to_replace: str, column_name: str +) -> str: + """ + Replace a specific value in a row with the corresponding column name. + """ + new_values = "" + for col_name, value in row.items(): + if col_name == column_name: + if value == value_to_replace: + new_values = col_name + else: + new_values = "" + return new_values + + +def concatenate_columns(row: pd.Series) -> str: + """ + Concatenate non-empty values in a row into a single string separated by commas. + """ + non_empty_values = [str(value) for value in row if value != ""] + return ",".join(non_empty_values) + + +def read_build_fusionreport(fusionreport_file: str) -> pd.DataFrame: + """ + Read and preprocess fusion-report data from a file, including handling missing tool columns, + getting the columns with each tool and create a new FOUND_IN column with all the tool hits. + Convert the list of databases in FOUND_DB into a joined string with a comma separator. 
+ Make all column headers uppercase. + """ + with open(fusionreport_file) as f: + from_html = [ + line.split('rows": ')[1] for line in f if 'name="fusion_list' in line + ] + tmp = str(from_html)[2:] + tmp2 = tmp.split(', "tools": ')[0] + fusion_report = pd.DataFrame(ast.literal_eval(tmp2)) + if not "arriba" in fusion_report.columns: + fusion_report["arriba"] = "" + if not "fusioncatcher" in fusion_report.columns: + fusion_report["fusioncatcher"] = "" + if not "starfusion" in fusion_report.columns: + fusion_report["starfusion"] = "" + fusion_report["arriba"] = fusion_report[["arriba"]].apply( + replace_value_with_column_name, args=("true", "arriba"), axis=1 + ) + fusion_report["fusioncatcher"] = fusion_report[["fusioncatcher"]].apply( + replace_value_with_column_name, args=("true", "fusioncatcher"), axis=1 + ) + fusion_report["starfusion"] = fusion_report[["starfusion"]].apply( + replace_value_with_column_name, args=("true", "starfusion"), axis=1 + ) + fusion_report["FOUND_IN"] = fusion_report[ + ["arriba", "starfusion", "fusioncatcher"] + ].apply(concatenate_columns, axis=1) + fusion_report.columns = fusion_report.columns.str.upper() + fusion_report["FOUND_DB"] = fusion_report["FOUND_DB"].apply( + lambda x: ",".join(x) if len(x) > 0 else "" + ) + fusion_report[["GeneA", "GeneB"]] = fusion_report["FUSION"].str.split( + "--", expand=True + ) + + return fusion_report[ + ["FUSION", "GeneA", "GeneB", "TOOLS_HITS", "SCORE", "FOUND_DB", "FOUND_IN"] + ].set_index(["FUSION"]) + + +def read_fusionreport_csv(file: str) -> pd.DataFrame: + df = pd.read_csv(file) + columns_to_iterate = ["starfusion", "arriba", "fusioncatcher"] + for column in columns_to_iterate: + if column not in df.columns: + df[column] = "" + df[["starfusion", "arriba", "fusioncatcher"]] = df[ + ["starfusion", "arriba", "fusioncatcher"] + ].astype("str") + for index, row in df.iterrows(): + for column in columns_to_iterate: + cell_value = row[column] + + if "#" in cell_value: + df.at[index, column] = 
df.at[index, column].split(",")[0] + df.at[index, column] = df.at[index, column].replace("position: ", "") + df.at[index, "A"] = df.at[index, column].split("#")[0] + df.at[index, "B"] = df.at[index, column].split("#")[1] + df.at[index, "ChromosomeA"] = df.at[index, "A"].split(":")[0] + df.at[index, "PosA"] = df.at[index, "A"].split(":")[1] + if "+" in df.at[index, "A"] or "-" in df.at[index, "A"]: + df.at[index, "StrandA"] = df.at[index, "A"].split(":")[2] + else: + df.at[index, "StrandA"] = "" + + df.at[index, "ChromosomeB"] = df.at[index, "B"].split(":")[0] + df.at[index, "PosB"] = df.at[index, "B"].split(":")[1] + if "+" in df.at[index, "B"] or "-" in df.at[index, "B"]: + df.at[index, "StrandB"] = df.at[index, "B"].split(":")[2] + else: + df.at[index, "StrandB"] = "" + + break + df[["GeneA", "GeneB"]] = df["Fusion"].str.split("--", expand=True) + df = df.set_index("Fusion") + df.to_csv("tmp.csv") + return df[ + [ + "GeneA", + "GeneB", + "ChromosomeA", + "PosA", + "StrandA", + "ChromosomeB", + "PosB", + "StrandB", + ] + ] + + +def column_manipulation(df: pd.DataFrame) -> pd.DataFrame: + """ + Manipulate and prepare DataFrame for VCF file creation. + """ + df["ALT"] = "" + df = df.reset_index() + df["FORMAT"] = "GT:DV:RV:FFPM" + df["ID"] = "." + df["QUAL"] = "." 
+ df["FILTER"] = "PASS" + df["REF"] = "N" + df["INFO"] = "" + df["Sample"] = "" + df["Strand1"] = df["Strand1"].astype(str) + df["JunctionReadCount"] = df["JunctionReadCount"].fillna(0).astype(int).astype(str) + df["SpanningFragCount"] = df["SpanningFragCount"].fillna(0).astype(int).astype(str) + df["FFPM"] = df["FFPM"].fillna(0).astype(float).astype(str) + df["ChromosomeA"] = df["ChromosomeA"].fillna(0).astype(str) + df["ChromosomeB"] = df["ChromosomeB"].fillna(0).astype(str) + df["Left_hgnc_id"] = df["Left_hgnc_id"].fillna(0).astype(int).astype(str) + df["Right_hgnc_id"] = df["Right_hgnc_id"].fillna(0).astype(int).astype(str) + df["Left_exon_number"] = df["Left_exon_number"].fillna(0).astype(int).astype(str) + df["Right_exon_number"] = df["Right_exon_number"].fillna(0).astype(int).astype(str) + df["Left_transcript_version"] = ( + df["Left_transcript_version"].fillna(0).astype(int).astype(str) + ) + df["Right_transcript_version"] = ( + df["Right_transcript_version"].fillna(0).astype(int).astype(str) + ) + df["PosA"] = df["PosA"].fillna(0).astype(int).astype(str) + df["PosB"] = df["PosB"].fillna(0).astype(int).astype(str) + df["PROT_FUSION_TYPE"] = df["PROT_FUSION_TYPE"].replace(".", "nan") + df["CDS_LEFT_ID"] = df["CDS_LEFT_ID"].replace(".", "nan") + df["CDS_RIGHT_ID"] = df["CDS_RIGHT_ID"].replace(".", "nan") + + for index, row in df.iterrows(): + if row["Strand1"] == "-" and row["Strand2"] == "-": + df.loc[index, "ALT"] = f'[{row["ChromosomeB"]}:{row["PosB"]}[N' + elif row["Strand1"] == "+" and row["Strand2"] == "-": + df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]' + elif row["Strand1"] == "-" and row["Strand2"] == "+": + df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]' + else: + df.loc[index, "ALT"] = f'N[{row["ChromosomeB"]}:{row["PosB"]}[' + + df.loc[index, "INFO"] = ( + f"SVTYPE=BND;CHRA={row['ChromosomeA']};CHRB={row['ChromosomeB']};GENEA={row['GeneA']};GENEB={row['GeneB']};" + 
f"POSA={row['PosA']};POSB={row['PosB']};ORIENTATION={row['Strand1']},{row['Strand2']};FOUND_DB={row['FOUND_DB']};" + f"FOUND_IN={row['FOUND_IN']};TOOL_HITS={row['TOOLS_HITS']};SCORE={row['SCORE']};FRAME_STATUS={row['PROT_FUSION_TYPE']};" + f"TRANSCRIPT_ID_A={row['CDS_LEFT_ID']};TRANSCRIPT_ID_B={row['CDS_RIGHT_ID']};" + f"TRANSCRIPT_VERSION_A={row['Left_transcript_version']};TRANSCRIPT_VERSION_B={row['Right_transcript_version']};" + f"HGNC_ID_A={row['Left_hgnc_id']};HGNC_ID_B={row['Right_hgnc_id']};" + f"EXON_NUMBER_A={row['Left_exon_number']};EXON_NUMBER_B={row['Right_exon_number']};" + f"ANNOTATIONS={row['annots']}" + ) + df.loc[index, "Sample"] = ( + f"./1:{row['JunctionReadCount']}:{row['SpanningFragCount']}:{row['FFPM']}" + ) + + return df + + +def write_vcf(df_to_print: pd.DataFrame, header: str, out_file: str) -> None: + """ + Write a VCF file with a specified DataFrame, header, and output file path. + """ + df_to_print[ + [ + "ChromosomeA", + "PosA", + "ID", + "REF", + "ALT", + "QUAL", + "FILTER", + "INFO", + "FORMAT", + "Sample", + ] + ].to_csv( + path_or_buf=out_file, sep="\t", header=None, index=False, quoting=csv.QUOTE_NONE + ) + + with open(out_file, "r+") as f: + content = f.read() + f.seek(0, 0) + f.write(header.rstrip("\r\n") + "\n" + content) + + +def build_hgnc_dataframe(file: str) -> pd.DataFrame: + """ + Build a DataFrame from HGNC input file, extracting 'hgnc_id' and 'ensembl_gene_id' columns. + """ + df = pd.read_csv(file, sep="\t", low_memory=False) + df["hgnc_id"] = df["hgnc_id"].str.replace("HGNC:", "") + return df[["hgnc_id", "ensembl_gene_id", "symbol"]].dropna() + + +def build_gtf_dataframe(file: str) -> pd.DataFrame: + """ + Build a DataFrame from GTF file converted in TSV, extracting relevant columns. 
+ """ + df = pd.read_csv(file, sep="\t") + df[["fusion_dump", "Transcript_id"]] = df["transcript_id"].str.split( + "^", expand=True + ) + df[["orig_chromosome", "orig_start", "orig_end", "orig_dir"]] = df[ + "orig_coord_info" + ].str.split(",", expand=True) + return df[ + ["Transcript_id", "transcript_version", "exon_number", "orig_start", "orig_end"] + ] + + +def main(argv=None): + """Parse the arguments, check the inputs and run vcf_collect; exits with status 2 if any input file is missing or was not given.""" + args = parse_args(argv) + inputs = { + "fusioninspector": args.fusioninspector, + "fusionreport": args.fusionreport, + "fusioninspector_gtf": args.fusioninspector_gtf, + "fusionreport_csv": args.fusionreport_csv, + "hgnc": args.hgnc, + } + missing = [f"{name}={path}" for name, path in inputs.items() if path is None or not path.is_file()] + if missing: + logger.error(f"The following input files were not found: {', '.join(missing)}") + sys.exit(2) + vcf_collect( + args.fusioninspector, + args.fusionreport, + args.fusioninspector_gtf, + args.fusionreport_csv, + args.hgnc, + args.sample, + args.out, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index c0779b55..23e2429b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } @@ -24,7 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } diff --git a/conf/igenomes.config b/conf/igenomes.config index 3f114377..d608b45b 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -23,6 +23,7 @@ params { mito_name = "MT" macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + mirtrace_species = "hsa" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" @@ -35,6 +36,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + mirtrace_species = "hsa" } 'CHM13' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" @@ -56,6 +58,7 @@ params { mito_name = "MT" macs_gsize = "1.87e9" blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + mirtrace_species = "mmu" } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" @@ -67,6 +70,7 @@ params { bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" mito_name = "Mt" + mirtrace_species = "ath" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" @@ -77,6 +81,7 @@ params { gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + // mirtrace_species = "bsu" } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -88,6 +93,7 @@ params { bed12 = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "bta" } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" @@ -99,6 +105,7 @@ params { bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" mito_name = "MtDNA" macs_gsize = "9e7" + mirtrace_species = "cel" } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -110,6 +117,7 @@ params { bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "cfa" } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" @@ -120,6 +128,7 @@ params { gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "dre" } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" @@ -131,6 +140,7 @@ params { bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" mito_name = "M" macs_gsize = "1.2e8" + mirtrace_species = "dme" } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -142,6 +152,7 @@ params { bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" mito_name = "MT" + // mirtrace_species = "ecb" } 'EB1' { fasta = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" @@ -152,6 +163,7 @@ params { gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + // mirtrace_species = "ecd" } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" @@ -162,6 +174,7 @@ params { gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "gga" } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" @@ -172,6 +185,7 @@ params { gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + // mirtrace_species = "gmx" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" @@ -183,6 +197,7 @@ params { bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" mito_name = "MT" + // mirtrace_species = "mcc" } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" @@ -193,6 +208,7 @@ params { gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" mito_name = "Mt" + mirtrace_species 
= "osa" } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" @@ -204,6 +220,7 @@ params { bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "ptr" } 'Rnor_5.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" @@ -214,6 +231,7 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "rno" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" @@ -224,6 +242,7 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "rno" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" @@ -235,6 +254,7 @@ params { bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" mito_name = "MT" macs_gsize = "1.2e7" + // mirtrace_species = "sce" } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" @@ -247,6 +267,7 @@ params { readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" mito_name = "MT" macs_gsize = "1.21e7" + // mirtrace_species = "spo" } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" @@ -257,6 +278,7 @@ params { gtf = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + mirtrace_species = "sbi" } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" @@ -268,6 +290,7 @@ params { bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "ssc" } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" @@ -278,6 +301,7 @@ params { gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" mito_name = "Mt" + mirtrace_species = "zma" } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" @@ -290,6 +314,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + mirtrace_species = "hsa" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" @@ -303,6 +328,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + mirtrace_species = "hsa" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" @@ -316,6 +342,7 @@ params { mito_name = "chrM" macs_gsize = "1.87e9" blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + mirtrace_species = "mmu" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" @@ -326,6 +353,7 @@ params { gtf = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" mito_name = "chrM" + mirtrace_species = "bta" } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" @@ -338,6 +366,7 @@ params { readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" mito_name = "chrM" macs_gsize = "9e7" + mirtrace_species = "cel" } 'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" @@ -349,6 +378,7 @@ params { bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "cfa" } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" @@ -360,6 +390,7 @@ params { bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" mito_name = "chrM" macs_gsize = "1.37e9" + mirtrace_species = "dre" } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" @@ -371,6 +402,7 @@ params { bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" mito_name = "chrM" macs_gsize = "1.2e8" + mirtrace_species = "dme" } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -382,6 +414,7 @@ params { bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" mito_name = "chrM" + // mirtrace_species = "ecb" } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" @@ -393,6 +426,7 @@ params { bed12 = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "gga" } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" @@ -404,6 +438,7 @@ params { bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "ptr" } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" @@ -414,6 +449,7 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" mito_name = "chrM" + mirtrace_species = "rno" } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" @@ -424,6 +460,7 @@ params { readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" mito_name = "chrM" macs_gsize = "1.2e7" + // mirtrace_species = "sce" } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" @@ -435,6 +472,7 @@ params { bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "ssc" } } } diff --git a/conf/modules.config b/conf/modules.config index d203d2b6..967e820a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,12 +18,175 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - withName: FASTQC { + withName: 'ARRIBA_ARRIBA' { + publishDir = [ + path: { "${params.outdir}/arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.prefix = { "${meta.id}.arriba" } + } + + withName: 'ARRIBA_DOWNLOAD' { + publishDir = [ + path: { "${params.genomes_base}/arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'ARRIBA_VISUALISATION' { + ext.when = { {!params.fusioninspector_only} && ({params.starfusion} || {params.all}) } // NOTE(review): the nested closures are always truthy, so this never filters; left as-is because gating an arriba step on params.starfusion looks unintended - confirm the condition before unwrapping + ext.prefix = { "${meta.id}_combined_fusions_arriba_visualisation" } + publishDir = [ + path: { "${params.outdir}/arriba_visualisation" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ARRIBA_WORKFLOW:.*:CTATSPLICING_STARTOCANCERINTRONS' { + ext.args = {[ + bam ? "--vis" : "", + "--sample_name ${meta.id}", + ].join(" ")} + publishDir = [ + path: { "${params.outdir}/ctatsplicing/arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*STARFUSION_WORKFLOW:.*:CTATSPLICING_STARTOCANCERINTRONS' { + ext.args = {[ + bam ? "--vis" : "", + "--sample_name ${meta.id}", + ].join(" ")} + publishDir = [ + path: { "${params.outdir}/ctatsplicing/starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GENCODE_DOWNLOAD' { + publishDir = [ + path: { "${params.genomes_base}/gencode" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FASTP' { + ext.args = params.trim_tail ? 
"--trim_tail1 ${params.trim_tail} --trim_tail2 ${params.trim_tail} " : '' + } + + withName: 'FASTQC' { ext.args = '--quiet' + ext.when = {!params.skip_qc} + publishDir = [ + path: { "${params.outdir}/fastqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FASTQC_FOR_FASTP' { + ext.args = '--quiet' + ext.when = { !params.skip_qc } + ext.prefix = { "${meta.id}_trimmed" } + publishDir = [ + path: { "${params.outdir}/fastqc_for_fastp" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FUSIONCATCHER' { + ext.args = "--limitSjdbInsertNsj ${params.fusioncatcher_limitSjdbInsertNsj}" + } + + withName: 'FUSIONCATCHER_DOWNLOAD' { + publishDir = [ + path: { "${params.genomes_base}/fusioncatcher" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*FUSIONINSPECTOR_WORKFLOW:.*:FUSIONINSPECTOR' { + ext.when = { !params.skip_vis } + ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' } + ext.args2 = '--annotate --examine_coding_effect' + } + + withName: 'FUSIONREPORT' { + ext.when = { !params.skip_vis } + ext.args = { params.no_cosmic ? "--no-cosmic" : "" } // test the parameter itself: a nested { } closure is always truthy + ext.args2 = "--export csv" + publishDir = [ + path: { "${params.outdir}/fusionreport/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FUSIONREPORT_DOWNLOAD' { + ext.args = { params.no_cosmic ? "--no-cosmic" : " --cosmic_usr ${params.cosmic_username} --cosmic_passwd ${params.cosmic_passwd}" } + ext.args2 = { params.qiagen ? 
"--qiagen" : "" } + publishDir = [ + path: { "${params.genomes_base}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATK4_BEDTOINTERVALLIST' { + publishDir = [ + path: { "${params.genomes_base}/gencode" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATK4_MARKDUPLICATES' { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } // plain boolean expression instead of nested closures + publishDir = [ + path: { "${params.outdir}/picard" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'GFFREAD' { + ext.args = { '-w -S' } + publishDir = [ + path: { "${params.genomes_base}/gffread" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'GTF_TO_REFFLAT' { + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" + publishDir = [ + path: { "${params.genomes_base}/gencode" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'HGNC_DOWNLOAD' { + publishDir = [ + path: { "${params.genomes_base}/hgnc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] } withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + ext.when = { !params.skip_qc } + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -31,4 +194,198 @@ process { ] } + withName: 'PICARD_COLLECTRNASEQMETRICS' { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + + } + + withName: 'PICARD_COLLECTINSERTSIZEMETRICS' { + ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + ext.prefix = { "${meta.id}_collectinsertsize"} + publishDir = [ + path: { "${params.outdir}/picard" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'SALMON_INDEX' { + publishDir = [ + path: { "${params.genomes_base}/salmon" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'SALMON_QUANT' { + ext.args = { [ + '--gcBias', + '--validateMappings' + ].join(' ') } + publishDir = [ + path: { "${params.outdir}/salmon" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'SAMTOOLS_FAIDX' { + publishDir = [ + path: { "${params.genomes_base}/gencode" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: 'SAMTOOLS_INDEX_FOR_ARRIBA' { + ext.prefix = { "${meta.id}_star_for_arriba_sorted" } + publishDir = [ + path: { "${params.outdir}/cram_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SAMTOOLS_SORT_FOR_ARRIBA' { + ext.prefix = { "${meta.id}_star_for_arriba_sorted" } + publishDir = [ + path: { "${params.outdir}/cram_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'SAMTOOLS_VIEW_FOR_ARRIBA' { + ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}_star_for_arriba_sorted" } + publishDir = [ + path: { "${params.outdir}/cram_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SAMTOOLS_INDEX_FOR_STARFUSION' { + publishDir = [ + path: { "${params.outdir}/star_for_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SAMTOOLS_INDEX_FOR_STARFUSION_CRAM' { + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + publishDir = [ + path: { "${params.outdir}/cram_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SAMTOOLS_VIEW_FOR_STARFUSION' { + ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + publishDir = [ + path: { "${params.outdir}/cram_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'STAR_FOR_ARRIBA' { + publishDir = [ + path: { "${params.outdir}/star_for_arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + ext.args = '--readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMunmapped Within \ + --outBAMcompression 0 \ + --outFilterMultimapNmax 50 \ + --peOverlapNbasesMin 10 \ + --alignSplicedMateMapLminOverLmate 0.5 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimSegmentMin 10 \ + --chimOutType WithinBAM HardClip Junctions \ + --chimJunctionOverhangMin 10 \ + --chimScoreDropMax 30 \ + --chimScoreJunctionNonGTAG 0 \ + --chimScoreSeparation 1 \ + --chimSegmentReadGapMax 3 \ + --chimMultimapNmax 50' + } + + withName: 'STAR_FOR_STARFUSION' { + publishDir = [ + path: { "${params.outdir}/star_for_starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = '--twopassMode Basic \ + --outReadsUnmapped None \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMstrandField intronMotif \ + --outSAMunmapped Within \ + --chimSegmentMin 12 \ + --chimJunctionOverhangMin 8 \ + --chimOutJunctionFormat 1 \ + --alignSJDBoverhangMin 10 \ + --alignMatesGapMax 100000 \ + --alignIntronMax 100000 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimMultimapScoreRange 3 \ + --chimScoreJunctionNonGTAG -4 \ + --chimMultimapNmax 20 \ + --chimNonchimScoreDropMin 10 \ + --peOverlapNbasesMin 12 \ + --peOverlapMMp 0.1 \ + --alignInsertionFlush Right \ + --alignSplicedMateMapLminOverLmate 0 \ + --alignSplicedMateMapLmin 30 \ + --chimOutType Junctions \ + --quantMode GeneCounts' + } + + withName: 'STAR_GENOMEGENERATE' { + ext.args = "--sjdbOverhang ${params.read_length - 1}" + cpus = { 24 * task.attempt } + memory = { 100.GB * task.attempt } + time = { 2.d * task.attempt } + publishDir = [ + path: { "${params.genomes_base}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: 'NFCORE_RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD' { + cpus = { 24 * task.attempt } + memory = { 100.GB * task.attempt } + time = { 2.d * task.attempt } + publishDir = [ + path: { "${params.genomes_base}/starfusion" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = "--max_readlength ${params.read_length} --human_gencode_filter" + } + + + withName: 'STRINGTIE_MERGE' { + publishDir = [ + path: { "${params.outdir}/stringtie/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCF_COLLECT' { + ext.when = { !params.fusioninspector_only && !params.skip_vcf } /* plain boolean test: brace-wrapped operands would be closure objects, which Groovy always treats as true */ + } + + withName: '.*' { + ext.when = { !params.references_only || task.process.contains('BUILD_REFERENCES') } + } } diff --git a/conf/test.config b/conf/test.config index 56ff5304..ba937e85 100644 --- a/conf/test.config +++ b/conf/test.config @@ -5,11 +5,22 @@ Defines input files and everything required to run a fast and simple pipeline test.
Use as follows: - nextflow run nf-core/rnafusion -profile test, --outdir + nextflow run nf-core/rnafusion -profile test, --outdir -stub ---------------------------------------------------------------------------------------- */ +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path +'rnafusion/testdata/human/samplesheet_valid.csv' + all = true + no_cosmic = true +} + +// Limit and standardize resources for github actions and reproducibility process { resourceLimits = [ cpus: 4, @@ -17,14 +28,3 @@ process { time: '1.h' ] } - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'// Genome references - genome = 'R64-1-1' -} diff --git a/conf/test_build.config b/conf/test_build.config new file mode 100644 index 00000000..971e386a --- /dev/null +++ b/conf/test_build.config @@ -0,0 +1,61 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/rnafusion -profile test_build, --outdir + +---------------------------------------------------------------------------------------- +Uses a minimal fasta and gtf for testing purposes. 
+This test is designed to test the `build_references` subworkflow with the following processes on: +- SAMTOOLS_FAIDX +- HGNC_DOWNLOAD +- GATK4_CREATESEQUENCEDICTIONARY +- GET_RRNA_TRANSCRIPTS +- GATK4_BEDTOINTERVALLIST +- GTF_TO_REFFLAT +- GFFREAD +- STAR_GENOMEGENERATE +- ARRIBA_DOWNLOAD +- STARFUSION_BUILD +- FUSIONREPORT_DOWNLOAD + +It does not test the following processes of the `build_references` subworkflow: +- GENCODE_DOWNLOAD +- FUSIONCATCHER_BUILD + +It does not test the main rnafusion subworkflows by setting references_only = true. +*/ + +params { + config_profile_name = 'Test build references profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + references_only = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + no_cosmic = true + all = false + arriba = true + fusioncatcher = false + starfusion = true + fasta = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa' + gtf = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf' + fusionreport = true + + skip_salmon_index = true + starfusion_build = true + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + +} + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/conf/test_cosmic.config b/conf/test_cosmic.config new file mode 100644 index 00000000..9cc1bcda --- /dev/null +++ b/conf/test_cosmic.config @@ -0,0 +1,31 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/rnafusion -profile test_cosmic, --outdir -stub + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test cosmic profile' + config_profile_description = 'Minimal test cosmic dataset to check pipeline function' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + all = true + cosmic_username = secrets.COSMIC_USERNAME + cosmic_passwd = secrets.COSMIC_PASSWD +} + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/conf/test_full.config b/conf/test_full.config index 7662ac58..cfcb7865 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -3,10 +3,8 @@ Nextflow config file for running full-size tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a full size pipeline test. - Use as follows: - nextflow run nf-core/rnafusion -profile test_full, --outdir - + nextflow run nf-core/rnafusion -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,10 +13,10 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g.
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + all = true + // TODO + // test_full can't run currently because the references must be given and they are not available. + // This profile should be updated once they get uploaded. - // Genome references - genome = 'R64-1-1' -} + } diff --git a/docs/images/BTB_logo.png b/docs/images/BTB_logo.png new file mode 100644 index 00000000..6a197b80 Binary files /dev/null and b/docs/images/BTB_logo.png differ diff --git a/docs/images/BTB_logo.svg b/docs/images/BTB_logo.svg new file mode 100644 index 00000000..099f1101 --- /dev/null +++ b/docs/images/BTB_logo.svg @@ -0,0 +1,184 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/images/NGI_logo.png b/docs/images/NGI_logo.png new file mode 100644 index 00000000..3f4b769e Binary files /dev/null and b/docs/images/NGI_logo.png differ diff --git a/docs/images/NGI_logo.svg b/docs/images/NGI_logo.svg new file mode 100644 index 00000000..aef40fd8 --- /dev/null +++ b/docs/images/NGI_logo.svg @@ -0,0 +1,333 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/docs/images/SDU_logo.png b/docs/images/SDU_logo.png new file mode 100644 index 00000000..38e60b4b Binary files /dev/null and b/docs/images/SDU_logo.png differ diff --git a/docs/images/SciLifeLab_logo.png b/docs/images/SciLifeLab_logo.png new file mode 100644 index 00000000..bc4dbda6 Binary files /dev/null and b/docs/images/SciLifeLab_logo.png differ diff --git a/docs/images/SciLifeLab_logo.svg b/docs/images/SciLifeLab_logo.svg new file mode 100644 index 00000000..b8a44b79 --- /dev/null +++ b/docs/images/SciLifeLab_logo.svg @@ -0,0 +1,99 @@ + + + +image/svg+xml \ No newline at end of file diff --git 
a/docs/images/nf-core-rnafusion_metro_map.png b/docs/images/nf-core-rnafusion_metro_map.png new file mode 100644 index 00000000..76b6bc3f Binary files /dev/null and b/docs/images/nf-core-rnafusion_metro_map.png differ diff --git a/docs/images/nf-core-rnafusion_metro_map.svg b/docs/images/nf-core-rnafusion_metro_map.svg new file mode 100644 index 00000000..1cd7980f --- /dev/null +++ b/docs/images/nf-core-rnafusion_metro_map.svg @@ -0,0 +1,754 @@ + + + + + + + + + + + + + + + fastq + + + + + + + + + + + + + + + txt + + + + + + + + + + + + + + StringTie + Arriba + + + + + align + fastptrimming + + FusionCatcher + + + align + + + + + + + + + + + + + + STAR-Fusion + FastQC + MultiQC + FusionInspector + fusion-report + Picard:- CollectRnaSeqMetrics- CollectWgsMetrics- CollectInsertSizeMetrics + + + + + + + + + + + + + + + + + + fusioncatcher + starfusion + qc + + Workflows: + + + + arriba + stringtie + + Arribavisualisation + VCFcollect + + + + diff --git a/docs/images/rnafusion_logo.png b/docs/images/rnafusion_logo.png new file mode 100644 index 00000000..548f1578 Binary files /dev/null and b/docs/images/rnafusion_logo.png differ diff --git a/docs/images/rnafusion_logo.svg b/docs/images/rnafusion_logo.svg new file mode 100644 index 00000000..fcd196c8 --- /dev/null +++ b/docs/images/rnafusion_logo.svg @@ -0,0 +1,208 @@ + +image/svg+xmlnf- + +core/ + +rnafusion + + \ No newline at end of file diff --git a/docs/images/summary_graph_1.png b/docs/images/summary_graph_1.png new file mode 100644 index 00000000..61d32e59 Binary files /dev/null and b/docs/images/summary_graph_1.png differ diff --git a/docs/images/summary_graph_2.png b/docs/images/summary_graph_2.png new file mode 100644 index 00000000..75dffc13 Binary files /dev/null and b/docs/images/summary_graph_2.png differ diff --git a/docs/images/summary_graph_3.png b/docs/images/summary_graph_3.png new file mode 100644 index 00000000..6b87cff6 Binary files /dev/null and b/docs/images/summary_graph_3.png differ diff --git 
a/docs/output.md b/docs/output.md index 501e1950..ad3c78e3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,16 +6,150 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - -## Pipeline overview +## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [Download and build references](#references) - Build references needed to run the rest of the pipeline +- [STAR](#star) - Alignment for arriba, and STAR-fusion +- [Cat](#cat) - Concatenate fastq files per sample ID +- [Arriba](#arriba) - Arriba fusion detection +- [STAR-fusion](#starfusion) - STAR-fusion fusion detection +- [StringTie](#stringtie) - StringTie assembly +- [FusionCatcher](#fusioncatcher) - Fusion catcher fusion detection +- [CTAT-SPLICING](#ctat-splicing) - Detection and annotation of cancer splicing aberrations +- [Samtools](#samtools) - SAM/BAM file manipulation +- [Fusion-report](#fusion-report) - Summary of the findings of each tool and comparison to COSMIC, Mitelman, and FusionGDB2 databases +- [FusionInspector](#fusionInspector) - Supervised analysis of fusion predictions from fusion-report, recover and re-score evidence for such predictions +- [Arriba visualisation](#arriba-visualisation) - Arriba visualisation report for FusionInspector fusions +- [Picard](#picard) - Collect QC metrics - [FastQC](#fastqc) - Raw read QC +- [Salmon](#salmon) - Normalized gene expression calculation - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +## Download and build references + +
+Output reference files and folder structure + +### References directory structure + +- `references/` + - `arriba` + - `blacklist_hg38_GRCh38_v2.1.0.tsv.gz` + - `protein_domains_hg38_GRCh38_v2.1.0.gff3` + - `cytobands_hg38_GRCh38_v2.1.0.tsv` + - `ensembl` + - `Homo_sapiens.GRCh38.{ensembl_version}.all.fa` + - `Homo_sapiens.GRCh38.{ensembl_version}.cdna.all.fa.gz` + - `Homo_sapiens.GRCh38.{ensembl_version}.gtf` + - `Homo_sapiens.GRCh38.{ensembl_version}.chr.gtf` + - `Homo_sapiens.GRCh38.{ensembl_version}.chr.gtf.refflat` + - `Homo_sapiens.GRCh38.{ensembl_version}.interval_list` + - `fusioncatcher` + - `human_v` - dir with all references for fusioncatcher + - `fusion_report_db` + - `cosmic.db` + - `fusiongdb2.db` + - `mitelman.db` + - `star` - dir with STAR index + - `starfusion` + - files and dirs used to build the index + - `ctat_genome_lib_build_dir` - dir containing the index + +(Only files or folders used by the pipeline are mentioned explicitly.) + +
+ +## Main pipeline workflow + +> If no argument is specified here, the tool was used with default parameters. + +### Directory structure + +```text +{outdir} +├── arriba +├── arriba_visualisation +├── cram_arriba +├── cram_starfusion +├── fastp +├── fastqc +├── fusioncatcher +├── fusioninspector +├── fusionreport +├── kallisto_quant +├── megafusion +├── multiqc +├── picard +├── pipeline_info +├── samtools_sort_for_arriba +├── star_for_arriba +├── star_for_starfusion +├── starfusion +└── work +.nextflow.log +``` + +If no parameters are specified, the default is applied. + +### Arriba + +[Arriba](https://arriba.readthedocs.io/en/latest/) is used for i) detect gene fusions and ii) create a PDF report for the fusions found (visualisation): + +#### Detection + +
+Output files + +- `arriba/` + - `.arriba.fusions.tsv` - contains the identified fusions + - `.arriba.fusions.discarded.tsv` + +
+ +#### Visualisation + +
+Output files + +- `arriba_visualisation/` + - `_combined_fusions_arriba_visualisation.pdf` + +
+ +The visualisation displays the fusions that fusioninspector outputs. That means that fusions from all callers are aggregated (by fusion-report) and then analyzed through fusioninspector (Note: Fusioninspector contains a filtering step!). + +### Cat + +
+Output files + +- `cat/` + - `_1.merged.fastq.gz` + - `_2.merged.fastq.gz` + +
+ +If multiple libraries or runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage](https://nf-co.re/rnafusion/usage#samplesheet-input) documentation to see how to specify these samples in the input samplesheet. + +### Fastp + +If `--fastp_trim` is selected, [fastp](https://github.com/OpenGene/fastp) will filter low quality reads as well as bases at the 5' and 3' ends, trim adapters (automatically detected, but input with parameter `--adapter_fasta` is possible). 3' trimming is also possible via parameter `--trim_tail`. + +
+Output files + +- `fastp/` + - `_1.fastp.fastq.gz` + - `_2.fastp.fastq.gz` + - `.fastp.html` + - `.fastp.json` + - `.fastp.log` + +
+ ### FastQC
@@ -31,6 +165,153 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ### MultiQC +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) + +![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) + +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +:::note +The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: + +### FusionCatcher + +
+Output files + +- `fusioncatcher` + - `.fusioncatcher.fusion-genes.txt` + - `.fusioncatcher.summary.txt` + - `.fusioncatcher.log` + +
+ +[FusionCatcher](https://github.com/ndaniel/fusioncatcher) searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data. The parameter `--fusioncatcher_limitSjdbInsertNsj` can be used to modify limitSjdbInsertNsj. + +### CTAT-SPLICING + +
+Output files + +- `ctatsplicing` + - `arriba` + - `.cancer_intron_reads.sorted.bam` + - `.cancer_intron_reads.sorted.bam.bai` + - `.cancer.introns` + - `.cancer.introns.prelim` + - `.chckpts` + - `.ctat-splicing.igv.html` + - `.gene_reads.sorted.sifted.bam` + - `.gene_reads.sorted.sifted.bam.bai` + - `.igv.tracks` + - `.introns` + - `.introns.for_IGV.bed` + - `starfusion` + - `.cancer_intron_reads.sorted.bam` + - `.cancer_intron_reads.sorted.bam.bai` + - `.cancer.introns` + - `.cancer.introns.prelim` + - `.chckpts` + - `.ctat-splicing.igv.html` + - `.gene_reads.sorted.sifted.bam` + - `.gene_reads.sorted.sifted.bam.bai` + - `.igv.tracks` + - `.introns` + - `.introns.for_IGV.bed` + +
+ +[CTAT-SPLICING](https://github.com/TrinityCTAT/CTAT-SPLICING/wiki) detects and annotates aberrant splicing isoforms in cancer. This is run on the input files for `arriba` and/or `starfusion`. + +### FusionInspector + +
+Output files + +- `fusioninspector` + - `.fusion_inspector_web.html` - visualisation report described in details [here](https://github.com/FusionInspector/FusionInspector/wiki/FusionInspector-Visualizations) + - `FusionInspector.log` + - `.FusionInspector.fusions.abridged.tsv` + +
+ +[FusionInspector](https://github.com/FusionInspector/FusionInspector/tree/master) performs a validation of fusion transcript predictions. Possibility to use `--fusioninspector_limitSjdbInsertNsj` to set limitSjdbInsertNsj to anything other than the default 1000000. + +### Fusion-report + +Please note that fusion-report is executed from fork https://github.com/Clinical-Genomics/fusion-report + +
+Output files + +- `fusionreport` + - + - `.fusionreport.tsv` + - `.fusionreport_filtered.tsv` + - `_fusionreport_index.html` - general report for all filtered fusions + - `.fusions.csv` - index in csv format + - `_.html` - specific report for each filtered fusion + +
+ +[Fusion-report](https://github.com/matq007/fusion-report) is a tool for parsing outputs from fusion detection tools. +The score is explained here: . Summary: + +The weights for databases are as follows: + +- COSMIC (50) +- MITELMAN (50) +- FusionGDB2 (0) + +The final formula for calculating score is: + +$$ +score = 0.5 * \sum_{tool}^{tools} f(fusion, tool)*w(tool) + 0.5 * \sum_{db}^{dbs} g(fusion, db)*w(db) +$$ + +All tools have the same weight. + +### Salmon + +
+Output files + +- `salmon` + - `` + +
+ +Folder containing the quantification results + +### Kallisto + +
+Output files + +- `kallisto` + - `.kallisto_quant.fusions.txt` + +
+ +Quantifying abundances of transcripts from bulk and single-cell RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. + +### Vcf_collect + +
+Output files + +- `vcf_collect` + - `_fusion_data.vcf` - contains the fusions in vcf format with collected statistics. + +Vcf-collect takes as input the results of fusion-report and fusioninspector. That means fusions from all tools are aggregated. Fusioninspector applies a filter so it is possible some fusions detected by a caller are filtered out by fusioninspector. In those cases, vcf-collect will display the fusions, but a lot of data will be missing as fusioninspector performs the analysis for each fusion. + +
+ +[Megafusion](https://github.com/J35P312/MegaFusion) converts RNA fusion files to SV VCF and collects statistics and metrics in a VCF file. + +### MultiQC +
Output files @@ -47,6 +328,150 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ ### Pipeline information +### Picard + +
+Output files + +Picard CollectRnaMetrics and picard MarkDuplicates share the same output directory. + +- `picard` + - `.MarkDuplicates.metrics.txt` - metrics from MarkDuplicates + - `_rna_metrics.txt` - metrics from CollectRnaMetrics + - `_insert_size_metrics.txt.txt` - metrics from CollectInsertSizeMetrics + - `.bam` - BAM file with marked duplicates + +
+ +### Samtools + +#### Samtools sort + +Samtools sort is used to sort BAM files from STAR_FOR_STARFUSION (for arriba visualisation) + +
+Output files + +- `samtools_sort_for_` + - `(_chimeric)_sorted.bam` - sorted BAM file + +
+ +#### Samtools index + +Samtools index is used to index BAM files from STAR_FOR_ARRIBA (for arriba visualisation) and STAR_FOR_STARFUSION (for QC) + +
+Output files + +- `samtools_for_` + - `.(Aligned.sortedByCoord).out.bam.bai` - + +
+ +### STAR + +STAR is used to align reads to the genome reference + +STAR is run for 2 tools: + +For `arriba` with the parameters: + +```bash +--readFilesCommand zcat \ +--outSAMtype BAM SortedByCoordinate \ +--outSAMunmapped Within \ +--outBAMcompression 0 \ +--outFilterMultimapNmax 50 \ +--peOverlapNbasesMin 10 \ +--alignSplicedMateMapLminOverLmate 0.5 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimSegmentMin 10 \ +--chimOutType WithinBAM HardClip Junctions \ +--chimJunctionOverhangMin 10 \ +--chimScoreDropMax 30 \ +--chimScoreJunctionNonGTAG 0 \ +--chimScoreSeparation 1 \ +--chimSegmentReadGapMax 3 \ +--chimMultimapNmax 50 +``` + +For `STAR-fusion` with the parameters: + +```bash +--twopassMode Basic \ +--outReadsUnmapped None \ +--readFilesCommand zcat \ +--outSAMstrandField intronMotif \ +--outSAMunmapped Within \ +--chimSegmentMin 12 \ +--chimJunctionOverhangMin 8 \ +--chimOutJunctionFormat 1 \ +--alignSJDBoverhangMin 10 \ +--alignMatesGapMax 100000 \ +--alignIntronMax 100000 \ +--alignSJstitchMismatchNmax 5 -1 5 5 \ +--chimMultimapScoreRange 3 \ +--chimScoreJunctionNonGTAG -4 \ +--chimMultimapNmax 20 \ +--chimNonchimScoreDropMin 10 \ +--peOverlapNbasesMin 12 \ +--peOverlapMMp 0.1 \ +--alignInsertionFlush Right \ +--alignSplicedMateMapLminOverLmate 0 \ +--alignSplicedMateMapLmin 30 \ +--chimOutType Junctions \ +--quantMode GeneCounts +``` + +> STAR_FOR_STARFUSION uses `${params.ensembl}/Homo_sapiens.GRCh38.${params.ensembl_version}.chr.gtf` whereas STAR_FOR_ARRIBA uses `${params.ensembl_ref}/Homo_sapiens.GRCh38.${params.ensembl_version}.gtf` + +
+Output files + +**Common** + +- `star_for_` +- `.Log.final.out` +- `.Log.progress.out` +- `.SJ.out.tab` + +**For arriba:** + +- `.Aligned.out.bam` + + **For starfusion:** + +- `.Aligned.sortedByCoord.out.bam` +- `.Chimeric.out.junction` +- `.ReadsPerGene.out.tab` + +
+ +The STAR index is generated with `--sjdbOverhang ${params.read_length - 1}`, params.read_length default is 100. + +### STAR-fusion + +
+Output files + +- `starfusion` + - `.starfusion.fusion_predictions.tsv` - contains the identified fusions + - `.starfusion.abridged.tsv` - contains the identified fusions abridged + - `starfusion.abridged.coding_effect.tsv` + +
+ +### StringTie + +
+Output files + +- `stringtie//stringtie.merged.gtf` - merged gtf from annotation and stringtie output gtfs +
+ +### Pipeline information +
Output files diff --git a/docs/usage.md b/docs/usage.md index 73894cb1..f0afcebb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,66 +1,168 @@ -# nf-core/rnafusion: Usage +# nf-core/rnafusion: Usage ## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/rnafusion/usage](https://nf-co.re/rnafusion/usage) > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Introduction +## Pipeline summary - +The pipeline is divided into two parts: -## Samplesheet input +1. Download and build references -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +- specified with `--references_only` parameter +- required only once before running the pipeline +- **Important**: has to be run with each new release + +2. Detecting fusions + +- Supported tools: `Arriba`, `FusionCatcher`, `STAR-Fusion`, `StringTie` and `CTAT-SPLICING` +- QC: `Fastqc`, `MultiQC`, and `Picard CollectInsertSize`, `Picard CollectWgsMetrics`, `Picard Markduplicates` +- Fusions visualization: `Arriba`, `fusion-report`, `FusionInspector`, and `vcf_collect` + +## Download and build references + +The rnafusion pipeline needs references for the fusion detection tools, so downloading these is a **requirement**. + +> **IMPORTANT** +> +> - Note that this step takes about 24 hours to complete on HPC. +> - Do not provide a samplesheet via the `input` parameter, otherwise the pipeline will run the analysis directly after downloading the references (except if that is what you want). 
+ +```bash +nextflow run nf-core/rnafusion \ + -profile \ + --references_only --all \ + --cosmic_username --cosmic_passwd \ + --genomes_base \ + --outdir +``` + +References for each tool can also be downloaded separately with: ```bash ---input '[path to samplesheet file]' +nextflow run nf-core/rnafusion \ + -profile \ + --references_only -- -- ... \ + --cosmic_username --cosmic_passwd \ + --genomes_base \ + --outdir ``` -### Multiple runs of the same sample +If you are not covered by the research COSMIC license and want to avoid using COSMIC, you can provide the additional option `--no_cosmic`. -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +### Downloading the cosmic database with SANGER or QIAGEN -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +#### For academic users + +First register for a free account at COSMIC at [https://cancer.sanger.ac.uk/cosmic/register](https://cancer.sanger.ac.uk/cosmic/register) using a university email. The account is **only activated upon** clicking the link in the registration email. + +#### For non-academic users + +Use credentials from QIAGEN and add `--qiagen` + +```bash +nextflow run nf-core/rnafusion \ + -profile \ + --references_only -- -- ... \ + --cosmic_username --cosmic_passwd \ + --genomes_base \ + --outdir --qiagen +``` + +#### STAR-Fusion references downloaded vs built + +By default STAR-Fusion references are **built**.
You can also download them from [CTAT](https://github.com/NCIP/Trinity_CTAT/wiki) by using the flag `--starfusion_build FALSE` for both reference building and fusion detection. This allows more flexibility for different organisms but **be aware that STAR-Fusion reference download is not recommended as not fully tested!** + +#### Issues with building references + +If process `FUSIONREPORT_DOWNLOAD` times out, it could be due to network restriction (for example if trying to run on HPC). As this process is lightweight in cpu, memory and time, running on local machines with the following options might solve the issue: + +```bash +nextflow run nf-core/rnafusion \ + -profile \ + --references_only \ + --cosmic_username --cosmic_passwd \ + --fusionreport \ + --genomes_base \ + --outdir +``` + +Adjustments for cpu and memory requirements can be done by feeding a custom configuration with `-c /PATH/TO/CUSTOM/CONFIG`. +Where the custom configuration could look like (adaptation to local machine necessary): + +```text +process { + withName: 'NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD' { + memory = '8.GB' + cpus = 4 + } +} ``` -### Full samplesheet +The three `fusion-report` files: `cosmic.db`, `fusiongdb2.db`, `mitelman.db` +should then be copied into the HPC `/references/fusion_report_db`. + +#### Note about fusioncatcher references + +The references are only built based on ensembl version 102. It is not possible currently to use any other version/source. + +## Running the pipeline -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +### Samplesheet input +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline.
The pipeline will detect whether a sample is single- or paired-end from the samplesheet - the `fastq_2` column is empty for single-end. The samplesheet has to be a comma-separated file (.csv) but can have as many columns as you desire. There is a strict requirement for the first 4 columns to match those defined in the table below with the header row included. A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,forward +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,forward +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,,forward +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,,forward +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,forward +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,forward ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). 
| -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +As you can see above for multiple runs of the same sample, the `sample` name has to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +| Column | Description | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `strandedness` | Strandedness: forward or reverse. | -## Running the pipeline +### Starting commands + +The pipeline can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. 
To run all tools (`arriba`, `fusioncatcher`, `starfusion`, `stringtie`, `ctat-splicing`) use the `--all` parameter: + +```bash +nextflow run nf-core/rnafusion \ + -profile <docker/singularity/.../institute> \ + --all \ + --input <SAMPLE_SHEET.CSV> \ + --genomes_base <PATH/TO/REFERENCES> \ + --outdir <OUTPUT/PATH> +``` -The typical command for running the pipeline is as follows: +To run only specific detection tools, use the corresponding `--<tool>` flag(s): ```bash -nextflow run nf-core/rnafusion --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/rnafusion \ + -profile <docker/singularity/.../institute> \ + --<tool1> --<tool2> ... \ + --input <SAMPLE_SHEET.CSV> \ + --genomes_base <PATH/TO/REFERENCES> \ + --outdir <OUTPUT/PATH> ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +If you are not covered by the research COSMIC license and want to avoid using COSMIC, you can provide the additional option `--no_cosmic`. + +> **IMPORTANT:** Either `--all` or at least one `--<tool>` flag is required to run the detection tools. + +`--genomes_base` should be the path to the directory containing the folder `references/` that was built with `--references_only`. Note that the pipeline will create the following files in your working directory: @@ -89,12 +191,144 @@ with: ```yaml title="params.yaml" input: './samplesheet.csv' outdir: './results/' -genome: 'GRCh37' <...> ``` You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +:::warning +Conda is not currently supported. +The only genome currently supported is GRCh38. +::: + +### Options + +#### Trimming + +When the flag `--fastp_trim` is used, `fastp` is used to provide all tools with trimmed reads. Quality and adapter trimming are performed by default. In addition, tail trimming and specifying an adapter file (`--adapter_fastq`) are possible. Example usage: + +```bash +nextflow run nf-core/rnafusion \ +-profile <docker/singularity/.../institute> \ +--<tool1> --<tool2> ... 
\ +--input <SAMPLE_SHEET.CSV> \ +--genomes_base <PATH/TO/REFERENCES> \ +--outdir <OUTPUT/PATH> \ +--fastp_trim \ +--trim_tail <INT> (optional) \ +--adapter_fastq <PATH/TO/ADAPTER/FASTQ> (optional) +``` + +#### Filter fusions detected by 2 or more tools + +```bash +nextflow run nf-core/rnafusion \ + -profile <docker/singularity/.../institute> \ + --<tool1> --<tool2> ... \ + --input <SAMPLE_SHEET.CSV> \ + --genomes_base <PATH/TO/REFERENCES> \ + --outdir <OUTPUT/PATH> \ + --tools_cutoff <INT> +``` + +`--tools_cutoff INT` will discard fusions detected by fewer than INT tools, both for display in the fusionreport html index and for consideration in fusioninspector. Default = 1, no filtering. + +#### Adding custom fusions to consider as well as the detected set: whitelist + +```bash +nextflow run nf-core/rnafusion \ + -profile <docker/singularity/.../institute> \ + --<tool1> --<tool2> ... \ + --input <SAMPLE_SHEET.CSV> \ + --genomes_base <PATH/TO/REFERENCES> \ + --outdir <OUTPUT/PATH> \ + --whitelist <WHITELIST/PATH> +``` + +The custom fusion file should have the following format: + +``` +GENE1--GENE2 +GENE3--GENE4 +``` + +#### Running FusionInspector only + +FusionInspector can be run standalone with: + +```bash +nextflow run nf-core/rnafusion \ +-profile <docker/singularity/.../institute> \ +--fusioninspector_only \ +--fusioninspector_fusions <PATH_TO_CUSTOM_FUSION_FILE> \ +--input <SAMPLE_SHEET.CSV> \ +--outdir <OUTPUT/PATH> +``` + +The custom fusion file should have the following format: + +``` +GENE1--GENE2 +GENE3--GENE4 +``` + +#### Skipping QC + +```bash +nextflow run nf-core/rnafusion \ +-profile <docker/singularity/.../institute> \ +--skip_qc \ +--all OR <--tool> \ +--input <SAMPLE_SHEET.CSV> \ +--genomes_base <PATH/TO/REFERENCES> \ +--outdir <OUTPUT/PATH> +``` + +This will skip all QC-related processes (picard metrics collection). + +#### Skipping visualisation + +```bash +nextflow run nf-core/rnafusion \ +-profile <docker/singularity/.../institute> \ +--skip_vis \ +--all OR <--tool> \ +--input <SAMPLE_SHEET.CSV> \ +--genomes_base <PATH/TO/REFERENCES> \ +--outdir <OUTPUT/PATH> +``` + +This will skip all visualisation processes, including `fusion-report`, `FusionInspector` and `Arriba` visualisation. + +#### Optional manual feed-in of fusion files + +It is possible to give the output of each tool manually using the argument `--<tool>_fusions PATH/TO/FUSION/FILE`. This feature needs more testing; don't hesitate to open an issue if you encounter problems. 
+ +#### Set different `--limitSjdbInsertNsj` parameter + +Two parameters are available to raise the `--limitSjdbInsertNsj` value if necessary: + +- `--fusioncatcher_limitSjdbInsertNsj`, default: 2000000 +- `--fusioninspector_limitSjdbInsertNsj`, default: 1000000 + +Use the parameter `--cram` to compress the BAM files to CRAM for specific tools. Options: arriba, starfusion. Leave no space between options: + +- `--cram arriba,starfusion`, default: [] +- `--cram arriba` + +### Troubleshooting + +#### GstrandBit issues + +The issue below sometimes occurs: + +``` +EXITING because of FATAL ERROR: cannot insert sequence on the fly because of strand GstrandBit problem +SOLUTION: please contact STAR author at https://groups.google.com/forum/#!forum/rna-star +``` + +As the error message suggests, this is a STAR-related error, and your best chance of solving it is the STAR forum linked in the message. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -156,6 +390,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters + - !!!! 
Run with `-stub` as all references need to be downloaded otherwise !!!! ### `-resume` @@ -193,7 +431,7 @@ In most cases, you will only need to create a custom config as a one-off but if See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). --> ## Running in the background diff --git a/main.nf b/main.nf index 6532daee..96e564d3 100644 --- a/main.nf +++ b/main.nf @@ -15,10 +15,11 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RNAFUSION } from './workflows/rnafusion' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { RNAFUSION } from './workflows/rnafusion' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -26,10 +27,9 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnaf ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') + + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -41,20 +41,19 @@ params.fasta = getGenomeAttribute('fasta') // WORKFLOW: Run main 
analysis pipeline depending on type of input // workflow NFCORE_RNAFUSION { - take: - samplesheet // channel: samplesheet read in from --input + samplesheet main: // // WORKFLOW: Run pipeline // - RNAFUSION ( - samplesheet - ) + + RNAFUSION(samplesheet) + emit: - multiqc_report = RNAFUSION.out.multiqc_report // channel: /path/to/multiqc_report.html + multiqc_report = RNAFUSION.out.multiqc_report } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -71,18 +70,15 @@ workflow { PIPELINE_INITIALISATION ( params.version, params.validate_params, - params.monochrome_logs, args, params.outdir, - params.input ) // // WORKFLOW: Run main workflow // - NFCORE_RNAFUSION ( - PIPELINE_INITIALISATION.out.samplesheet - ) + NFCORE_RNAFUSION (PIPELINE_INITIALISATION.out.samplesheet) + // // SUBWORKFLOW: Run completion tasks // diff --git a/modules.json b/modules.json index 296d3542..01e049b6 100644 --- a/modules.json +++ b/modules.json @@ -5,15 +5,141 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "agat/convertspgff2tsv": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "arriba/arriba": { + "branch": "master", + "git_sha": "7741dfc830e77a8ead2fcb50b01461ee09d0cdfe", + "installed_by": ["modules"] + }, + "arriba/download": { + "branch": "master", + "git_sha": "467c202a876d26af544fa8c4b22a050a535462a7", + "installed_by": ["modules"] + }, + "cat/cat": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", "installed_by": ["modules"] }, + 
"gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gatk4/markduplicates": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "bd5f75ccaf2345269810e66e85de8a70e4de8764", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", "installed_by": ["modules"] + }, + "picard/collectinsertsizemetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "picard/collectrnaseqmetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "picard/collectwgsmetrics": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "rrnatranscripts": { + "branch": "master", + "git_sha": "812edf8cf702de42d2d8c7314d6f03b97e20abeb", + "installed_by": ["modules"], + "patch": "modules/nf-core/rrnatranscripts/rrnatranscripts.diff" + }, + "salmon/index": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "salmon/quant": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "installed_by": 
["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": ["modules"] + }, + "star/align": { + "branch": "master", + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": ["modules"] + }, + "stringtie/merge": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "stringtie/stringtie": { + "branch": "master", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] + }, + "ucsc/gtftogenepred": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] } } }, diff --git a/modules/local/arriba/visualisation/main.nf b/modules/local/arriba/visualisation/main.nf new file mode 100644 index 00000000..f1aa097b --- /dev/null +++ b/modules/local/arriba/visualisation/main.nf @@ -0,0 +1,53 @@ +process ARRIBA_VISUALISATION { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::arriba=2.4.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + tuple val(meta), path(bam), path(bai), path(fusions) + tuple val(meta2), path(gtf) + tuple val(meta3), path(protein_domains) + tuple val(meta4), path(cytobands) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def arg_cytobands = cytobands ? " --cytobands=$cytobands" : "" + def arg_protein_domains = protein_domains ? 
"--proteinDomains=$protein_domains" : "" + def prefix = task.ext.prefix ?: "${meta.id}" + """ + draw_fusions.R \\ + --fusions=$fusions \\ + --alignments=$bam \\ + --output=${prefix}.pdf \\ + --annotation=${gtf} \\ + $arg_cytobands \\ + $arg_protein_domains \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pdf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/local/arriba/visualisation/meta.yml b/modules/local/arriba/visualisation/meta.yml new file mode 100644 index 00000000..a7418ca2 --- /dev/null +++ b/modules/local/arriba/visualisation/meta.yml @@ -0,0 +1,54 @@ +name: arriba_visualisation +description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. +keywords: + - visualisation + - arriba +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAMindex file + pattern: "*.{bai}" + - fusions: + type: file + description: Arriba fusions file + pattern: "*.{tsv}" + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pdf: + type: file + description: File contains fusions visualisation + pattern: "*.{pdf}" + +authors: + - "@rannick" diff --git a/modules/local/ctatsplicing/startocancerintrons/main.nf b/modules/local/ctatsplicing/startocancerintrons/main.nf new file mode 100644 index 00000000..a8d683ec --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/main.nf @@ -0,0 +1,72 @@ +process CTATSPLICING_STARTOCANCERINTRONS { + tag "$meta.id" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://data.broadinstitute.org/Trinity/CTAT_SINGULARITY/CTAT-SPLICING/ctat_splicing.v0.0.2.simg' : + 'docker.io/trinityctat/ctat_splicing:0.0.2' }" + + input: + tuple val(meta), path(split_junction), path(junction), path(bam), path(bai) + tuple val(meta2), path(genome_lib) + + output: + tuple val(meta), path("*.cancer_intron_reads.sorted.bam") , emit: cancer_introns_sorted_bam + tuple val(meta), path("*.cancer_intron_reads.sorted.bam.bai") , emit: cancer_introns_sorted_bai + tuple val(meta), path("*.gene_reads.sorted.sifted.bam") , emit: gene_reads_sorted_bam + tuple val(meta), path("*.gene_reads.sorted.sifted.bam.bai") , emit: gene_reads_sorted_bai + tuple val(meta), path("*.cancer.introns") , emit: cancer_introns + tuple val(meta), path("*.cancer.introns.prelim") , emit: cancer_introns_prelim + tuple val(meta), path("*${prefix}.introns") , emit: introns + tuple val(meta), path("*.introns.for_IGV.bed") , emit: introns_igv_bed, optional: true + tuple val(meta), path("*.ctat-splicing.igv.html") , emit: igv_html, optional: true + tuple val(meta), path("*.igv.tracks") , emit: igv_tracks, optional: true + tuple val(meta), path("*.chckpts") , emit: chckpts + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix 
?: "${meta.id}" + def bam_arg = bam ? "--bam_file ${bam}" : "" + def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_index = bam && !bai ? "samtools index ${bam}" : "" + """ + ${create_index} + + /usr/local/src/CTAT-SPLICING/STAR_to_cancer_introns.py \\ + --SJ_tab_file ${split_junction} \\ + --chimJ_file ${junction} \\ + ${bam_arg} \\ + --output_prefix ${prefix} \\ + --ctat_genome_lib ${genome_lib} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ctat-splicing: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_igv_files = args.contains("--vis") ? "touch ${prefix}.introns.for_IGV.bed && touch ${prefix}.ctat-splicing.igv.html && touch ${prefix}.igv.tracks" : "" + """ + ${create_igv_files} + touch ${prefix}.cancer_intron_reads.sorted.bam + touch ${prefix}.cancer_intron_reads.sorted.bam.bai + touch ${prefix}.gene_reads.sorted.sifted.bam + touch ${prefix}.gene_reads.sorted.sifted.bam.bai + touch ${prefix}.cancer.introns + touch ${prefix}.cancer.introns.prelim + touch ${prefix}.introns + touch ${prefix}.chckpts + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ctat-splicing: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test new file mode 100644 index 00000000..dad961c4 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process CTATSPLICING_STARTOCANCERINTRONS" + script "../main.nf" + process "CTATSPLICING_STARTOCANCERINTRONS" + options "-stub" + + test("test without BAM") { + + when { + params { + outdir 
= "tests/results" + } + process { + """ + input[0] = [ + [id:"test"], + file("test.SJ.out.tab"), + file("test.Chimeric.out.junctions"), + [], + [] + ] + input[1] = [ + [id:"reference"], + file("ctat_genome_lib") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, value -> !key.isNumber() }).match() } + ) + } + } + + test("test with BAM") { + + when { + params { + outdir = "tests/results" + } + process { + """ + input[0] = [ + [id:"test"], + file("test.SJ.out.tab"), + file("test.Chimeric.out.junctions"), + file("test.Aligned.sortedByCoord.out.bam"), + [] + ] + input[1] = [ + [id:"reference"], + file("ctat_genome_lib") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, value -> !key.isNumber() }).match() } + ) + } + } +} diff --git a/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap new file mode 100644 index 00000000..b0ee3416 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap @@ -0,0 +1,191 @@ +{ + "test without BAM": { + "content": [ + { + "cancer_introns": [ + [ + { + "id": "test" + }, + "test.cancer.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_prelim": [ + [ + { + "id": "test" + }, + "test.cancer.introns.prelim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bai": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bam": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts": [ + [ + { + "id": "test" + }, + "test.chckpts:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bai": [ + [ + { + "id": "test" + }, + 
"test.gene_reads.sorted.sifted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bam": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_html": [ + + ], + "igv_tracks": [ + + ], + "introns": [ + [ + { + "id": "test" + }, + "test.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns_igv_bed": [ + + ], + "versions": [ + "versions.yml:md5,fcf861a15f9951342a874b6bc476a37e" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-17T13:35:13.723215847" + }, + "test with BAM": { + "content": [ + { + "cancer_introns": [ + [ + { + "id": "test" + }, + "test.cancer.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_prelim": [ + [ + { + "id": "test" + }, + "test.cancer.introns.prelim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bai": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bam": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts": [ + [ + { + "id": "test" + }, + "test.chckpts:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bai": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bam": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_html": [ + [ + { + "id": "test" + }, + "test.ctat-splicing.igv.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_tracks": [ + [ + { + "id": "test" + }, + "test.igv.tracks:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns": [ + [ + { + "id": "test" + }, + "test.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns_igv_bed": [ + [ + { + "id": "test" + }, + 
"test.introns.for_IGV.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,fcf861a15f9951342a874b6bc476a37e" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-17T13:33:27.36677449" + } +} \ No newline at end of file diff --git a/modules/local/fusioncatcher/build/environment.yml b/modules/local/fusioncatcher/build/environment.yml new file mode 100644 index 00000000..8e83de36 --- /dev/null +++ b/modules/local/fusioncatcher/build/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fusioncatcher=1.33 diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf new file mode 100644 index 00000000..db1c7210 --- /dev/null +++ b/modules/local/fusioncatcher/build/main.nf @@ -0,0 +1,45 @@ +process FUSIONCATCHER_BUILD { + tag "fusioncatcher_build" + label 'process_medium' + + conda "${projectDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d5/d53f36e9e01d14a0ae8e15f8046f52b2883c970c27fe43fdfbd9440a55f5403f/data' : + 'community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f' }" + + input: + val genome_gencode_version + + output: + tuple env(meta), path("human_v${genome_gencode_version}"), emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + def args = task.ext.args ?: '' + meta = [ id: "human_v${genome_gencode_version}" ] + """ + fusioncatcher-build.py \\ + -g homo_sapiens \\ + -o human_v${genome_gencode_version} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')" + END_VERSIONS + """ + + stub: + """ + mkdir human_v${genome_gencode_version} + touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')" + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/build/meta.yml b/modules/local/fusioncatcher/build/meta.yml new file mode 100644 index 00000000..202be7e1 --- /dev/null +++ b/modules/local/fusioncatcher/build/meta.yml @@ -0,0 +1,24 @@ +name: fusioncatcher_build +description: Build genome for fusioncatcher +keywords: + - sort +tools: + - fusioncatcher: + description: Build genome for fusioncatcher + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Path to fusioncatcher references + +authors: + - "@praveenraj2018, @rannick" diff --git 
a/modules/local/fusioncatcher/detect/environment.yml b/modules/local/fusioncatcher/detect/environment.yml new file mode 100644 index 00000000..8e83de36 --- /dev/null +++ b/modules/local/fusioncatcher/detect/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fusioncatcher=1.33 diff --git a/modules/local/fusioncatcher/detect/main.nf b/modules/local/fusioncatcher/detect/main.nf new file mode 100644 index 00000000..7980439f --- /dev/null +++ b/modules/local/fusioncatcher/detect/main.nf @@ -0,0 +1,59 @@ +process FUSIONCATCHER_DETECT { + tag "$meta.id" + label 'process_high' + + conda "${projectDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d5/d53f36e9e01d14a0ae8e15f8046f52b2883c970c27fe43fdfbd9440a55f5403f/data' : + 'community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f' }" + + input: + tuple val(meta), path(fastqs, stageAs: "input/*") + tuple val(meta2), path(reference) + + output: + tuple val(meta), path("*.fusioncatcher.fusion-genes.txt"), emit: fusions, optional: true + tuple val(meta), path("*.fusioncatcher.summary.txt") , emit: summary, optional: true + tuple val(meta), path("*.fusioncatcher.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def single_end = meta.single_end ? "--single-end" : "" + """ + fusioncatcher.py \\ + -d ${reference} \\ + -i input \\ + -p ${task.cpus} \\ + -o . 
\\ + --skip-blat \\ + ${single_end} \\ + ${args} + + mv final-list_candidate-fusion-genes.txt ${prefix}.fusioncatcher.fusion-genes.txt + mv summary_candidate_fusions.txt ${prefix}.fusioncatcher.summary.txt + mv fusioncatcher.log ${prefix}.fusioncatcher.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: "\$(fusioncatcher.py --version 2>&1 | awk '{print \$2}')" + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.fusioncatcher.fusion-genes.txt + touch ${prefix}.fusioncatcher.summary.txt + touch ${prefix}.fusioncatcher.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: "\$(fusioncatcher.py --version 2>&1 | awk '{print \$2}')" + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/detect/meta.yml b/modules/local/fusioncatcher/detect/meta.yml new file mode 100644 index 00000000..7c8ee425 --- /dev/null +++ b/modules/local/fusioncatcher/detect/meta.yml @@ -0,0 +1,53 @@ +name: fusioncatcher +description: FusionCatcher searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data +keywords: + - fusioncatcher +tools: + - fusioncatcher: + description: FusionCatcher searches for novel/known somatic fusion genes, translocations, and chimeras in RNA-seq data + homepage: https://github.com/ndaniel/fusioncatcher + documentation: https://github.com/ndaniel/fusioncatcher/wiki + tool_dev_url: https://github.com/ndaniel/fusioncatcher + doi: "10.1101/011650v1" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq}" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fusions: + type: file + description: Final list of candidate fusion genes + pattern: "*.fusioncatcher.fusion-genes.txt" + - summary: + type: file + description: Summary of fusion results + pattern: "*.fusioncatcher_summary.txt" + - log: + type: file + description: Log of fusion results + pattern: "*.fusioncatcher.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@praveenraj2018. @rannick" diff --git a/modules/local/fusioncatcher/download/environment.yml b/modules/local/fusioncatcher/download/environment.yml new file mode 100644 index 00000000..8e83de36 --- /dev/null +++ b/modules/local/fusioncatcher/download/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fusioncatcher=1.33 diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf new file mode 100644 index 00000000..af585d62 --- /dev/null +++ b/modules/local/fusioncatcher/download/main.nf @@ -0,0 +1,47 @@ +process FUSIONCATCHER_DOWNLOAD { + tag "fusioncatcher_download" + label 'process_medium' + + conda "${projectDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d5/d53f36e9e01d14a0ae8e15f8046f52b2883c970c27fe43fdfbd9440a55f5403f/data' : + 'community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f' }" + + input: + val genome_gencode_version + + output: + tuple env(meta), path("*"), emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + def args = task.ext.args ?: '' + meta = [ id: "human_v${genome_gencode_version}" ] + """ + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.aa + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ab + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ac + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ad + cat human_${genome_gencode_version}.tar.gz.* | tar xz + rm human_${genome_gencode_version}.tar* + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')" + END_VERSIONS + """ + + stub: + """ + mkdir human_v${genome_gencode_version} + touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: "\$(fusioncatcher --version 2>&1 | awk '{print \$2}')" + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/download/meta.yml b/modules/local/fusioncatcher/download/meta.yml new file mode 100644 index 00000000..40421a4e --- /dev/null +++ b/modules/local/fusioncatcher/download/meta.yml @@ -0,0 +1,25 @@ +name: fusioncatcher_download +description: Build genome for fusioncatcher +keywords: + - sort +tools: + - fusioncatcher: + description: Build genome for fusioncatcher + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: 
https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusioninspector/environment.yml b/modules/local/fusioninspector/environment.yml new file mode 100644 index 00000000..26605058 --- /dev/null +++ b/modules/local/fusioninspector/environment.yml @@ -0,0 +1,12 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fusion-inspector=2.10.0 + - bioconda::igv-reports=1.14.1 + - bioconda::perl-json-xs=4.03 + - bioconda::pysam=0.22.1 + - conda-forge::perl-carp-assert=0.21 + - pip + - pip: + - intervaltree==3.1.0 diff --git a/modules/local/fusioninspector/main.nf b/modules/local/fusioninspector/main.nf new file mode 100644 index 00000000..3659220b --- /dev/null +++ b/modules/local/fusioninspector/main.nf @@ -0,0 +1,176 @@ +process FUSIONINSPECTOR { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/13/139b94a1f10c6e23a8c27eaed1e5a689db978a513d0ee155e74d35f0970814fe/data' : + 'community.wave.seqera.io/library/fusion-inspector_igv-reports_perl-json-xs_pysam_pruned:c6147971d107ab11'}" + + input: + tuple val(meta), path(reads), path(fusion_list) + path reference + + output: + tuple val(meta), path("*FusionInspector.fusions.tsv") , emit: tsv + tuple val(meta), path("*.coding_effect") , optional:true, emit: tsv_coding_effect + tuple val(meta), path("*.gtf") , optional:true, emit: out_gtf + tuple val(meta), path("*FusionInspector.log") , emit: log + tuple val(meta), path("*html") , emit: html + tuple val(meta), path("*abridged.tsv") , emit: abridged_tsv + tuple val(meta), path("IGV_inputs") , emit: igv_inputs + tuple val(meta), path("fi_workdir") , emit: fi_workdir + tuple val(meta), path("chckpts_dir") , emit: chckpts_dir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def fasta = meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + """ + FusionInspector \\ + --fusions $fusion_list \\ + --genome_lib ${reference} \\ + $fasta \\ + --CPU ${task.cpus} \\ + -O . 
\\ + --out_prefix $prefix \\ + --vis $args $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.FusionInspector.log + touch ${prefix}.FusionInspector.fusions.abridged.tsv + touch ${prefix}.FusionInspector.fusions.tsv + touch ${prefix}.fusion_inspector_web.html + mkdir -p chckpts_dir + touch chckpts_dir/add_FFPM.ok + touch chckpts_dir/add_splice_info.ok + touch chckpts_dir/append_microH_info.ok + touch chckpts_dir/blast_filter.ok + touch chckpts_dir/coalesce_junc_n_span.ok + touch chckpts_dir/cp_consol_bam.ok + touch chckpts_dir/cp_contigs_file_workdir + touch chckpts_dir/cp_final.ok + touch chckpts_dir/cp_gtf_file_workdir.ok + touch chckpts_dir/cp_tracks_json.ok + touch chckpts_dir/create_fi_igvjs.ok + touch chckpts_dir/cytoband.ok + touch chckpts_dir/EM_adj_counts.ok + touch chckpts_dir/filter_by_frag_threshs.ok + touch chckpts_dir/final.abridged.ok + touch chckpts_dir/fusion_annotator.ok + touch chckpts_dir/fusion_coding_region_effect.ok + touch chckpts_dir/fusion_contigs.ok + touch chckpts_dir/fusion_reports_html.ok + touch chckpts_dir/get_fusion_JUNCTION_reads_from_bam.ok + touch chckpts_dir/get_fusion_SPANNING_reads_from_bam.ok + touch chckpts_dir/index_consol_bam.ok + touch chckpts_dir/init_EM_adj_counts.ok + touch chckpts_dir/init_spanning_reads_bam.ok + touch chckpts_dir/mark_dup_reads.ok + touch chckpts_dir/mark_dups_reads.index.ok + touch chckpts_dir/merged_contig_fai.ok + touch chckpts_dir/merged_contig_gtf_to_bed.ok + touch chckpts_dir/microH.dat.ok + touch chckpts_dir/prep_igv_extract_junc_reads.ok + touch chckpts_dir/prep_igv_junc_reads_bam.ok + touch chckpts_dir/prep_igv_pfam_bed.ok + touch chckpts_dir/prep_igv_pfam_gff3.ok + touch chckpts_dir/prep_igv_seqsim_bed.ok + touch chckpts_dir/prep_igv_seqsim_gff3.ok + touch 
chckpts_dir/prep_spanning_reads.ok + touch chckpts_dir/run_STAR.ok + touch chckpts_dir/samtools_idx_junc_reads_bam.ok + touch chckpts_dir/samtools_index_span_reads_bam.ok + touch chckpts_dir/span_reads_acc.ok + touch chckpts_dir/${prefix}.bed.bedsort.ok + touch chckpts_dir/${prefix}.bed.bgzip.ok + touch chckpts_dir/${prefix}.bed.tabix.ok + mkdir -p fi_workdir/_STARgenome + touch fi_workdir/Log.final.out + touch fi_workdir/Log.out + touch fi_workdir/Log.progress.out + touch fi_workdir/microH.dat + touch fi_workdir/pipeliner.456.cmds + touch fi_workdir/SJ.out.tab + touch fi_workdir/star_align.ok + touch fi_workdir/_STARgenome/exonGeTrInfo.tab + touch fi_workdir/_STARgenome/exonInfo.tab + touch fi_workdir/_STARgenome/geneInfo.tab + touch fi_workdir/_STARgenome/sjdbInfo.txt + touch fi_workdir/_STARgenome/sjdbList.fromGTF.out.tab + touch fi_workdir/_STARgenome/sjdbList.out.tab + touch fi_workdir/_STARgenome/transcriptInfo.tab + touch fi_workdir/${prefix}.fa + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj.min_frag_thresh + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.info + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter.info + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.fusion_junction_read_accs + touch fi_workdir/${prefix}.fusion_preds.coalesced.summary.fusion_spanning_read_accs + touch fi_workdir/${prefix}.gtf + touch 
fi_workdir/${prefix}.igv.Pfam.gff3 + touch fi_workdir/${prefix}.igv.seqsimilar.gff3 + touch fi_workdir/${prefix}.post_blast_and_promiscuity_filter + touch fi_workdir/${prefix}.post_blast_and_promiscuity_filter.EMadj + touch fi_workdir/${prefix}.post_blast_and_promiscuity_filter.EMadj.FFPM + touch fi_workdir/${prefix}.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH + touch fi_workdir/${prefix}.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated + touch fi_workdir/${prefix}.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated.coding_effect + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.bai + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.failed_reads_during_span_analysis + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.fusion_junc_reads.sam + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.fusion_junction_info + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.fusion_spanning_info + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.fusion_span_reads.sam + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.read_align_counts.idx + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.read_align_counts.idx.ok + touch fi_workdir/${prefix}.star.cSorted.dupsMarked.bam.spanning_reads_want.idx + touch fi_workdir/${prefix}.star.sortedByCoord.out.bam + touch fi_workdir/${prefix}.star.sortedByCoord.out.bam.bai + touch fi_workdir/${prefix}.star.sortedByCoord.out.bam.bai.ok + touch fi_workdir/${prefix}.star.sortedByCoord.out.bam.ok + mkdir -p IGV_inputs + touch IGV_inputs/cytoBand.txt + touch IGV_inputs/${prefix}.bed + gzip -c < /dev/null > IGV_inputs/${prefix}.bed.sorted.bed.gz + touch IGV_inputs/${prefix}.bed.sorted.bed.gz.tbi + touch IGV_inputs/${prefix}.consolidated.bam + touch IGV_inputs/${prefix}.consolidated.bam.bai + touch IGV_inputs/${prefix}.fa + touch IGV_inputs/${prefix}.fa.fai + touch IGV_inputs/${prefix}.fusion_inspector_web.json + 
touch IGV_inputs/${prefix}.gtf + touch IGV_inputs/${prefix}.igv.Pfam.bed + touch IGV_inputs/${prefix}.igv.seqsimilar.bed + touch IGV_inputs/${prefix}.junction_reads.bam + touch IGV_inputs/${prefix}.junction_reads.bam.bai + touch IGV_inputs/${prefix}.ROI.bed + touch IGV_inputs/${prefix}.spanning_reads.bam + touch IGV_inputs/${prefix}.spanning_reads.bam.bai + touch IGV_inputs/tracks.json + touch IGV_inputs/TrinityFusion.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ +} diff --git a/modules/local/fusioninspector/meta.yml b/modules/local/fusioninspector/meta.yml new file mode 100644 index 00000000..cc03239b --- /dev/null +++ b/modules/local/fusioninspector/meta.yml @@ -0,0 +1,80 @@ +name: fusioninspector +description: Validation of Fusion Transcript Predictions +keywords: + - fusioninspector +tools: + - fusioninspector: + description: Validation of Fusion Transcript Predictions + homepage: https://github.com/FusionInspector/FusionInspector + documentation: https://github.com/FusionInspector/FusionInspector/wiki + tool_dev_url: https://github.com/FusionInspector/FusionInspector + doi: "10.1101/2021.08.02.454639" + licence: https://github.com/FusionInspector/FusionInspector/blob/master/LICENSE.txt + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq*}" + - fusion_list: + type: file + description: Candidate fusions to validate, handed to FusionInspector via --fusions + - reference: + type: directory + description: Path to ctat references + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: FusionInspector fusion predictions + pattern: "*FusionInspector.fusions.tsv" + - tsv_coding_effect: + type: file + description: Coding-effect file (optional, emitted only when produced) + pattern: "*.coding_effect" + - out_gtf: + type: file + description: GTF file (optional, emitted only when produced) + pattern: "*.gtf" + - log: + type: file + description: FusionInspector log + pattern: "*FusionInspector.log" + - html: + type: file + description: FusionInspector HTML report + pattern: "*html" + - abridged_tsv: + type: file + description: Abridged FusionInspector fusion predictions + pattern: "*abridged.tsv" + - igv_inputs: + type: directory + description: IGV_inputs directory written by FusionInspector + pattern: "IGV_inputs" + - fi_workdir: + type: directory + description: FusionInspector working directory + pattern: "fi_workdir" + - chckpts_dir: + type: directory + description: Directory of checkpoint marker files + pattern: "chckpts_dir" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@rannick" diff --git a/modules/local/fusioninspector/tests/main.nf.test b/modules/local/fusioninspector/tests/main.nf.test new file mode 100644 index 00000000..26a7ca6d --- /dev/null +++ b/modules/local/fusioninspector/tests/main.nf.test @@ -0,0 +1,147 @@ +nextflow_process { + + name "Test Process FUSIONINSPECTOR" + script "../main.nf" + process "FUSIONINSPECTOR" + tag "modules" + tag "modules_local" + tag "fusioninspector" + + test("FUSIONINSPECTOR") { + config './nextflow.config' + + setup { + run("STARFUSION_BUILD") { + script "../../starfusion/build/main.nf" + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input[2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input[3] = "human" + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz"), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz") + ], // reads + [ + file("https://github.com/FusionInspector/FusionInspector/raw/master/test/fusion_targets.A.txt") + ] + ] + input[1] = STARFUSION_BUILD.out.reference.map { it[1] } + """ + } + } + + then { + + def unstableFiles = [ + "test.bed", + "test.consolidated.bam", +
"test.consolidated.bam.bai", + "test.junction_reads.bam", + "test.junction_reads.bam.bai", + "test.spanning_reads.bam", + "Log.final.out", + "Log.out", + "microH.dat", + "test.fusion_preds.coalesced.summary", + "test.fusion_preds.coalesced.summary.EMadj", + "test.fusion_preds.coalesced.summary.min_frag_thresh", + "test.fusion_preds.coalesced.summary.wSpliceInfo", + "test.fusion_preds.coalesced.summary.wSpliceInfo.post_blast_filter", + "test.fusion_preds.coalesced.summary.wSpliceInfo.post_blast_filter.post_promisc_filter", + "test.fusion_preds.coalesced.summary.fusion_junction_reads_acc", + "test.fusion_preds.coalesced.summary.fusion_spanning_reads_acc", + "test.post_blast_and_promiscuity_filter", + "test.post_blast_and_promiscuity_filter.EMadj", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated.coding_effect", + "test.star.cSorted.dupsMarked.bam", + "test.star.cSorted.dupsMarked.bam.bai", + "test.star.cSorted.dupsMarked.bam.fusion_junction_info", + "test.star.cSorted.dupsMarked.bam.fusion_spanning_info", + "test.star.cSorted.dupsMarked.bam.read_align_counts", + "test.star.cSorted.dupsMarked.bam.read_align_counts.idx", + "test.star.cSorted.dupsMarked.bam.spanning_reads_want.idx", + "test.star.sortedByCoord.out.bam", + "test.star.sortedByCoord.out.bam.bai", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh", + "test.fusion_preds.coalesced.summary.fusion_junction_read_accs", + "test.fusion_preds.coalesced.summary.fusion_spanning_read_accs", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter" + ] + + assertAll( + { assert 
process.success }, + { assert snapshot( + file(process.out.tsv[0][1]).name, + process.out.tsv_coding_effect, // TODO: These files do not seem to be produced. Consult Annick. + process.out.out_gtf, // TODO: These files do not seem to be produced. Consult Annick. + file(process.out.log[0][1]).name, + file(process.out.html[0][1]).name, + file(process.out.abridged_tsv[0][1]).name, + file(process.out.igv_inputs[0][1]).listFiles().findAll { file -> !unstableFiles.contains(file.name) }, + file(process.out.fi_workdir[0][1]).listFiles().findAll { file -> !unstableFiles.contains(file.name) }, + file(process.out.igv_inputs[0][1]).list().findAll { file -> !unstableFiles.contains(file) }, + file(process.out.fi_workdir[0][1]).list().findAll { file -> !unstableFiles.contains(file) }, + process.out.chckpts_dir, + process.out.versions + ).match() } + ) + } + + } + + test("FUSIONINSPECTOR stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz"), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz") + ], // reads + [ + file("https://github.com/FusionInspector/FusionInspector/raw/master/test/fusion_targets.A.txt") + ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out + ).match() } + ) + } + + } + +} diff --git a/modules/local/fusioninspector/tests/main.nf.test.snap b/modules/local/fusioninspector/tests/main.nf.test.snap new file mode 100644 index 00000000..6e3f05c2 --- /dev/null +++ b/modules/local/fusioninspector/tests/main.nf.test.snap @@ -0,0 +1,538 @@ +{ + "FUSIONINSPECTOR stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.FusionInspector.fusions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test.FusionInspector.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusion_inspector_web.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.FusionInspector.fusions.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "TrinityFusion.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "cytoBand.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ROI.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.sorted.bed.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.sorted.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.consolidated.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.consolidated.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_inspector_web.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.Pfam.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.seqsimilar.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.junction_reads.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.junction_reads.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.spanning_reads.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.spanning_reads.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "tracks.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "microH.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "pipeliner.456.cmds:md5,d41d8cd98f00b204e9800998ecf8427e", + "star_align.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter.info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.fusion_junction_read_accs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.fusion_spanning_read_accs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.Pfam.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.seqsimilar.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.post_blast_and_promiscuity_filter.EMadj:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated.coding_effect:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.failed_reads_during_span_analysis:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_junc_reads.sam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_junction_info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_span_reads.sam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_spanning_info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.read_align_counts.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.read_align_counts.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.spanning_reads_want.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.bai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "EM_adj_counts.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "add_FFPM.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"add_splice_info.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "append_microH_info.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_filter.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "coalesce_junc_n_span.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_consol_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_contigs_file_workdir:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_final.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gtf_file_workdir.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_tracks_json.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "create_fi_igvjs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cytoband.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "filter_by_frag_threshs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "final.abridged.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annotator.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_coding_region_effect.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_contigs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_reports_html.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "get_fusion_JUNCTION_reads_from_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "get_fusion_SPANNING_reads_from_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_consol_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "init_EM_adj_counts.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "init_spanning_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mark_dup_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mark_dups_reads.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "merged_contig_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "merged_contig_gtf_to_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "microH.dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_extract_junc_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_junc_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_pfam_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_pfam_gff3.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"prep_igv_seqsim_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_seqsim_gff3.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_spanning_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "run_STAR.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools_idx_junc_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools_index_span_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "span_reads_acc.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.bedsort.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.bgzip.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.tabix.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "9": [ + "versions.yml:md5,ad8d0da5929dfa70794bccf8765ddb23" + ], + "abridged_tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.FusionInspector.fusions.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts_dir": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "EM_adj_counts.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "add_FFPM.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "add_splice_info.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "append_microH_info.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_filter.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "coalesce_junc_n_span.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_consol_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_contigs_file_workdir:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_final.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gtf_file_workdir.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_tracks_json.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "create_fi_igvjs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cytoband.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "filter_by_frag_threshs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "final.abridged.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annotator.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_coding_region_effect.ok:md5,d41d8cd98f00b204e9800998ecf8427e", 
+ "fusion_contigs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_reports_html.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "get_fusion_JUNCTION_reads_from_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "get_fusion_SPANNING_reads_from_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_consol_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "init_EM_adj_counts.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "init_spanning_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mark_dup_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mark_dups_reads.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "merged_contig_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "merged_contig_gtf_to_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "microH.dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_extract_junc_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_junc_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_pfam_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_pfam_gff3.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_seqsim_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_seqsim_gff3.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_spanning_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "run_STAR.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools_idx_junc_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools_index_span_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "span_reads_acc.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.bedsort.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.bgzip.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.tabix.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "fi_workdir": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "microH.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "pipeliner.456.cmds:md5,d41d8cd98f00b204e9800998ecf8427e", + "star_align.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter.info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.fusion_junction_read_accs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.fusion_spanning_read_accs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.Pfam.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.igv.seqsimilar.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.post_blast_and_promiscuity_filter.EMadj.FFPM.wMicroH.annotated.coding_effect:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.failed_reads_during_span_analysis:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_junc_reads.sam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_junction_info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_span_reads.sam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_spanning_info:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.read_align_counts.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.read_align_counts.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.spanning_reads_want.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.bai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.fusion_inspector_web.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_inputs": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "TrinityFusion.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "cytoBand.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.ROI.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.sorted.bed.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.bed.sorted.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.consolidated.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.consolidated.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_inspector_web.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.Pfam.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.seqsimilar.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.junction_reads.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.junction_reads.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.spanning_reads.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.spanning_reads.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e", + "tracks.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.FusionInspector.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "out_gtf": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.FusionInspector.fusions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv_coding_effect": [ + + ], + "versions": [ + "versions.yml:md5,ad8d0da5929dfa70794bccf8765ddb23" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-31T13:48:13.020571472" + }, + "FUSIONINSPECTOR": { + "content": [ + "test.FusionInspector.fusions.tsv", + [ + + ], + [ + + ], + "FusionInspector.log", + 
"test.fusion_inspector_web.html", + "test.FusionInspector.fusions.abridged.tsv", + [ + "test.fa:md5,a54bec9daad731a086e8533f89d9c2e6", + "test.gtf:md5,d523061bd8b443a014d4cca9406ec772", + "test.fa.fai:md5,15ddfba09f4abf8a15de5eea6c6286ee", + "cytoBand.txt:md5,d8a84202fa42d9455222c94ae641c8b4", + "test.bed.sorted.bed.gz:md5,a306900912cc4479f06d61bea00a3ea6", + "test.bed.sorted.bed.gz.tbi:md5,8c9d524558ade2b82c67d60bcfa0de6b", + "test.spanning_reads.bam.bai:md5,6e035943a5c90026bd499e78c7eeed4a", + "test.igv.Pfam.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.seqsimilar.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_inspector_web.json:md5,ebe4faf6b6133b40d6c2847195cf55a9", + "test.ROI.bed:md5,671de301af27b3dab40ee224401d3a0e", + "TrinityFusion.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "tracks.json:md5,72f2234c2ad2b7997226283acf040161" + ], + [ + "test.fa:md5,a54bec9daad731a086e8533f89d9c2e6", + "test.gtf:md5,d523061bd8b443a014d4cca9406ec772", + "pipeliner.456.cmds:md5,bb8bf0e749f794dfb37efc2ff7c24be0", + "Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8", + [ + "exonGeTrInfo.tab:md5,8815dc1fce3a5f6cb836ea8f822e9c81", + "exonInfo.tab:md5,e51c649a0ca61719e8d4f7324c069c60", + "geneInfo.tab:md5,2f334fb0772c906860c9489ee0d8f3ff", + "sjdbInfo.txt:md5,3a27e9e4b5f402d3daed85dd6f06519e", + "sjdbList.fromGTF.out.tab:md5,fd4e89499ee467de647bd202d8894628", + "sjdbList.out.tab:md5,dad82973249f1c094e36610d2663ddec", + "transcriptInfo.tab:md5,69658043a209e2bb59c1da71b11fd5c3" + ], + "SJ.out.tab:md5,2ed3417c0ef9daa230a72e6fd3137588", + "star_align.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.sortedByCoord.out.bam.bai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.fusion_junc_reads.sam:md5,7585d76d73eacfc222ce76cdb4e22d0c", + "test.star.cSorted.dupsMarked.bam.fusion_span_reads.sam:md5,c9e99bbe81bdbadf5fa56176828540ae", + 
"test.star.cSorted.dupsMarked.bam.read_align_counts.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.star.cSorted.dupsMarked.bam.failed_reads_during_span_analysis:md5,a1a0daca698e0ff4141f820c9fe64360", + "test.igv.Pfam.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.igv.seqsimilar.gff3:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.info:md5,ed6214ba6c0cd135afd76a808dfdbd5d", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter.info:md5,ed6214ba6c0cd135afd76a808dfdbd5d" + ], + [ + "test.fa", + "test.gtf", + "test.fa.fai", + "cytoBand.txt", + "test.bed.sorted.bed.gz", + "test.bed.sorted.bed.gz.tbi", + "test.spanning_reads.bam.bai", + "test.igv.Pfam.bed", + "test.igv.seqsimilar.bed", + "test.fusion_inspector_web.json", + "test.ROI.bed", + "TrinityFusion.bed", + "tracks.json" + ], + [ + "test.fa", + "test.gtf", + "pipeliner.456.cmds", + "Log.progress.out", + "_STARgenome", + "SJ.out.tab", + "star_align.ok", + "test.star.sortedByCoord.out.bam.ok", + "test.star.sortedByCoord.out.bam.bai.ok", + "test.star.cSorted.dupsMarked.bam.fusion_junc_reads.sam", + "test.star.cSorted.dupsMarked.bam.fusion_span_reads.sam", + "test.star.cSorted.dupsMarked.bam.read_align_counts.idx.ok", + "test.star.cSorted.dupsMarked.bam.failed_reads_during_span_analysis", + "test.igv.Pfam.gff3", + "test.igv.seqsimilar.gff3", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.info", + "test.fusion_preds.coalesced.summary.EMadj.min_frag_thresh.wSpliceInfo.post_blast_filter.post_promisc_filter.info" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "EM_adj_counts.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "add_FFPM.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "add_splice_info.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "append_microH_info.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_filter.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "coalesce_junc_n_span.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_consol_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_contigs_file_workdir:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_final.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gtf_file_workdir.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_tracks_json.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "create_fi_igvjs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cytoband.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "filter_by_frag_threshs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "final.abridged.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annotator.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_coding_region_effect.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_contigs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_reports_html.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "get_fusion_JUNCTION_reads_from_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "get_fusion_SPANNING_reads_from_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_consol_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "init_EM_adj_counts.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "init_spanning_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mark_dup_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mark_dups_reads.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "merged_contig_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "merged_contig_gtf_to_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "microH.dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_extract_junc_reads.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_junc_reads_bam.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_pfam_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_pfam_gff3.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_seqsim_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "prep_igv_seqsim_gff3.ok:md5,d41d8cd98f00b204e9800998ecf8427e", 
// FUSIONREPORT
// Runs `fusion_report run` on the fusion calls of the enabled callers and
// renames the fixed-name outputs so they carry the sample prefix.
//
// Inputs
//   [meta, reads, arriba_fusions, starfusion_fusions, fusioncatcher_fusions]
//   [meta2, fusionreport_ref]  - reference directory; the script reads
//                                DB-timestamp.txt from it for versions.yml
//   tools_cutoff               - value passed to --tool-cutoff
// Outputs
//   fusion_list, fusion_list_filtered, report (always)
//   html, csv, json (optional)
//   versions
process FUSIONREPORT {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d9/d9d1075dc45da6b08ec99c6e8bcc83e0ab71a674e7efdc7a36e459539793fcf9/data' :
        'community.wave.seqera.io/library/fusion-report_openpyxl:6748677442b83a9a'}"


    input:
    tuple val(meta), path(reads), path(arriba_fusions), path(starfusion_fusions), path(fusioncatcher_fusions)
    tuple val(meta2), path(fusionreport_ref)
    val(tools_cutoff)

    output:
    tuple val(meta), path("*fusionreport.tsv")                  , emit: fusion_list
    tuple val(meta), path("*fusionreport_filtered.tsv")         , emit: fusion_list_filtered
    tuple val(meta), path("*index.html")                        , emit: report
    tuple val(meta), path("*_*.html")           , optional:true, emit: html
    tuple val(meta), path("*.csv")              , optional:true, emit: csv
    tuple val(meta), path("*.json")             , optional:true, emit: json
    path "versions.yml"                                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    // One entry per caller; a caller contributes its flag when it is enabled
    // on its own or through params.all. Each flag keeps its trailing space so
    // the joined string is the same as incremental concatenation would give.
    def run_all    = params.all
    def tool_flags = [
        (params.arriba        || run_all) ? "--arriba ${arriba_fusions} "               : '',
        (params.starfusion    || run_all) ? "--starfusion ${starfusion_fusions} "       : '',
        (params.fusioncatcher || run_all) ? "--fusioncatcher ${fusioncatcher_fusions} " : '',
    ].join('')
    """
    fusion_report run ${meta.id} . ${fusionreport_ref} ${tool_flags} --allow-multiple-gene-symbols --tool-cutoff ${tools_cutoff} ${args} ${args2}

    mv fusion_list.tsv ${prefix}.fusionreport.tsv
    mv fusion_list_filtered.tsv ${prefix}.fusionreport_filtered.tsv
    mv index.html ${prefix}_fusionreport_index.html
    [ ! -f fusions.csv ] || mv fusions.csv ${prefix}.fusions.csv
    [ ! -f fusions.json ] || mv fusions.json ${prefix}.fusions.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fusion_report: \$(fusion_report --version | sed 's/fusion-report //')
        fusion_report DB retrieval: \$(cat ${fusionreport_ref}/DB-timestamp.txt)
    END_VERSIONS
    """

    stub:
    // Creates one empty placeholder per declared output; AAA_BBB.html stands
    // in for a per-fusion page matched by the "*_*.html" glob.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.fusionreport_filtered.tsv
    touch ${prefix}.fusionreport.tsv
    touch ${prefix}_fusionreport_index.html
    touch AAA_BBB.html
    touch ${prefix}.fusions.csv
    touch ${prefix}.fusions.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fusion_report: \$(fusion_report --version | sed 's/fusion-report //')
    END_VERSIONS
    """
}
// nf-test suite for the local FUSIONREPORT process.
// Both tests first run FUSIONREPORT_DOWNLOAD in `setup` and feed its
// `fusionreport_ref` output into FUSIONREPORT as the reference input.
nextflow_process {

    name "Test Process FUSIONREPORT"
    script "../main.nf"
    process "FUSIONREPORT"
    tag "modules"
    tag "modules_local"
    tag "fusionreport"


    // Real run with all three callers enabled.
    test("FUSIONREPORT - arriba - starfusion - fusioncatcher") {
        config './nextflow.config'

        // Produce the reference directory consumed as input[1] below.
        setup {
            run("FUSIONREPORT_DOWNLOAD") {
                script "../../../fusionreport/download/main.nf"
                process {
                    """
                    """
                }
            }
        }

        when {
            // input[0]: meta map, reads, then one fusion-call file per caller
            // input[1]: reference from the setup step
            // input[2]: the process' `tools_cutoff` value
            process {
                """

                input[0] = [
                    [ id:'test_sample' ], // meta map
                    file("https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_1.fq.gz"),
                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/arriba.tsv"),
                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/starfusion.tsv"),
                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/fusioncatcher.txt")
                ]

                input[1] = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref
                input[2] = 1
                """
            }
            params {
                arriba = true
                starfusion = true
                fusioncatcher = true
                no_cosmic = true
                outdir = "$outputDir"
            }
        }

        then {
            // The report, the per-fusion html pages and versions.yml are
            // snapshotted by file name only; the remaining channels are
            // snapshotted in full.
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.fusion_list,
                    process.out.fusion_list_filtered,
                    process.out.csv,
                    file(process.out.report[0][1]).name,
                    process.out.html[0][1].collect { file(it).name },
                    process.out.json,
                    process.out.versions.flatten().first().toString().split('/')[-1] // md5sum not stable as versions contains DB timestamp
                ).match() }
            )
        }

    }

    // Same inputs and params, executed with `-stub`; the whole output map is snapshotted.
    test("FUSIONREPORT - arriba - starfusion - fusioncatcher - stub") {
        config './nextflow.config'

        // Produce the reference directory consumed as input[1] below.
        setup {
            run("FUSIONREPORT_DOWNLOAD") {
                script "../../../fusionreport/download/main.nf"
                process {
                    """
                    """
                }
            }
        }

        options "-stub"

        when {
            process {
                """

                input[0] = [
                    [ id:'test_sample' ], // meta map
                    file("https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_1.fq.gz"),
                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/arriba.tsv"),
                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/starfusion.tsv"),
                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/fusioncatcher.txt")
                ]

                input[1] = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref

                input[2] = 1
                """
            }
            params {
                arriba = true
                starfusion = true
                fusioncatcher = true
                no_cosmic = true
                outdir = "$outputDir"
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }


}
fusioncatcher": { + "content": [ + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport.tsv:md5,3593b7021f26cc5427fdc96f0d1c72f0" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport_filtered.tsv:md5,3593b7021f26cc5427fdc96f0d1c72f0" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.csv:md5,49f378c2112d7e0b3b17d9095c79e6bd" + ] + ], + "test_sample_fusionreport_index.html", + [ + "AKAP9_BRAF.html", + "BRD4-1_NUTM1.html", + "BRD4_NUTM1.html", + "CD74_AL132671.2.html", + "CD74_ROS1.html", + "CIC_DUX4.html", + "DUX4_IGH@.html", + "EML4_ALK.html", + "ETV6_NTRK3.html", + "EWSR1_ATF1.html", + "EWSR1_FLI1.html", + "FGFR3_TACC3.html", + "FIP1L1_PDGFRA.html", + "GOPC_ROS1.html", + "HOOK3_RET.html", + "IGH@_CRLF2.html", + "MALT1_IGH@.html", + "NPM1_ALK.html", + "TMPRSS2_ETV1.html", + "test_sample_fusionreport_index.html" + ], + [ + [ + { + "id": "test_sample" + }, + "fusion_genes_mqc.json:md5,b196dc8d064a47d17fd3a032b8fbed4f" + ] + ], + "versions.yml" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-06T14:23:44.59690452" + }, + "FUSIONREPORT - arriba - starfusion - fusioncatcher - stub": { + "content": [ + { + "0": [ + "versions.yml:md5,6bd28f2526774f519a7627a30c6a7f2f" + ], + "1": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport_filtered.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample_fusionreport_index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test_sample" + }, + [ + "AAA_BBB.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_sample_fusionreport_index.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + 
// Test-only process configuration for the FUSIONREPORT module tests.
process {
    // The reference download in the test setup always skips COSMIC.
    withName: 'FUSIONREPORT_DOWNLOAD' {
        ext.args = "--no-cosmic"
    }

    withName: 'FUSIONREPORT' {
        // Evaluate the parameter itself. Writing `{params.no_cosmic}` inside
        // the closure creates a nested closure object, which is always truthy,
        // so `--no-cosmic` would be passed even when the parameter is false.
        ext.args  = { params.no_cosmic ? "--no-cosmic" : "" }
        ext.args2 = "--export csv"
    }
}
# Module metadata for FUSIONREPORT_DOWNLOAD (modules/local/fusionreport/download).
name: fusionreport_download
description: Download the reference databases used by fusion-report
keywords:
  - fusion
  - database
  - download
tools:
  - fusionreport:
      description: Tool for parsing outputs from fusion detection tools
      homepage: "https://github.com/Clinical-Genomics/fusion-report"
      documentation: "https://matq007.github.io/fusion-report/#/"
      tool_dev_url: "https://github.com/Clinical-Genomics/fusion-report"
      # NOTE(review): DOI carried over unchanged from the previous metadata,
      # which described fusioncatcher - confirm it applies to fusion-report.
      doi: "10.1101/011650"
      licence: ["GPL v3"]

# The process declares no input channels; extra command-line options for
# `fusion_report download` are taken from task.ext.args.
input: []

output:
  - meta:
      type: map
      description: |
        Groovy Map set by the process itself
        e.g. [ id:'fusion_report_db' ]
  - fusionreport_ref:
      type: directory
      description: Directory holding the downloaded fusion-report database files
      pattern: "fusion_report_db"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@praveenraj2018"
path(process.out.fusionreport_ref[0][1]).resolve("mitelman.db"), + path(process.out.fusionreport_ref[0][1]).resolve("DB-timestamp.txt").exists(), + path(process.out.fusionreport_ref[0][1]).resolve("fusion_report.log").exists(), + process.out.versions + ).match() } + ) + } + } + + test("Create stub files") { + + options "-stub" + + when { + process { + """ + // This process doesn't have any inputs + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/fusionreport/download/tests/main.nf.test.snap b/modules/local/fusionreport/download/tests/main.nf.test.snap new file mode 100644 index 00000000..722d8c68 --- /dev/null +++ b/modules/local/fusionreport/download/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "Download fusionreport databases": { + "content": [ + "fusiongdb2.db:md5,e1ac123a744e515d3e5f85b8344d526a", + "mitelman.db:md5,1363795c97f77c641065ecd9ad0e484a", + true, + true, + [ + "versions.yml:md5,fa5f13c563f431912048c1802b5a0c74" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T19:27:38.99855171" + }, + "Create stub files": { + "content": [ + { + "0": [ + [ + { + "id": "fusion_report_db" + }, + [ + "DB-timestamp.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "cosmic.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_report.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusiongdb2.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "mitelman.db:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,fa5f13c563f431912048c1802b5a0c74" + ], + "fusionreport_ref": [ + [ + { + "id": "fusion_report_db" + }, + [ + "DB-timestamp.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "cosmic.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_report.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusiongdb2.db:md5,d41d8cd98f00b204e9800998ecf8427e", + "mitelman.db:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + 
// GENCODE_DOWNLOAD
// Downloads and decompresses the GENCODE human primary-assembly FASTA and the
// matching annotation GTF for one release.
//
// Inputs
//   genome_gencode_version - GENCODE release number, used in release_<N>
//   genome                 - assembly name; a name containing "38" selects the
//                            native files, anything else the lifted
//                            "<genome>_mapping" files
// Outputs
//   fasta (*.fa), gtf (*.gtf), versions
process GENCODE_DOWNLOAD {
    tag "gencode_download"
    label 'process_low'

    conda "bioconda::gnu-wget=1.18"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' :
        'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }"

    input:
    val genome_gencode_version
    val genome

    output:
    path "*.fa"        , emit: fasta
    path "*.gtf"       , emit: gtf
    path "versions.yml", emit: versions


    when:
    task.ext.when == null || task.ext.when

    script:
    def is_grch38 = genome.contains("38")
    // Lifted files live in a sub-directory of the release. The trailing slash
    // belongs to the folder so the file name can be appended directly; without
    // it the folder and file name were glued together in the URL.
    def folder_gencode = is_grch38 ? "" : "${genome}_mapping/"
    // Lifted annotations are named after the target assembly number
    // (e.g. "lift37" for GRCh37), not after the GENCODE release.
    def lift_build    = genome.replaceAll(/[^0-9]/, '')
    def gtf_file_name = is_grch38 ?
        "gencode.v${genome_gencode_version}.primary_assembly.annotation.gtf.gz" :
        "gencode.v${genome_gencode_version}lift${lift_build}.annotation.gtf.gz"
    def release_url   = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_${genome_gencode_version}"
    """
    wget ${release_url}/${folder_gencode}${genome}.primary_assembly.genome.fa.gz -O Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa.gz
    gunzip Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa.gz
    wget ${release_url}/${folder_gencode}${gtf_file_name} -O Homo_sapiens_${genome}_${genome_gencode_version}.gtf.gz
    gunzip Homo_sapiens_${genome}_${genome_gencode_version}.gtf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3)
    END_VERSIONS
    """

    stub:
    // versions.yml runs wget itself; `echo wget -V` only printed the literal
    // text, so grep matched nothing and the version was recorded as empty.
    """
    touch Homo_sapiens.${genome}.${genome_gencode_version}_dna_primary_assembly.fa
    touch Homo_sapiens.${genome}.${genome_gencode_version}.gtf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3)
    END_VERSIONS
    """

}
// HGNC_DOWNLOAD
// Downloads the complete HGNC gene set table and records when it was fetched.
//
// Outputs
//   hgnc_ref  - hgnc_complete_set.txt
//   hgnc_date - HGNC-DB-timestamp.txt (download time, %Y-%m-%d/%H:%M)
//   versions  - versions.yml
process HGNC_DOWNLOAD {
    tag "hgnc"
    label 'process_low'

    conda "bioconda::gnu-wget=1.18"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' :
        'biocontainers/gnu-wget:1.18--h5bf99c6_5' }"

    output:
    path "hgnc_complete_set.txt"   , emit: hgnc_ref
    path "HGNC-DB-timestamp.txt"   , emit: hgnc_date
    path "versions.yml"            , emit: versions


    script:
    // versions.yml runs wget itself; `echo wget -V` only printed the literal
    // text, so grep matched nothing and the version was recorded as empty.
    // Snapshots that pin the md5 of versions.yml need regenerating after this.
    """
    wget https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt
    date +%Y-%m-%d/%H:%M > HGNC-DB-timestamp.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3)
    END_VERSIONS
    """

    stub:
    // Empty placeholders for both declared data outputs.
    """
    touch "hgnc_complete_set.txt"
    touch "HGNC-DB-timestamp.txt"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3)
    END_VERSIONS
    """

}
+{ + "Should create stub files": { + "content": [ + [ + "hgnc_complete_set.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "HGNC-DB-timestamp.txt", + [ + "versions.yml:md5,a7b7ccbd9eda8036baf548cdf1cb6867" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T17:42:04.47884487" + }, + "Should download HGNC files": { + "content": [ + [ + "hgnc_complete_set.txt:md5,29571d88d1648e8764b70791df6a5d2d" + ], + "HGNC-DB-timestamp.txt", + [ + "versions.yml:md5,a7b7ccbd9eda8036baf548cdf1cb6867" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T17:41:49.965512273" + } +} \ No newline at end of file diff --git a/modules/local/starfusion/build/environment.yml b/modules/local/starfusion/build/environment.yml new file mode 100644 index 00000000..ef7f9316 --- /dev/null +++ b/modules/local/starfusion/build/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::dfam=3.7 + - bioconda::hmmer=3.4 + - bioconda::minimap2=2.28 + - bioconda::star-fusion=1.14.0 diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf new file mode 100644 index 00000000..fcd9cf80 --- /dev/null +++ b/modules/local/starfusion/build/main.nf @@ -0,0 +1,129 @@ +process STARFUSION_BUILD { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + path fusion_annot_lib + val dfam_species + + output: + tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + prep_genome_lib.pl \\ + --genome_fa $fasta \\ + --gtf $gtf \\ + --dfam_db ${dfam_species} \\ + --pfam_db current \\ + --fusion_annot_lib $fusion_annot_lib \\ + --CPU $task.cpus \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + """ + mkdir -p ctat_genome_lib_build_dir + + touch ctat_genome_lib_build_dir/AnnotFilterRule.pm + gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz + touch ctat_genome_lib_build_dir/blast_pairs.idx + + mkdir -p ctat_genome_lib_build_dir/__chkpts + touch ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok + touch ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok + touch ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok + touch ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok + touch ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok + touch ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok + touch ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok + touch ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok + touch 
ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok + touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok + touch ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok + + gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz + touch ctat_genome_lib_build_dir/fusion_annot_lib.idx + touch ctat_genome_lib_build_dir/pfam_domains.dbm + gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz + + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx + touch ctat_genome_lib_build_dir/ref_annot.cds + touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa + touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx + touch ctat_genome_lib_build_dir/ref_annot.gtf + touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans + touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu + touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed + touch ctat_genome_lib_build_dir/ref_annot.pep + touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm + + touch ctat_genome_lib_build_dir/ref_genome.fa + touch ctat_genome_lib_build_dir/ref_genome.fa.fai + touch ctat_genome_lib_build_dir/ref_genome.fa.mm2 + touch ctat_genome_lib_build_dir/ref_genome.fa.ndb + touch ctat_genome_lib_build_dir/ref_genome.fa.nhr + touch ctat_genome_lib_build_dir/ref_genome.fa.nin + touch ctat_genome_lib_build_dir/ref_genome.fa.njs + touch ctat_genome_lib_build_dir/ref_genome.fa.not + touch 
ctat_genome_lib_build_dir/ref_genome.fa.nsq + touch ctat_genome_lib_build_dir/ref_genome.fa.ntf + touch ctat_genome_lib_build_dir/ref_genome.fa.nto + + mkdir -p ctat_genome_lib_build_dir/ref_genome.fa.star.idx + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab + + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm + gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + +} diff --git a/modules/local/starfusion/build/meta.yml b/modules/local/starfusion/build/meta.yml new file mode 100644 index 00000000..d7f65a0b --- /dev/null +++ 
b/modules/local/starfusion/build/meta.yml @@ -0,0 +1,38 @@ +name: starfusion_build +description: Build the STAR-Fusion genome resource library required to run the STAR-Fusion caller +keywords: + - download +tools: + - star-fusion: + description: Fusion calling algorithm for RNAseq data + homepage: https://github.com/STAR-Fusion/ + documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki/installing-star-fusion + tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion + doi: "10.1186/s13059-019-1842-9" + licence: ["GPL v3"] + +input: + - fasta: + type: file + description: genome fasta file + pattern: "*.{fasta}" + - gtf: + type: file + description: genome gtf file + pattern: "*.{gtf}" + - fusion_annot_lib: + type: file + description: Fusion annotation library (key/val pairs, tab-delimited). + pattern: "*.dat.gz" + - dfam_species: + type: string + description: DNA transposable element database (Dfam.hmm), required for repeat masking. Only 'human' or 'mouse' are accepted (will automatically pull the resources from dfam). 
+ +output: + - reference: + type: directory + description: Reference dir + pattern: "ctat_genome_lib_build_dir" + +authors: + - "@praveenraj2018" diff --git a/modules/local/starfusion/build/tests/main.nf.test b/modules/local/starfusion/build/tests/main.nf.test new file mode 100644 index 00000000..8eb1b6f6 --- /dev/null +++ b/modules/local/starfusion/build/tests/main.nf.test @@ -0,0 +1,138 @@ +nextflow_process { + + name "Test Process STARFUSION_BUILD" + script "../main.nf" + process "STARFUSION_BUILD" + + test("STARFUSION_BUILD - human - minigenome") { + + when { + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + + """ + } + } + + then { + assert snapshot( + path(process.out.reference[0][1]).resolve("AnnotFilterRule.pm"), + path(process.out.reference[0][1]).resolve("blast_pairs.dat.gz").exists(), + path(process.out.reference[0][1]).resolve("blast_pairs.idx").exists(), + path(process.out.reference[0][1]).resolve("__chkpts/annotfiltrule_cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/blast_pairs.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_gene_blast_pairs.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_pfam_dat.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_ref_annot_cdna.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/fusion_annot_lib.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/_fusion_annot_lib.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/index_pfam_hits.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/index_ref_annot_cdna.ok"), + 
path(process.out.reference[0][1]).resolve("__chkpts/makeblastdb.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/mm2_genome_idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/mm2.splice_bed.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/_prot_info_db.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.gene_spans.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.mini.sortu.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_genome_fai.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_genome.fa.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.index.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/validate_ctat_genome_lib.ok"), + path(process.out.reference[0][1]).resolve("fusion_annot_lib.gz"), + path(process.out.reference[0][1]).resolve("fusion_annot_lib.idx").exists(), + path(process.out.reference[0][1]).resolve("pfam_domains.dbm").exists(), + path(process.out.reference[0][1]).resolve("PFAM.domtblout.dat.gz").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cds").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.gtf"), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.gene_spans").exists(), + 
path(process.out.reference[0][1]).resolve("ref_annot.gtf.mini.sortu"), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.mm2.splice.bed"), + path(process.out.reference[0][1]).resolve("ref_annot.pep").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.prot_info.dbm").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.fai"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.mm2"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.ndb"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nhr"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nin").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.njs").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.not"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nsq"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.ntf"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nto"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/build.ok"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrLength.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrNameLength.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrName.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrStart.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonGeTrInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/geneInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Genome"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/genomeParameters.txt").exists(), + 
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Log.out").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SA"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SAindex"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbInfo.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.out.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/transcriptInfo.tab"), + path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dat"), + path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dbm").exists(), + path(process.out.reference[0][1]).resolve("trans.blast.dat.gz"), + process.out.versions + ).match() + } + + } + + test("STARFUSION_BUILD - human - minigenome - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + + """ + } + } + + then { + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/local/starfusion/build/tests/main.nf.test.snap b/modules/local/starfusion/build/tests/main.nf.test.snap new file mode 100644 index 00000000..b08f2519 --- /dev/null +++ b/modules/local/starfusion/build/tests/main.nf.test.snap @@ -0,0 +1,266 @@ +{ + "STARFUSION_BUILD - human - minigenome": { + "content": [ + "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + true, + true, + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + true, + true, + true, + true, + true, + true, + true, + true, + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + true, + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + true, + true, + "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + 
"ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", + true, + true, + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + true, + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + true, + true, + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + true, + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128", + null + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-19T17:03:12.812884291" + }, + "STARFUSION_BUILD - human - minigenome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "minigenome fasta" + }, + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + 
"trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "reference": [ + [ + { + "id": "minigenome fasta" + }, + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-12T14:57:33.861849482" + } +} \ No newline at end of file diff --git a/modules/local/starfusion/detect/environment.yml b/modules/local/starfusion/detect/environment.yml new file mode 100644 index 00000000..ef7f9316 --- /dev/null +++ b/modules/local/starfusion/detect/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::dfam=3.7 + - bioconda::hmmer=3.4 + - bioconda::minimap2=2.28 + - bioconda::star-fusion=1.14.0 diff --git a/modules/local/starfusion/detect/main.nf b/modules/local/starfusion/detect/main.nf new file mode 100644 index 00000000..00c78efc --- /dev/null +++ b/modules/local/starfusion/detect/main.nf @@ -0,0 +1,58 @@ +process STARFUSION { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + + input: + tuple val(meta), path(reads), path(junction) + path reference + + output: + tuple val(meta), path("*.fusion_predictions.tsv") , emit: fusions + tuple val(meta), path("*.abridged.tsv") , emit: abridged + tuple val(meta), path("*.coding_effect.tsv") , optional: true , emit: coding_effect + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def fastq_arg = reads ? (meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}") : "" + def junction_arg = junction ? "-J ${junction}" : "" + def args = task.ext.args ?: '' + """ + STAR-Fusion \\ + --genome_lib_dir $reference \\ + $fastq_arg \\ + $junction_arg \\ + --CPU $task.cpus \\ + --examine_coding_effect \\ + --output_dir . 
\\ + $args + + mv star-fusion.fusion_predictions.tsv ${prefix}.starfusion.fusion_predictions.tsv + mv star-fusion.fusion_predictions.abridged.tsv ${prefix}.starfusion.abridged.tsv + mv star-fusion.fusion_predictions.abridged.coding_effect.tsv ${prefix}.starfusion.abridged.coding_effect.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.starfusion.fusion_predictions.tsv + touch ${prefix}.starfusion.abridged.tsv + touch ${prefix}.starfusion.abridged.coding_effect.tsv + cat <<-END_VERSIONS > versions.yml + "${task.process}": + STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') + END_VERSIONS + """ +} + + diff --git a/modules/local/starfusion/detect/meta.yml b/modules/local/starfusion/detect/meta.yml new file mode 100644 index 00000000..7337dad5 --- /dev/null +++ b/modules/local/starfusion/detect/meta.yml @@ -0,0 +1,56 @@ +name: starfusion +description: Fast and Accurate Fusion Transcript Detection from RNA-Seq +keywords: + - Fusion +tools: + - star-fusion: + description: Fast and Accurate Fusion Transcript Detection from RNA-Seq + homepage: https://github.com/STAR-Fusion/STAR-Fusion + documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki + tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion/releases + doi: "10.1101/120295v1" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - genome_lib: + type: path + description: STAR-fusion reference genome lib folder + - junction: + type: file + description: Chimeric junction output from STAR aligner + pattern: "*.{out.junction}" + - reference: + type: directory + description: Reference dir + pattern: "ctat_genome_lib_build_dir" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{versions.yml}" + - fusions: + type: file + description: Fusion events from STAR-fusion + pattern: "*.{fusion_predictions.tsv}" + - abridged: + type: file + description: Fusion events from STAR-fusion + pattern: "*.{fusion.abridged.tsv}" + - coding_effect: + type: file + description: Fusion events from STAR-fusion + pattern: "*.{coding_effect.tsv}" + +authors: + - "@praveenraj2018" diff --git a/modules/local/starfusion/detect/tests/main.nf.test b/modules/local/starfusion/detect/tests/main.nf.test new file mode 100644 index 00000000..75e0bfdd --- /dev/null +++ b/modules/local/starfusion/detect/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process STARFUSION" + script "../main.nf" + process "STARFUSION" + + setup { + run("STARFUSION_BUILD") { + script "../../../starfusion/build/main.nf" + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + """ + } + } + } + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + 
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz"), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz") + ], // reads + [] // empty list for junction, as we don't have a pre-computed Chimeric.out.junction file + ] + input[1] = STARFUSION_BUILD.out.reference.map { it[1] } + """ + } + } + + then { + assert process.success + assert snapshot( + process.out.fusions, + process.out.abridged, + process.out.coding_effect, + process.out.versions + ).match() + } + + } + + test("Should create stub files") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_stub', single_end:false ], + [ + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz"), + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz") + ], + [] // empty list for junction + ] + input[1] = STARFUSION_BUILD.out.reference.map { it[1] } + """ + } + } + + then { + assert process.success + assert snapshot( + process.out + ).match() + } + + } + +} diff --git a/modules/local/starfusion/detect/tests/main.nf.test.snap b/modules/local/starfusion/detect/tests/main.nf.test.snap new file mode 100644 index 00000000..f65f94b0 --- /dev/null +++ b/modules/local/starfusion/detect/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "Should create stub files": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.fusion_predictions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.abridged.coding_effect.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a8fb6344fdf740dde0941048313fc243" + ], + "abridged": [ + [ + { + "id": "test_stub", + "single_end": 
false + }, + "test_stub.starfusion.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coding_effect": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.abridged.coding_effect.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fusions": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.starfusion.fusion_predictions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,a8fb6344fdf740dde0941048313fc243" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-19T18:42:06.988178092" + }, + "Should run without failures": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.starfusion.fusion_predictions.tsv:md5,82834fffed743afe07da82bd56d50c99" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.starfusion.abridged.tsv:md5,d6d20fdd4b5cba21b9c0ebf8e0ea19ff" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.starfusion.abridged.coding_effect.tsv:md5,95dfce6fdaf3589f23881fe1e855c62b" + ] + ], + [ + "versions.yml:md5,a8fb6344fdf740dde0941048313fc243" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-19T18:41:49.150362156" + } +} \ No newline at end of file diff --git a/modules/local/uscs/custom_gtftogenepred/main.nf b/modules/local/uscs/custom_gtftogenepred/main.nf new file mode 100644 index 00000000..9cc15765 --- /dev/null +++ b/modules/local/uscs/custom_gtftogenepred/main.nf @@ -0,0 +1,39 @@ +process GTF_TO_REFFLAT { + tag "$meta.id" + label 'process_low' + + conda "bioconda::ucsc-gtftogenepred=377" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:377--ha8a8165_5' : + 'quay.io/biocontainers/ucsc-gtftogenepred:377--ha8a8165_5' }" + + input: + tuple val(meta), path (gtf) + + output: + path('*.refflat') , emit: refflat + path "versions.yml" , emit: versions + + script: + def genepred = gtf + '.genepred' + def refflat = gtf + '.refflat' + """ + gtfToGenePred -genePredExt -geneNameAsName2 ${gtf} ${genepred} + paste ${genepred} ${genepred} | cut -f12,16-25 > ${refflat} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gtfToGenePred: 377 + END_VERSIONS + """ + + stub: + def refflat = gtf + '.refflat' + """ + touch ${refflat} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gtfToGenePred: 377 + END_VERSIONS + """ +} diff --git a/modules/local/vcf_collect/main.nf b/modules/local/vcf_collect/main.nf new file mode 100644 index 00000000..3a1bf221 --- /dev/null +++ b/modules/local/vcf_collect/main.nf @@ -0,0 +1,45 @@ +process VCF_COLLECT { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : + 'biocontainers/pandas:1.5.2' }" + + input: + tuple val(meta), path(fusioninspector_tsv), path(fusioninspector_gtf_tsv), path(fusionreport_report), path(fusionreport_csv) + tuple val(meta2), path(hgnc_ref) + tuple val(meta3), path(hgnc_date) + + output: + path "versions.yml" , emit: versions + tuple val(meta), path("*vcf.gz") , emit: vcf + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + vcf_collect.py --fusioninspector $fusioninspector_tsv --fusionreport $fusionreport_report --fusioninspector_gtf $fusioninspector_gtf_tsv --fusionreport_csv $fusionreport_csv --hgnc $hgnc_ref --sample ${prefix} --out ${prefix}_fusion_data.vcf + gzip ${prefix}_fusion_data.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + HGNC DB retrieval: \$(cat $hgnc_date) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" // stub must emit a file matching the "*vcf.gz" output glob, named like the real run's output + """ + echo "" | gzip > ${prefix}_fusion_data.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/vcf_collect/meta.yml b/modules/local/vcf_collect/meta.yml new file mode 100644 index 00000000..de4667bb --- /dev/null +++ b/modules/local/vcf_collect/meta.yml @@ -0,0 +1,39 @@ +name: vcf_collect +description: vcf_collect +keywords: + - sort +tools: + - fusionreport: + description: Converts RNA fusion files to SV VCF and collects statistics and metrics in a VCF file. + homepage: Adapted from https://github.com/J35P312/MegaFusion + documentation: https://github.com/J35P312/MegaFusion + doi: "" + licence: [""] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tsv: + type: path + description: Path to FusionInspector tsv output + pattern: "*" + - report: + type: path + description: Path to fusionreport report + pattern: "*.fusions.tsv" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: File containing the summary of all fusions as compressed vcf file + pattern: "*.vcf.gz" + +authors: + - "@rannick" diff --git a/modules/nf-core/agat/convertspgff2tsv/environment.yml b/modules/nf-core/agat/convertspgff2tsv/environment.yml new file mode 100644 index 00000000..0410ee76 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/convertspgff2tsv/main.nf b/modules/nf-core/agat/convertspgff2tsv/main.nf new file mode 100644 index 00000000..a6f73b6c --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/main.nf @@ -0,0 +1,46 @@ +process AGAT_CONVERTSPGFF2TSV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gff) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + agat_convert_sp_gff2tsv.pl \\ + --gff $gff \\ + --output ${prefix}.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gff2tsv.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_convert_sp_gff2tsv.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/convertspgff2tsv/meta.yml b/modules/nf-core/agat/convertspgff2tsv/meta.yml new file mode 100644 index 00000000..31f09868 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/meta.yml @@ -0,0 +1,48 @@ +name: agat_convertspgff2tsv +description: | + Converts a GFF/GTF file into a TSV file +keywords: + - genome + - gff + - gtf + - conversion + - tsv +tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - gff: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - tsv: + - meta: + type: file + description: Annotation file in TSV format + pattern: "*.{gtf}" + - "*.tsv": + type: file + description: Annotation file in TSV format + pattern: "*.{gtf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" +maintainers: + - "@gallvp" diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test new file mode 100644 index 00000000..6a2e8942 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process AGAT_CONVERTSPGFF2TSV" + script "../main.nf" + process "AGAT_CONVERTSPGFF2TSV" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/convertspgff2tsv" + + test("sarscov2 - genome [gff3]") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - genome [gff3] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv.collect { file(it[1]).getName() } + + process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap new file mode 100644 index 00000000..71ed6205 --- /dev/null +++ 
b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - genome [gff3] - stub": { + "content": [ + [ + "test.tsv", + "versions.yml:md5,b81565a6ff8911848806128b3bec8508" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:06:55.853319" + }, + "sarscov2 - genome [gff3]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,8373d2035689d23694f87606116cdccd" + ] + ], + "1": [ + "versions.yml:md5,b81565a6ff8911848806128b3bec8508" + ], + "tsv": [ + [ + { + "id": "test" + }, + "test.tsv:md5,8373d2035689d23694f87606116cdccd" + ] + ], + "versions": [ + "versions.yml:md5,b81565a6ff8911848806128b3bec8508" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:06:51.415395" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/tags.yml b/modules/nf-core/agat/convertspgff2tsv/tests/tags.yml new file mode 100644 index 00000000..d2ee3721 --- /dev/null +++ b/modules/nf-core/agat/convertspgff2tsv/tests/tags.yml @@ -0,0 +1,2 @@ +agat/convertspgff2tsv: + - "modules/nf-core/agat/convertspgff2tsv/**" diff --git a/modules/nf-core/arriba/arriba/environment.yml b/modules/nf-core/arriba/arriba/environment.yml new file mode 100644 index 00000000..c3a88ffb --- /dev/null +++ b/modules/nf-core/arriba/arriba/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/arriba/main.nf b/modules/nf-core/arriba/arriba/main.nf new file mode 100644 index 00000000..75dbf93a --- /dev/null +++ b/modules/nf-core/arriba/arriba/main.nf @@ -0,0 +1,65 @@ +process ARRIBA_ARRIBA { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + tuple val(meta3), path(gtf) + path(blacklist) + path(known_fusions) + path(cytobands) + path(protein_domains) + + output: + tuple val(meta), path("*.fusions.tsv") , emit: fusions + tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def blacklist = blacklist ? "-b $blacklist" : "-f blacklist" + def known_fusions = known_fusions ? "-k $known_fusions" : "" + def cytobands = cytobands ? "-d $cytobands" : "" + def protein_domains = protein_domains ? "-p $protein_domains" : "" + + """ + arriba \\ + -x $bam \\ + -a $fasta \\ + -g $gtf \\ + -o ${prefix}.fusions.tsv \\ + -O ${prefix}.fusions.discarded.tsv \\ + $blacklist \\ + $known_fusions \\ + $cytobands \\ + $protein_domains \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub > ${prefix}.fusions.tsv + echo stub > ${prefix}.fusions.discarded.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/arriba/arriba/meta.yml b/modules/nf-core/arriba/arriba/meta.yml new file mode 100644 index 00000000..91ba2103 --- /dev/null +++ b/modules/nf-core/arriba/arriba/meta.yml @@ -0,0 +1,94 @@ +name: arriba_arriba +description: Arriba is a command-line tool for the detection of gene fusions from + RNA-Seq data. 
+keywords: + - fusion + - arriba + - detection + - RNA-Seq +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + identifier: biotools:Arriba +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Assembly FASTA file + pattern: "*.{fasta}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - blacklist: + type: file + description: Blacklist file + pattern: "*.{tsv}" + - - known_fusions: + type: file + description: Known fusions file + pattern: "*.{tsv}" + - - cytobands: + type: file + description: Cytobands file + pattern: "*.{tsv}" + - - protein_domains: + type: file + description: Protein domains file + pattern: "*.{gff3}" +output: + - fusions: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fusions.tsv": + type: file + description: File contains fusions which pass all of Arriba's filters. + pattern: "*.{fusions.tsv}" + - fusions_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fusions.discarded.tsv": + type: file + description: File contains fusions that Arriba classified as an artifact or + that are also observed in healthy tissue. 
+ pattern: "*.{fusions.discarded.tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@praveenraj2018" + - "@rannick" +maintainers: + - "@praveenraj2018" + - "@rannick" diff --git a/modules/nf-core/arriba/arriba/tests/main.nf.test b/modules/nf-core/arriba/arriba/tests/main.nf.test new file mode 100644 index 00000000..4cff86e5 --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/main.nf.test @@ -0,0 +1,107 @@ + +nextflow_process { + + name "Test Process ARRIBA_ARRIBA" + script "../main.nf" + process "ARRIBA_ARRIBA" + + tag "modules" + tag "modules_nfcore" + tag "arriba" + tag "arriba/arriba" + tag "arriba/download" + tag "star/genomegenerate" + tag "star/align" + + setup { + config "./nextflow.config" + options "-stub" + run("ARRIBA_DOWNLOAD") { + script "../../../arriba/download/main.nf" + process { + """ + input[0] = 'GRCh38' + """ + } + } + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + options "-stub" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + run("STAR_ALIGN") { + script "../../../star/align/main.nf" + options "-stub" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + } + + test("homo_sapiens - paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = [] + + input[4] = [] + + input[5] = [] + + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/arriba/arriba/tests/main.nf.test.snap b/modules/nf-core/arriba/arriba/tests/main.nf.test.snap new file mode 100644 index 00000000..14d46f6b --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "2": [ + "versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30" + ], + "fusions": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "fusions_fail": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "versions": [ + "versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30" + 
] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-08T15:41:23.945072" + } +} \ No newline at end of file diff --git a/modules/nf-core/arriba/arriba/tests/nextflow.config b/modules/nf-core/arriba/arriba/tests/nextflow.config new file mode 100644 index 00000000..d3d5b00f --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 11' + } + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } +} diff --git a/modules/nf-core/arriba/download/environment.yml b/modules/nf-core/arriba/download/environment.yml new file mode 100644 index 00000000..d0883a0d --- /dev/null +++ b/modules/nf-core/arriba/download/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/download/main.nf b/modules/nf-core/arriba/download/main.nf new file mode 100644 index 00000000..c45120ad --- /dev/null +++ b/modules/nf-core/arriba/download/main.nf @@ -0,0 +1,49 @@ +process ARRIBA_DOWNLOAD { + tag "arriba" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + val(genome) + + output: + path "blacklist*${genome}*.tsv.gz" , emit: blacklist + path "cytobands*${genome}*.tsv" , emit: cytobands + path "protein_domains*${genome}*.gff3" , emit: protein_domains + path "known_fusions*${genome}*.tsv.gz" , emit: known_fusions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz --no-check-certificate + tar -xzvf arriba_v2.4.0.tar.gz + rm arriba_v2.4.0.tar.gz + mv arriba_v2.4.0/database/* . + rm -r arriba_v2.4.0 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba_download: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + """ + touch blacklist_hg38_GRCh38_v2.4.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.4.0.gff3 + touch cytobands_hg38_GRCh38_v2.4.0.tsv + touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba_download: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/arriba/download/meta.yml b/modules/nf-core/arriba/download/meta.yml new file mode 100644 index 00000000..bdf542eb --- /dev/null +++ b/modules/nf-core/arriba/download/meta.yml @@ -0,0 +1,52 @@ +name: arriba_download +description: Arriba is a command-line tool for the detection of gene fusions from + RNA-Seq data. 
+keywords: + - fusion + - arriba + - reference +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + identifier: biotools:Arriba + +input: + - - genome: + type: string + description: hg38, hg19, GRCh38, GRCh37 for humans are accepted +output: + - blacklist: + - blacklist*${genome}*.tsv.gz: + type: string + description: The blacklist removes recurrent alignment artifacts and transcripts + which are present in healthy tissue + pattern: ".tsv.gz" + - cytobands: + - cytobands*${genome}*.tsv: + type: file + description: Coordinates of the Giemsa staining bands. This information is used + to draw ideograms + pattern: ".tsv" + - protein_domains: + - protein_domains*${genome}*.gff3: + type: file + description: Protein domain annotations + patter: "*.gff3" + - known_fusions: + - known_fusions*${genome}*.tsv.gz: + type: file + description: Arriba is more sensitive to those fusions to improve the detection + rate of expected or highly relevant events, such as recurrent fusions + patter: "*.tsv.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@peterpru" diff --git a/modules/nf-core/arriba/download/tests/main.nf.test b/modules/nf-core/arriba/download/tests/main.nf.test new file mode 100644 index 00000000..2c32c7a0 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/main.nf.test @@ -0,0 +1,50 @@ + +nextflow_process { + + name "Test Process ARRIBA_DOWNLOAD" + script "../main.nf" + process "ARRIBA_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "arriba" + tag "arriba/download" + + test("test-arriba-download") { + when { + process { + """ + input[0] = 'GRCh38' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + + test("download - stub") { + + options "-stub" + + when { + process { + """ + input[0] = 'GRCh38' + """ + } + } + + then { + assertAll( + { assert process.success } + ) + } + } + +} diff --git a/modules/nf-core/arriba/download/tests/main.nf.test.snap b/modules/nf-core/arriba/download/tests/main.nf.test.snap new file mode 100644 index 00000000..fe9b18b9 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "download": { + "content": [ + { + "0": [ + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz:md5,e3098a4be51aece78aede64b55c39318" + ], + "1": [ + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c" + ], + "2": [ + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978" + ], + "3": [ + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz:md5,4f00f81ccb5f4db283f1a22b8b0da67c" + ], + "4": [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ], + "blacklist": [ + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz:md5,e3098a4be51aece78aede64b55c39318" + ], + "cytobands": [ + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c" + ], + "known_fusions": [ + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz:md5,4f00f81ccb5f4db283f1a22b8b0da67c" + ], + "protein_domains": [ + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978" + ], + "versions": [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-08T11:12:17.010496" + } +} \ No newline at end of file diff --git a/modules/nf-core/arriba/download/tests/tags.yml b/modules/nf-core/arriba/download/tests/tags.yml new file mode 100644 index 00000000..f510bbf1 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/tags.yml @@ -0,0 +1,2 @@ +arriba/download: + - "modules/nf-core/arriba/download/**" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml 
new file mode 100644 index 00000000..9b01c865 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..2862c64c --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..81778a06 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,43 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - file_out: + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..9cb16178 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ 
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..b7623ee6 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + 
"MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" + }, + "test_cat_one_file_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" + }, + "test_cat_unzipped_zipped": { + "content": [ + [ + ">MT192765.1 Severe acute 
respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..71e04c3d --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - 
conda-forge::coreutils=9.5 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..4364a389 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,79 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' : + 'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + echo '' | gzip > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + """ + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..91ff2fb5 --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,45 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..f88a78b6 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,248 @@ +// NOTE The version snaps may not be consistent +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path 
+ 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..f8689a1c --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,376 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:07.519211144" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:31.618628921" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:57.904149581" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:44.577183829" + }, + "test_cat_fastq_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:10.603734777" + }, + "test_cat_fastq_paired_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:46.041808828" + }, + "test_cat_fastq_single_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:34.13865402" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": 
false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:02:19.64383573" + }, + "test_cat_fastq_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:22.597246066" + }, + "test_cat_fastq_single_end_single_file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:03:58.44849001" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff 
--git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..26d4aca5 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..e1b9f565 --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,125 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val discard_trimmed_pass + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? 
"--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! 
-f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..159404d0 --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,113 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. 
This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. + This allows fastp to be run for the output report only. + - - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastp log file + pattern: "*.log" + - reads_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..30dbb8aa --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,576 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, 
+ { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end") { + + when { + + process { + """ + adapter_fasta = [] + save_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("fastp test_fastp_interleaved") { + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + 
process.out.reads, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() }, + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + 
process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + save_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fastp - stub test_fastp_interleaved") { + + options "-stub" + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" + + config './nextflow.save_failed.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..54be7e45 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,1331 @@ +{ + "test_fastp_single_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": 
true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:31:10.841098" + }, + "test_fastp_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:28.665779" + }, + "test_fastp_paired_end_merged_adapterlist": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,5914ca3f21ce162123a824e33e8564f6" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:18.210375" + }, + "test_fastp_single_end_qc_only": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,5cc5f01e449309e0e689ed6f51a2294a" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:27.380974" + }, + "test_fastp_paired_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:58.749589" + }, + "fastp - stub test_fastp_interleaved": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:50:00.270029" + }, + "test_fastp_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:49:42.502789" + }, + "test_fastp_paired_end_merged_adapterlist - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + 
"versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:54:53.458252" + }, + "test_fastp_paired_end_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:50:27.689379" + }, + 
"test_fastp_paired_end_merged": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:08.68476" + }, + "test_fastp_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:49:51.679221" + }, + "test_fastp_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:18.834322" + }, + "test_fastp_single_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:05:36.898142" + }, + "test_fastp_paired_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:05:49.212847" + }, + "fastp test_fastp_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:38.910832" + }, + "test_fastp_single_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:48.22378" + }, + "test_fastp_paired_end_qc_only": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,623064a45912dac6f2b64e3f2e9901df" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:36.334938" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:31:27.096468" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 00000000..4be8dbd2 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config new file mode 100644 index 00000000..53b61b0c --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "-e 30" + } +} diff --git 
a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 00000000..1f7d0824 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf new file mode 100644 index 00000000..89960e04 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -0,0 +1,56 @@ +process GATK4_BEDTOINTERVALLIST { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(bed) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.interval_list'), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ + --INPUT $bed \\ + --OUTPUT ${prefix}.interval_list \\ + --SEQUENCE_DICTIONARY $dict \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml new file mode 100644 index 00000000..25348e16 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -0,0 +1,62 @@ +name: gatk4_bedtointervallist +description: Creates an interval list from a bed file and a reference dict +keywords: + - bed + - bedtointervallist + - gatk4 + - interval list +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" +output: + - interval_list: + - meta: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - "*.interval_list": + type: file + description: gatk interval list file + pattern: "*.interval_list" + - _list: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test new file mode 100644 index 00000000..2289f73f --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_BEDTOINTERVALLIST" + script "../main.nf" + process "GATK4_BEDTOINTERVALLIST" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + + test("test_gatk4_bedtointervallist") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + input[1] = [ [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap new file mode 100644 index 00000000..6936cf97 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_bedtointervallist": { + "content": [ 
+ { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "1": [ + "versions.yml:md5,6b3aa4d49cc3ba433ecf92e31f155d00" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "versions": [ + "versions.yml:md5,6b3aa4d49cc3ba433ecf92e31f155d00" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:37:25.720782902" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml new file mode 100644 index 00000000..b4d54f12 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/bedtointervallist: + - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 00000000..1f7d0824 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 00000000..998622a0 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,52 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE $fasta \\ + --URI $fasta \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 00000000..7b5156bb --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,49 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - dict: + - meta: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - "*.dict": + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test new file mode 100644 index 00000000..a8a9c6d2 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GATK4_CREATESEQUENCEDICTIONARY" + script "../main.nf" + process "GATK4_CREATESEQUENCEDICTIONARY" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/createsequencedictionary" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap new file mode 100644 index 00000000..e8a600fd --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:51:56.155954077" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "1": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "versions": [ + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:51:45.562993875" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml new file mode 100644 index 00000000..035c5e4c --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/createsequencedictionary: + - "modules/nf-core/gatk4/createsequencedictionary/**" diff --git 
a/modules/nf-core/gatk4/markduplicates/environment.yml b/modules/nf-core/gatk4/markduplicates/environment.yml new file mode 100644 index 00000000..ec65c32d --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/environment.yml @@ -0,0 +1,15 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.19.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.19.2 diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..cf770308 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,85 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:7cc3d06cbf42e28c5e2ebfc7c858654c7340a9d5-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:7cc3d06cbf42e28c5e2ebfc7c858654c7340a9d5-0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicates \\ + $input_list \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . 
\\ + ${reference} \\ + $args + + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + prefix_no_suffix = task.ext.prefix ? prefix.tokenize('.')[0] : "${meta.id}" + """ + touch ${prefix_no_suffix}.bam + touch ${prefix_no_suffix}.cram + touch ${prefix_no_suffix}.cram.crai + touch ${prefix_no_suffix}.bai + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml new file mode 100644 index 00000000..4772c5f3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -0,0 +1,102 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where + duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - bam + - gatk4 + - markduplicates + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" +output: + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*cram": + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*bam": + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/markduplicates/tests/bam.config b/modules/nf-core/gatk4/markduplicates/tests/bam.config new file mode 100644 index 00000000..0bbfbac3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/bam.config @@ -0,0 +1,8 @@ +process { + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.bam" } + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/cram.config b/modules/nf-core/gatk4/markduplicates/tests/cram.config new file mode 100644 index 00000000..04a9b074 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/cram.config @@ -0,0 +1,8 @@ +process { + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.cram" } + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/main.nf.test b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..bbcf74db --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process GATK4_MARKDUPLICATES" + script "../main.nf" + process "GATK4_MARKDUPLICATES" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/markduplicates" + + test("sarscov2 - bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(process.out.bam).match("bam") }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("test.metrics") } + ) + } + } + + test("homo_sapiens - multiple bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match("multi bam") }, + { assert snapshot(process.out.bai).match("multi bai") }, + { assert snapshot(process.out.versions).match("multi versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("multi test.metrics") } + ) + } + + } + + test("homo_sapiens - multiple cram") { + config "./cram.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("multi cram")}, + { assert snapshot(file(process.out.crai[0][1]).name).match("multi crai") }, + { 
assert snapshot(process.out.versions).match("multi cram versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("multi cram test.metrics") } + ) + } + + } + + test("stub") { + config "./bam.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [] + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..336bb373 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "multi bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,8a808b1a94d2627c4d659a2151c4cb9f" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:36.059923" + }, + "multi crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.780426007" + }, + "multi bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bai:md5,38b99c5f771895ecf5324c3186b9d452" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:36.09642" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,c58bf16c6e3786cc4d17bb7249f9ffe5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:08.710549" + }, + "multi test.metrics": { + "content": [ + "test.bam.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:11.732892667" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bai:md5,26001bcdbce12e9f07557d8f7b8d360e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:42:39.651888758" + }, + "multi cram versions": { + "content": [ + [ + "versions.yml:md5,c58bf16c6e3786cc4d17bb7249f9ffe5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:56.966376" + }, + "multi versions": { + "content": [ + [ + "versions.yml:md5,c58bf16c6e3786cc4d17bb7249f9ffe5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:36.138095" + }, + "multi cram test.metrics": { + "content": [ + "test.cram.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.798977444" + }, + "multi cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.771137858" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,75d914ba8804eaf2acf02ab432197ec9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T15:21:08.645892" + }, + "test.metrics": { + "content": [ + "test.bam.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:42:39.672508385" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/markduplicates/tests/tags.yml b/modules/nf-core/gatk4/markduplicates/tests/tags.yml new file mode 100644 index 00000000..8632e32b --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/markduplicates: + - "modules/nf-core/gatk4/markduplicates/**" diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 00000000..ee239841 --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gffread=0.12.7 diff --git 
// GFFREAD: run gffread on an annotation file (plus an optional genome FASTA)
// and emit a GTF, GFF3 or FASTA file. Which of the three is produced is
// decided solely by the flags found in task.ext.args (see `extension` below).
process GFFREAD {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    // Use the Galaxy depot Singularity image when running under Singularity,
    // unless task.ext.singularity_pull_docker_container is set; otherwise use
    // the biocontainers image.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' :
        'biocontainers/gffread:0.12.7--hdcf5f25_4' }"

    input:
    tuple val(meta), path(gff)
    path fasta

    output:
    // Every data output is optional: the three globs are mutually exclusive
    // because a single run writes exactly one file named <prefix>.<extension>.
    tuple val(meta), path("*.gtf")  , emit: gtf             , optional: true
    tuple val(meta), path("*.gff3") , emit: gffread_gff     , optional: true
    tuple val(meta), path("*.fasta"), emit: gffread_fasta   , optional: true
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args        = task.ext.args   ?: ''
    def prefix      = task.ext.prefix ?: "${meta.id}"
    // Output type: '-T' anywhere in args -> gtf; otherwise any of '-w', '-x',
    // '-y' -> fasta; otherwise gff3. This is a plain substring test on args.
    def extension   = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' )
    // '-g <fasta>' is only passed when a FASTA input was supplied.
    def fasta_arg   = fasta ? "-g $fasta" : ''
    def output_name = "${prefix}.${extension}"
    // For FASTA the bare file name is emitted (it follows the relocated
    // -w/-x/-y flag); for GTF/GFF3 the file is given through '-o'.
    def output      = extension == "fasta" ? "$output_name" : "-o $output_name"
    def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim()
    // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters
    // NOTE(review): the greedy leading (.*) means a single replacement moves only
    // one matched flag; behaviour with several of -w/-x/-y together is unverified.
    // Refuse to run when the output would have the same name as a staged input.
    if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    gffread \\
        $gff \\
        $fasta_arg \\
        $args_sorted \\
        $output

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gffread: \$(gffread --version 2>&1)
    END_VERSIONS
    """

    stub:
    // Mirrors the naming logic and the name-collision check of the real script,
    // then only touches the output file. `gffread --version` is still executed
    // to fill versions.yml.
    def args        = task.ext.args   ?: ''
    def prefix      = task.ext.prefix ?: "${meta.id}"
    def extension   = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' )
    def output_name = "${prefix}.${extension}"
    if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    touch $output_name

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gffread: \$(gffread --version 2>&1)
    END_VERSIONS
    """
}
// nf-test suite for the GFFREAD process. Each output mode (gtf, gff3, fasta)
// is exercised once for real and once with '-stub'; the mode is selected by
// the per-test config file, which sets ext.args for GFFREAD.
nextflow_process {

    name "Test Process GFFREAD"
    script "../main.nf"
    process "GFFREAD"

    tag "gffread"
    tag "modules_nfcore"
    tag "modules"

    // GTF mode: nextflow.config is used; only the gtf channel may be populated.
    test("sarscov2-gff3-gtf") {

        config "./nextflow.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'test'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = []

                """
            }
        }

        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.gffread_gff == [] },
                { assert process.out.gffread_fasta == [] }
            )
        }

    }

    // Same as above, but run with '-stub' so only the stub block is executed.
    test("sarscov2-gff3-gtf-stub") {

        options '-stub'
        config "./nextflow.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'test'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = []
                """
            }
        }

        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.gffread_gff == [] },
                { assert process.out.gffread_fasta == [] }
            )
        }

    }

    // GFF3 mode: nextflow-gff3.config is used; only gffread_gff may be populated.
    test("sarscov2-gff3-gff3") {

        config "./nextflow-gff3.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'test'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = []
                """
            }
        }

        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.gtf == [] },
                { assert process.out.gffread_fasta == [] }
            )
        }

    }

    // Stub variant of the GFF3 mode test.
    test("sarscov2-gff3-gff3-stub") {

        options '-stub'
        config "./nextflow-gff3.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'test'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = []
                """
            }
        }

        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.gtf == [] },
                { assert process.out.gffread_fasta == [] }
            )
        }

    }

    // FASTA mode: nextflow-fasta.config is used and a genome FASTA is supplied
    // as input[1]; only gffread_fasta may be populated.
    test("sarscov2-gff3-fasta") {

        config "./nextflow-fasta.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'test'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
                """
            }
        }

        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.gtf == [] },
                { assert process.out.gffread_gff == [] }
            )
        }

    }

    // Stub variant of the FASTA mode test.
    test("sarscov2-gff3-fasta-stub") {

        options '-stub'
        config "./nextflow-fasta.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'test'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
                """
            }
        }

        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() },
                { assert process.out.gtf == [] },
                { assert process.out.gffread_gff == [] }
            )
        }

    }

    // Negative test: with id 'genome' in FASTA mode the output would be named
    // genome.fasta, the same as the input FASTA, so the process is expected to
    // fail with the name-collision error message.
    test("sarscov2-gff3-fasta-fail-catch") {

        options '-stub'
        config "./nextflow-fasta.config"

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = [
                    [id: 'genome'],
                    file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
                ]
                input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
                """
            }
        }

        then {
            assertAll (
                { assert ! process.success },
                { assert process.stdout.toString().contains("Input and output names are the same") }
            )
        }

    }

}
"versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:49:00.892782" + }, + "sarscov2-gff3-gtf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:26.975666" + }, + "sarscov2-gff3-fasta-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:44.34792" + }, + "sarscov2-gff3-gff3-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + + ], + 
"versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:35.221671" + }, + "sarscov2-gff3-fasta": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:54:02.88143" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow-fasta.config b/modules/nf-core/gffread/tests/nextflow-fasta.config new file mode 100644 index 00000000..ac6cb148 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-fasta.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-w -S' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow-gff3.config b/modules/nf-core/gffread/tests/nextflow-gff3.config new file mode 100644 index 00000000..afe0830e --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-gff3.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 00000000..74b25094 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 00000000..05576065 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git 
// PICARD_COLLECTINSERTSIZEMETRICS: run Picard CollectInsertSizeMetrics on one
// alignment file and emit the metrics table (<prefix>.txt) and the insert-size
// histogram (<prefix>.pdf).
//
// Inputs : tuple(meta, bam)
// Outputs: metrics   - tuple(meta, *.txt)
//          histogram - tuple(meta, *.pdf)
//          versions  - versions.yml
// Extra Picard options are taken from task.ext.args; the output basename from
// task.ext.prefix (defaults to meta.id).
process PICARD_COLLECTINSERTSIZEMETRICS {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // Galaxy depot Singularity image under Singularity (unless
    // task.ext.singularity_pull_docker_container is set), else biocontainers.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' :
        'biocontainers/picard:3.3.0--hdfd78af_0' }"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.txt"), emit: metrics
    tuple val(meta), path("*.pdf"), emit: histogram
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // JVM heap: 80% of the task memory in MB, or 3072 MB when no memory is set.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[Picard CollectInsertSizeMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    // $args is interpolated exactly once, right after the tool name like the
    // sibling Picard modules. Passing it twice duplicates every user option,
    // and Picard refuses single-valued arguments that are given more than once.
    """
    picard \\
        -Xmx${avail_mem}M \\
        CollectInsertSizeMetrics \\
        $args \\
        --INPUT $bam \\
        --OUTPUT ${prefix}.txt \\
        --Histogram_FILE ${prefix}.pdf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(picard CollectInsertSizeMetrics --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """

    stub:
    // Creates empty placeholder outputs. No JVM heap is computed here because
    // the stub never launches the metrics tool; only `--version` is queried.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.pdf
    touch ${prefix}.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(picard CollectInsertSizeMetrics --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """
}
// nf-test suite for PICARD_COLLECTINSERTSIZEMETRICS: one real run and one
// '-stub' run on the sarscov2 paired-end test BAM.
nextflow_process {

    name "Test Process PICARD_COLLECTINSERTSIZEMETRICS"
    script "../main.nf"
    process "PICARD_COLLECTINSERTSIZEMETRICS"

    tag "modules"
    tag "modules_nfcore"
    tag "picard"
    tag "picard/collectinsertsizemetrics"

    test("test-picard-collectinsertsizemetrics") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
                ]

                """
            }
        }

        then {
            // Only lines 5..8 of the metrics file, the histogram file *name* and
            // the versions channel are snapshotted.
            // NOTE(review): presumably the remaining metrics lines and the PDF
            // bytes are not stable between runs - confirm.
            assertAll(
                { assert process.success },
                { assert snapshot(
                    file(process.out.metrics[0][1]).readLines()[5..8],
                    file(process.out.histogram[0][1]).name,
                    process.out.versions
                    ).match()
                }
            )
        }
    }

    // Stub run: the whole output (empty placeholder files) is snapshotted.
    test("test-picard-collectinsertsizemetrics-stub") {
        options '-stub'
        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
                ]

                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

}
// PICARD_COLLECTRNASEQMETRICS: run Picard CollectRnaSeqMetrics on one alignment
// file using a refFlat annotation, optionally with a reference FASTA and a
// ribosomal interval list, and emit <prefix>.rna_metrics.
process PICARD_COLLECTRNASEQMETRICS {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // Galaxy depot Singularity image under Singularity (unless
    // task.ext.singularity_pull_docker_container is set), else biocontainers.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' :
        'biocontainers/picard:3.3.0--hdfd78af_0' }"

    input:
    tuple val(meta), path(bam)
    path ref_flat
    path fasta
    path rrna_intervals

    output:
    tuple val(meta), path("*.rna_metrics")  , emit: metrics
    // The script itself never names a PDF on the command line, hence optional.
    // NOTE(review): presumably a chart is only written when requested through
    // task.ext.args - confirm against the Picard options in use.
    tuple val(meta), path("*.pdf")          , emit: pdf, optional: true
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional inputs: each flag is only emitted when the file was supplied.
    def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
    def rrna = rrna_intervals ? "--RIBOSOMAL_INTERVALS ${rrna_intervals}" : ""
    // JVM heap: 80% of the task memory in MB, or 3072 MB when no memory is set.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[Picard CollectRnaSeqMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    """
    picard \\
        -Xmx${avail_mem}M \\
        CollectRnaSeqMetrics \\
        $args \\
        $reference \\
        $rrna \\
        --REF_FLAT $ref_flat \\
        --INPUT $bam \\
        --OUTPUT ${prefix}.rna_metrics

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(echo \$(picard CollectRnaSeqMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """

    stub:
    // Creates an empty metrics file only; no PDF is produced in stub mode.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.rna_metrics

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(echo \$(picard CollectRnaSeqMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """
}
// nf-test suite for PICARD_COLLECTRNASEQMETRICS. The refFlat input is not a
// static test file: it is produced in `setup` by running UCSC_GTFTOGENEPRED on
// the sarscov2 GTF, and its `refflat` output feeds input[1] of every test.
nextflow_process {

    name "Test Process PICARD_COLLECTRNASEQMETRICS"
    script "../main.nf"
    process "PICARD_COLLECTRNASEQMETRICS"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "picard"
    tag "picard/collectrnaseqmetrics"
    tag "ucsc/gtftogenepred"

    setup {
        run("UCSC_GTFTOGENEPRED") {
            script "../../../ucsc/gtftogenepred/main.nf"
            process {
                """
                input[0] = Channel.of([
                    [ id:'test' ],
                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ]
                ])
                """
            }
        }
    }

    // Real run with a reference FASTA and no ribosomal intervals (input[3] = []).
    test("sarscov2 - fasta - gtf") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false, strandedness:'forward' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
                ]
                input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] }
                input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                input[3] = []
                """
            }
        }

        then {
            // The metrics file is checked by content (tool name present) rather
            // than snapshotted; only the versions and pdf channels are snapshotted.
            assertAll(
                { assert process.success },
                { assert file(process.out.metrics[0][1]).text.contains('CollectRnaSeqMetrics') },
                { assert snapshot(
                    process.out.versions,
                    process.out.pdf
                    ).match() }
            )
        }
    }

    // Stub run with the same inputs; the whole output is snapshotted.
    test("sarscov2 - fasta - gtf - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false, strandedness:'forward' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
                ]
                input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] }
                input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                input[3] = []
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
+ "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + + ], + "versions": [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:57.248132065" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config new file mode 100644 index 00000000..bc82e10c --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName:UCSC_GTFTOGENEPRED { + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" + } + withName:PICARD_COLLECTRNASEQMETRICS { + ext.args = { ( meta.strandedness == "forward" || meta.single_end ) ? + "--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND" : + meta.strandedness == "reverse" ? 
// PICARD_COLLECTWGSMETRICS: run Picard CollectWgsMetrics on one alignment file
// against a reference FASTA, optionally restricted to an interval list, and
// emit <prefix>.CollectWgsMetrics.coverage_metrics.
process PICARD_COLLECTWGSMETRICS {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // Galaxy depot Singularity image under Singularity (unless
    // task.ext.singularity_pull_docker_container is set), else biocontainers.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' :
        'biocontainers/picard:3.3.0--hdfd78af_0' }"

    input:
    // bai and fai (and meta2/meta3) are staged but never referenced in the
    // command below.
    // NOTE(review): presumably they are staged so the indexes sit next to the
    // BAM/FASTA in the work directory - confirm.
    tuple val(meta), path(bam), path(bai)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    path  intervallist

    output:
    tuple val(meta), path("*_metrics"), emit: metrics
    path  "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // JVM heap: 80% of the task memory in MB, or 3072 MB when no memory is set.
    def avail_mem = 3072
    // '--INTERVALS' is only passed when an interval list was supplied.
    def interval  = intervallist ? "--INTERVALS ${intervallist}" : ''
    if (!task.memory) {
        log.info '[Picard CollectWgsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    """
    picard \\
        -Xmx${avail_mem}M \\
        CollectWgsMetrics \\
        $args \\
        --INPUT $bam \\
        --OUTPUT ${prefix}.CollectWgsMetrics.coverage_metrics \\
        --REFERENCE_SEQUENCE ${fasta} \\
        $interval


    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:)
    END_VERSIONS
    """

    stub:
    // Creates an empty metrics file with the same name the real script writes.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.CollectWgsMetrics.coverage_metrics

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(picard CollectWgsMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:)
    END_VERSIONS
    """
}
[ id:'genome' ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta,fna}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Genome fasta file index + pattern: "*.{fai}" + - - intervallist: + type: file + description: Picard Interval List. Defines which contigs to include. Can be + generated from a BED file with GATK BedToIntervalList. +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_metrics": + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test new file mode 100644 index 00000000..a3984566 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test @@ -0,0 +1,83 @@ + +nextflow_process { + + name "Test Process PICARD_COLLECTWGSMETRICS" + script "../main.nf" + process "PICARD_COLLECTWGSMETRICS" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectwgsmetrics" + + test("test-picard-collectwgsmetrics") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).text.contains('coverage high_quality_coverage_count'), + process.out.versions + ).match() + } + ) + } + } + + test("test-picard-collectwgsmetrics-with-interval") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.metrics[0][1]).text.contains('coverage high_quality_coverage_count'), + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap new file mode 100644 index 00000000..1958fcde --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "test-picard-collectwgsmetrics-with-interval": { + "content": [ + true, + [ + "versions.yml:md5,9927db69fdd55176be5cdbd427d000c2" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:15:18.13771243" + }, + "test-picard-collectwgsmetrics": { + "content": [ + 
true, + [ + "versions.yml:md5,9927db69fdd55176be5cdbd427d000c2" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:14:57.786056996" + } +} \ No newline at end of file diff --git a/modules/nf-core/rrnatranscripts/environment.yml b/modules/nf-core/rrnatranscripts/environment.yml new file mode 100644 index 00000000..6f09494b --- /dev/null +++ b/modules/nf-core/rrnatranscripts/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - python=3.12.2 diff --git a/modules/nf-core/rrnatranscripts/main.nf b/modules/nf-core/rrnatranscripts/main.nf new file mode 100644 index 00000000..982c53cd --- /dev/null +++ b/modules/nf-core/rrnatranscripts/main.nf @@ -0,0 +1,43 @@ +process RRNATRANSCRIPTS { + tag "$gtf" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.12' : + 'biocontainers/python:3.12' }" + + input: + path(gtf) + + output: + path("*rrna_intervals.gtf") , emit: rrna_gtf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${gtf.baseName}" + """ + grep -E '^#|rRNA' ${gtf} > ${prefix}_rrna_intervals.gtf || true + if [ ! 
-s ${prefix}_rrna_intervals.gtf ]; then + rm ${prefix}_rrna_intervals.gtf + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed -e "s/Python //g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${gtf.baseName}" + """ + touch ${prefix}_rrna_intervals.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed -e "s/Python //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rrnatranscripts/meta.yml b/modules/nf-core/rrnatranscripts/meta.yml new file mode 100644 index 00000000..93f6a10e --- /dev/null +++ b/modules/nf-core/rrnatranscripts/meta.yml @@ -0,0 +1,34 @@ +name: rrnatranscripts +description: Ribosomal RNA extraction from a GTF file. +keywords: + - ribosomal + - rna + - genomics +tools: + - rrnatranscripts: + description: | + Extraction of ribosomal RNA + homepage: https://github.com/nf-core/rnafusion + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - gtf: + type: file + description: GTF file + pattern: "*.{gtf}" +output: + # + - rrna_gtf: + - "*rrna_intervals.gtf": + type: file + description: GTF file with ribosomal RNA only + pattern: "*.{gtf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rannick" +maintainers: + - "@rannick" diff --git a/modules/nf-core/rrnatranscripts/templates/get_rrna_transcripts.py b/modules/nf-core/rrnatranscripts/templates/get_rrna_transcripts.py new file mode 100644 index 00000000..ea3ce0f4 --- /dev/null +++ b/modules/nf-core/rrnatranscripts/templates/get_rrna_transcripts.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +import logging +import platform +import sys +from pathlib import Path + +# Configure logging +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def get_rrna_intervals(gtf: str, rrna_transcripts: str): + """ + 
Get lines containing ``#`` or ``gene_type rRNA`` or ```` or ``gene_type rRNA_pseudogene`` or ``gene_type MT_rRNA`` + Create output file + + Args: + file_in (pathlib.Path): The given GTF file. + file_out (pathlib.Path): Where the ribosomal RNA GTF file should + be created; always in GTF format. + """ + patterns = { + "#", + 'transcript_biotype "Mt_rRNA"', + 'transcript_biotype "rRNA"', + 'transcript_biotype "rRNA_pseudogene"', + } + line_starts = {"MT", "1", "2", "3", "4", "5", "6", "7", "8", "9"} + out_lines = [] + path_gtf = Path(gtf) + path_rrna_transcripts = Path(rrna_transcripts) + if not path_gtf.is_file(): + logger.error(f"The given input file {gtf} was not found!") + sys.exit(2) + with path_gtf.open() as f: + data = f.readlines() + for line in data: + for pattern in patterns: + if pattern in line: + for line_start in line_starts: + if line.startswith(line_start): + out_lines.append(line) + if out_lines != []: + with path_rrna_transcripts.open(mode="w") as out_file: + out_file.writelines(out_lines) + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + + +if __name__ == "__main__": + if "${task.ext.prefix}" != "null": + prefix = "${task.ext.prefix}." + else: + prefix = "${task.ext.gtf}." 
+ + if not get_rrna_intervals("$gtf", f"{prefix}_rrna_intervals.gtf"): + logging.error("Failed to extract rrna transcipts.") + + # Write the versions + versions_this_module = {} + versions_this_module["${task.process}"] = {"python": platform.python_version()} + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions_this_module)) diff --git a/modules/nf-core/rrnatranscripts/tests/main.nf.test b/modules/nf-core/rrnatranscripts/tests/main.nf.test new file mode 100644 index 00000000..db0d5565 --- /dev/null +++ b/modules/nf-core/rrnatranscripts/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process RRNATRANSCRIPTS" + script "../main.nf" + process "RRNATRANSCRIPTS" + + tag "modules" + tag "modules_nfcore" + tag "rrnatranscripts" + + test("homo_sapiens - gtf") { + + when { + process { + """ + input[0] = [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + ] """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + ] """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/rrnatranscripts/tests/main.nf.test.snap b/modules/nf-core/rrnatranscripts/tests/main.nf.test.snap new file mode 100644 index 00000000..a190ddcb --- /dev/null +++ b/modules/nf-core/rrnatranscripts/tests/main.nf.test.snap @@ -0,0 +1,40 @@ +{ + "homo_sapiens - gtf": { + "content": [ + { + "0": [ + + ], + "1": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ], + "rrna_gtf": [ + + ], + "versions": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ] + } + ], + "timestamp": "2024-05-21T13:55:44.831609" + }, + "homo_sapiens - 
gtf - stub": { + "content": [ + { + "0": [ + "genome_rrna_intervals.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ], + "rrna_gtf": [ + "genome_rrna_intervals.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,fa2f785dbbe87a180f4254910957e01e" + ] + } + ], + "timestamp": "2024-05-21T13:55:56.134136" + } +} \ No newline at end of file diff --git a/modules/nf-core/rrnatranscripts/tests/tags.yml b/modules/nf-core/rrnatranscripts/tests/tags.yml new file mode 100644 index 00000000..ade15226 --- /dev/null +++ b/modules/nf-core/rrnatranscripts/tests/tags.yml @@ -0,0 +1,2 @@ +rrnatranscripts: + - "modules/nf-core/rrnatranscripts/**" diff --git a/modules/nf-core/salmon/index/environment.yml b/modules/nf-core/salmon/index/environment.yml new file mode 100644 index 00000000..b3f75777 --- /dev/null +++ b/modules/nf-core/salmon/index/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::salmon=1.10.3 diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf new file mode 100644 index 00000000..3d653c0d --- /dev/null +++ b/modules/nf-core/salmon/index/main.nf @@ -0,0 +1,72 @@ +process SALMON_INDEX { + tag "$transcript_fasta" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' : + 'biocontainers/salmon:1.10.3--h6dccd9a_2' }" + + input: + path genome_fasta + path transcript_fasta + + output: + path "salmon" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" + def gentrome = "gentrome.fa" + if (genome_fasta.endsWith('.gz')) { + get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" + gentrome = "gentrome.fa.gz" + } + """ + $get_decoy_ids + sed -i.bak -e 's/>//g' decoys.txt + cat $transcript_fasta $genome_fasta > $gentrome + + salmon \\ + index \\ + --threads $task.cpus \\ + -t $gentrome \\ + -d decoys.txt \\ + $args \\ + -i salmon + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ + + stub: + """ + mkdir salmon + touch salmon/complete_ref_lens.bin + touch salmon/ctable.bin + touch salmon/ctg_offsets.bin + touch salmon/duplicate_clusters.tsv + touch salmon/info.json + touch salmon/mphf.bin + touch salmon/pos.bin + touch salmon/pre_indexing.log + touch salmon/rank.bin + touch salmon/refAccumLengths.bin + touch salmon/ref_indexing.log + touch salmon/reflengths.bin + touch salmon/refseq.bin + touch salmon/seq.bin + touch salmon/versionInfo.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/salmon/index/meta.yml b/modules/nf-core/salmon/index/meta.yml new file mode 100644 index 00000000..48486a2b --- /dev/null +++ b/modules/nf-core/salmon/index/meta.yml @@ -0,0 +1,40 @@ +name: salmon_index +description: Create index for salmon +keywords: + - index + - fasta + - genome + - reference +tools: + - salmon: + 
description: | + Salmon is a tool for wicked-fast transcript quantification from RNA-seq data + homepage: https://salmon.readthedocs.io/en/latest/salmon.html + manual: https://salmon.readthedocs.io/en/latest/salmon.html + doi: 10.1038/nmeth.4197 + licence: ["GPL-3.0-or-later"] + identifier: biotools:salmon +input: + - - genome_fasta: + type: file + description: Fasta file of the reference genome + - - transcript_fasta: + type: file + description: Fasta file of the reference transcriptome +output: + - index: + - salmon: + type: directory + description: Folder containing the star index files + pattern: "salmon" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test new file mode 100644 index 00000000..16b3c1a7 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SALMON_INDEX" + script "../main.nf" + process "SALMON_INDEX" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/index" + + test("sarscov2") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions stub") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap new file mode 100644 index 00000000..e5899b51 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:00:47.087293189" + }, + "versions stub": { + "content": [ + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:03.89824494" + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/index/tests/tags.yml b/modules/nf-core/salmon/index/tests/tags.yml new file mode 100644 index 00000000..02997890 --- /dev/null +++ b/modules/nf-core/salmon/index/tests/tags.yml @@ -0,0 +1,2 @@ +salmon/index: + - modules/nf-core/salmon/index/** diff --git a/modules/nf-core/salmon/quant/environment.yml b/modules/nf-core/salmon/quant/environment.yml new file mode 100644 index 00000000..b3f75777 --- /dev/null +++ b/modules/nf-core/salmon/quant/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::salmon=1.10.3 diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf new file mode 100644 index 00000000..f1e3b5cd --- /dev/null +++ b/modules/nf-core/salmon/quant/main.nf @@ -0,0 +1,96 @@ +process SALMON_QUANT { + tag 
"$meta.id" + label "process_medium" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' : + 'biocontainers/salmon:1.10.3--h6dccd9a_2' }" + + input: + tuple val(meta), path(reads) + path index + path gtf + path transcript_fasta + val alignment_mode + val lib_type + + output: + tuple val(meta), path("${prefix}") , emit: results + tuple val(meta), path("*info.json") , emit: json_info, optional: true + tuple val(meta), path("*lib_format_counts.json"), emit: lib_format_counts, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def reference = "--index $index" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def input_reads = meta.single_end ? "-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" + if (alignment_mode) { + reference = "-t $transcript_fasta" + input_reads = "-a $reads" + } + + def strandedness_opts = [ + 'A', 'U', 'SF', 'SR', + 'IS', 'IU' , 'ISF', 'ISR', + 'OS', 'OU' , 'OSF', 'OSR', + 'MS', 'MU' , 'MSF', 'MSR' + ] + def strandedness = 'A' + if (lib_type) { + if (strandedness_opts.contains(lib_type)) { + strandedness = lib_type + } else { + log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'." + } + } else { + strandedness = meta.single_end ? 'U' : 'IU' + if (meta.strandedness == 'forward') { + strandedness = meta.single_end ? 'SF' : 'ISF' + } else if (meta.strandedness == 'reverse') { + strandedness = meta.single_end ? 
'SR' : 'ISR' + } + } + """ + salmon quant \\ + --geneMap $gtf \\ + --threads $task.cpus \\ + --libType=$strandedness \\ + $reference \\ + $input_reads \\ + $args \\ + -o $prefix + + if [ -f $prefix/aux_info/meta_info.json ]; then + cp $prefix/aux_info/meta_info.json "${prefix}_meta_info.json" + fi + if [ -f $prefix/lib_format_counts.json ]; then + cp $prefix/lib_format_counts.json "${prefix}_lib_format_counts.json" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}_meta_info.json + touch ${prefix}_lib_format_counts.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/salmon/quant/meta.yml b/modules/nf-core/salmon/quant/meta.yml new file mode 100644 index 00000000..4cacde0f --- /dev/null +++ b/modules/nf-core/salmon/quant/meta.yml @@ -0,0 +1,84 @@ +name: salmon_quant +description: gene/transcript quantification with Salmon +keywords: + - index + - fasta + - genome + - reference +tools: + - salmon: + description: | + Salmon is a tool for wicked-fast transcript quantification from RNA-seq data + homepage: https://salmon.readthedocs.io/en/latest/salmon.html + manual: https://salmon.readthedocs.io/en/latest/salmon.html + doi: 10.1038/nmeth.4197 + licence: ["GPL-3.0-or-later"] + identifier: biotools:salmon +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files for single-end or paired-end data. + Multiple single-end fastqs or pairs of paired-end fastqs are + handled. 
+ - - index: + type: directory + description: Folder containing the star index files + - - gtf: + type: file + description: GTF of the reference transcriptome + - - transcript_fasta: + type: file + description: Fasta file of the reference transcriptome + - - alignment_mode: + type: boolean + description: whether to run salmon in alignment mode + - - lib_type: + type: string + description: | + Override library type inferred based on strandedness defined in meta object +output: + - results: + - meta: + type: directory + description: Folder containing the quantification results for a specific sample + pattern: "${prefix}" + - ${prefix}: + type: directory + description: Folder containing the quantification results for a specific sample + pattern: "${prefix}" + - json_info: + - meta: + type: file + description: File containing meta information from Salmon quant + pattern: "*info.json" + - "*info.json": + type: file + description: File containing meta information from Salmon quant + pattern: "*info.json" + - lib_format_counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*lib_format_counts.json": + type: file + description: File containing the library format counts + pattern: "*lib_format_counts.json" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test b/modules/nf-core/salmon/quant/tests/main.nf.test new file mode 100644 index 00000000..2964cc3d --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/main.nf.test @@ -0,0 +1,320 @@ +nextflow_process { + + name "Test Process SALMON_QUANT" + script "../main.nf" + process "SALMON_QUANT" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/quant" + tag "salmon/index" + + setup { + run("SALMON_INDEX") { + script "../../../salmon/index/main.nf" + process { + """ + input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + } + + test("sarscov2 - single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - single_end stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - single_end lib type A") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = 'A' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert 
path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - single_end lib type A stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = 'A' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end: true ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end multiple") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = 
SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } + + test("sarscov2 - pair_end multiple stub") { + options "-stub" + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = SALMON_INDEX.out.index + input[2] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true)]) + input[3] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + input[4] = false + input[5] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.json_info.get(0).get(1)).exists() }, + { assert path(process.out.results.get(0).get(1)).exists() }, + { assert snapshot( + process.out.versions, + process.out.lib_format_counts + ).match() } + ) + } + + } +} diff --git 
a/modules/nf-core/salmon/quant/tests/main.nf.test.snap b/modules/nf-core/salmon/quant/tests/main.nf.test.snap new file mode 100644 index 00000000..ea22a80c --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/main.nf.test.snap @@ -0,0 +1,170 @@ +{ + "sarscov2 - single_end": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,c7999dfccd32c090d94e5951522eecd4" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:16.989080539" + }, + "sarscov2 - single_end stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:29.340996235" + }, + "sarscov2 - single_end lib type A": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,c7999dfccd32c090d94e5951522eecd4" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:01:43.056167576" + }, + "sarscov2 - pair_end multiple": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lib_format_counts.json:md5,4a2ee0fac91a4a3471872808d8bd3ff8" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:03:05.500792631" + }, + "sarscov2 - pair_end multiple stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + 
"nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:03:26.428959203" + }, + "sarscov2 - single_end lib type A stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:02:03.420850208" + }, + "sarscov2 - pair_end": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,e9516e73c9fb39145513b2a41a0af95f" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:02:16.130074696" + }, + "sarscov2 - pair_end stub": { + "content": [ + [ + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lib_format_counts.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:02:39.470004547" + } +} \ No newline at end of file diff --git a/modules/nf-core/salmon/quant/tests/nextflow.config b/modules/nf-core/salmon/quant/tests/nextflow.config new file mode 100644 index 00000000..37c08212 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SALMON_QUANT { + ext.args = '--minAssignedFrags 1' + } + +} diff --git a/modules/nf-core/salmon/quant/tests/tags.yml b/modules/nf-core/salmon/quant/tests/tags.yml new file mode 100644 index 00000000..048d8164 --- /dev/null +++ b/modules/nf-core/salmon/quant/tests/tags.yml @@ -0,0 +1,2 @@ +salmon/quant: + - modules/nf-core/salmon/quant/** diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ 
b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..28c0a81c --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { /* Runs "samtools faidx" on the staged FASTA with task.ext.args appended; every file output is optional, only versions.yml is always emitted. */ + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) /* not referenced in the script or stub; presumably staged so samtools can reuse an existing index - TODO confirm */ + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: /* Dry run: touch the file named after -o or --output in ext.args (if any) and always touch ${fasta}.fai. */ + def match = ((task.ext.args ?: '') =~ /-o(?:utput)?\s+(\S+)/).findAll() /* capture one non-blank token only, so options that follow the output name are not handed to touch; a missing ext.args is treated as empty */ + def fastacmd = match[0] ? 
"touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..6721b2cb --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,80 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + - fai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..17244ef2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: 
true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { /* NOTE(review): despite "stub" in the name this test sets no options "-stub", so it presumably runs the real script block with the nextflow2.config args - confirm whether a stub run was intended */ + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { /* NOTE(review): no options "-stub" and no config here either; the inputs match those of test_samtools_faidx, so this presumably repeats the real run - confirm */ + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..1bbb3ec2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-09-16T07:57:47.450887871" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:04.804905659" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:23.831268154" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + 
"versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:35.600243706" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:54.705460167" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 00000000..f76a3ba0 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 00000000..33ebbd5d --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 00000000..e4a83948 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null 
+++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..31175610 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { /* Runs "samtools index" on the staged input with task.ext.args; the bai, csi and crai outputs are all optional, only versions.yml is always emitted. */ + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: /* -@ is given task.cpus-1; presumably because samtools counts -@ as threads in addition to the main one - confirm against the samtools manual */ + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: /* Dry run: touch ${input}.<ext>, where ext is crai when the input extension is cram, otherwise csi when args contains "-c", otherwise bai. */ + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? 
"csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..db8df0d5 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..ca34fb5c --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = 
Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..72d65e81 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ 
b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..caf3c61a --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,72 @@ +process SAMTOOLS_SORT { /* Pipes "samtools cat" of the staged input(s) into "samtools sort" and writes ${prefix}.${extension}; extension is sam, cram or bam depending on the --output-fmt found in task.ext.args. All file outputs are optional, only versions.yml is always emitted. */ + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + if ("$bam" == "${prefix}.bam" || "$bam" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" /* keeps the original .bam comparison and additionally compares against the real output name, so cram and sam outputs cannot overwrite the staged input either */ 
+ + """ + samtools cat \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: /* Dry run: touch ${prefix}.${extension}, plus a .csi next to a bam output or a .crai next to a cram output. */ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + """ + touch ${prefix}.${extension} + if [ "${extension}" == "bam" ]; + then + touch ${prefix}.${extension}.csi + elif [ "${extension}" == "cram" ]; + then + touch ${prefix}.${extension}.crai + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..a9dbec5a --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,92 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..b05e6691 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, // sorted BAM is snapshotted with its md5; the index below by file name only + process.out.csi.collect { it.collect { it instanceof Map ?
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, // the snapshot holds a single test.sorted.bam for the two input BAMs + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" // same prefix as nextflow.config; ext.args adds "--output-fmt cram" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, // CRAM and its index are compared by file name only + process.out.crai.collect { it.collect { it instanceof Map ?
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } // whole output map is snapshotted, md5s included + ) + } + } + + test("multiple bam - stub") { // NOTE(review): no `options "-stub"` here, unlike the other "- stub" tests, and the snapshot holds the same BAM md5 as "multiple bam" - presumably the real script runs; confirm intent + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" // passes -stub to Nextflow so the stub: block is used + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + )
+ } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..469891fe --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,287 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:49:58.207549273" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:08.630951018" + }, + "cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", 
+ "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:19.061912443" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + 
[ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:46.372244" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..f642771f --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 00000000..3a8c0188 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..02cda6e6 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 
100644 index 00000000..a6941e63 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,77 @@ +process SAMTOOLS_VIEW { // samtools view wrapper: filter/convert SAM/BAM/CRAM, optionally by read name + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" + + input: + tuple val(meta), path(input), path(index) // alignment file and its (optional) index + tuple val(meta2), path(fasta) // reference; passed as --reference when set + path qname // read-name list; when set, --qname-file and --output-unselected are added + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true // index of the unselected file + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // no def: prefix and file_type are read by the output: paths above + def reference = fasta ? "--reference ${fasta}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() // explicit --output-fmt in ext.args wins, else keep the input's extension + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" // shell snippet creating the stub index; NOTE(review): reassigns the name of the index input + + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..caa7b015 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,141 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{bai,csi,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bam,cram,sam}.{bai,csi,crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 00000000..c10d1081 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 00000000..771ae033 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 00000000..37b81a91 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" +
script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, // output BAM is checked by file name only + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, // no config in this test, so the output keeps the input's cram extension + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert
snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, // bam.config sets "--output-fmt bam", so a BAM is expected from the CRAM input + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" // ext.args = "--output-fmt bam --write-index" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, // the snapshot expects test.bam.csi next to the BAM + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") },
+ { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, // only this test passes a qname file, so only here are the unselected outputs asserted + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" // passes -stub to Nextflow so the stub: block is used + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam',
checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..63849b03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,528 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:26:24.461775464" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + 
], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": 
{ + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:51.953436682" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:14.475388399" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:49.673441798" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp":
"2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:23:27.151650338" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { +
"content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:12.95416913" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 00000000..4fdf1dd1 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 00000000..7c57530a --- /dev/null +++ b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf new file mode 100644 index 00000000..417071ba --- /dev/null +++ b/modules/nf-core/star/align/main.nf @@ -0,0 +1,110 @@ +process STAR_ALIGN
{ + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) + val star_ignore_sjdbgtf + val seq_platform + val seq_center + + output: + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out'), emit: log_progress + path "versions.yml" , emit: versions + + tuple val(meta), path('*d.out.bam') , optional:true, emit: bam + tuple val(meta), path("${prefix}.sortedByCoord.out.bam") , optional:true, emit: bam_sorted + tuple val(meta), path("${prefix}.Aligned.sortedByCoord.out.bam") , optional:true, emit: bam_sorted_aligned + tuple val(meta), path('*toTranscriptome.out.bam') , optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab + tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab + tuple val(meta), path('*.out.junction') , optional:true, emit: junction + tuple val(meta), path('*.out.sam') , optional:true, emit: sam + tuple val(meta), path('*.wig') , optional:true, emit: wig + tuple val(meta), path('*.bg') , optional:true, emit: bedgraph + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reads1 = [], 
reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" + def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" + def seq_center = seq_center ? "'CN:$seq_center'" : "" + attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" + def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' + mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + """ + STAR \\ + --genomeDir $index \\ + --readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\ + --runThreadN $task.cpus \\ + --outFileNamePrefix $prefix. \\ + $out_sam_type \\ + $ignore_gtf \\ + $attrRG \\ + $args + + $mv_unsorted_bam + + if [ -f ${prefix}.Unmapped.out.mate1 ]; then + mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq + gzip ${prefix}.unmapped_1.fastq + fi + if [ -f ${prefix}.Unmapped.out.mate2 ]; then + mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq + gzip ${prefix}.unmapped_2.fastq + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.unmapped_1.fastq.gz + echo "" | gzip > ${prefix}.unmapped_2.fastq.gz + touch ${prefix}Xd.out.bam + touch ${prefix}.Log.final.out + touch ${prefix}.Log.out + touch ${prefix}.Log.progress.out + touch ${prefix}.sortedByCoord.out.bam + touch ${prefix}.toTranscriptome.out.bam + touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.Aligned.sortedByCoord.out.bam + touch ${prefix}.tab + 
touch ${prefix}.SJ.out.tab + touch ${prefix}.ReadsPerGene.out.tab + touch ${prefix}.Chimeric.out.junction + touch ${prefix}.out.sam + touch ${prefix}.Signal.UniqueMultiple.str1.out.wig + touch ${prefix}.Signal.UniqueMultiple.str1.out.bg + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml new file mode 100644 index 00000000..5cfe763e --- /dev/null +++ b/modules/nf-core/star/align/meta.yml @@ -0,0 +1,230 @@ +name: star_align +description: Align reads to a reference genome using STAR +keywords: + - align + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] + identifier: biotools:star +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - index: + type: directory + description: STAR genome index + pattern: "star" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - - seq_platform: + type: string + description: Sequencing platform + - - seq_center: + type: string + description: Sequencing center +output: + - log_final: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.final.out": + type: file + description: STAR final log file + pattern: "*Log.final.out" + - log_out: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.out": + type: file + description: STAR log out file + pattern: "*Log.out" + - log_progress: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Log.progress.out": + type: file + description: STAR log progress file + pattern: "*Log.progress.out" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*d.out.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - bam_sorted: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sortedByCoord.out.bam: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" + - bam_sorted_aligned: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}.Aligned.sortedByCoord.out.bam: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*.Aligned.sortedByCoord.out.bam" + - bam_transcript: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*toTranscriptome.out.bam": + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" + - bam_unsorted: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*Aligned.unsort.out.bam": + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*fastq.gz": + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" + - tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tab": + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - spl_junc_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.SJ.out.tab": + type: file + description: STAR output splice junction tab file + pattern: "*.SJ.out.tab" + - read_per_gene_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ReadsPerGene.out.tab": + type: file + description: STAR output read per gene tab file + pattern: "*.ReadsPerGene.out.tab" + - junction: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.out.junction": + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.out.sam" + - "*.out.sam": + type: file + description: STAR output SAM file(s) (optional) + pattern: "*.out.sam" + - wig: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.wig": + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - bedgraph: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bg": + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" +authors: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 00000000..a62c17db --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,593 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + tag "star/genomegenerate" + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + 
process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ 
id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + 
file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + file(process.out.junction[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + 
process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), + process.out.bedgraph, + process.out.fastq, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + 
process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - single_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba - stub") { + options "-stub" + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion - stub") { + options "-stub" + config "./nextflow.starfusion.config" + + 
setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 00000000..b533fb8b --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,1913 @@ +{ + "homo_sapiens - single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], 
+ "tab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:08.738074176" + }, + "homo_sapiens - paired_end - arriba - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] 
+ ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:36.122131869" + }, + "homo_sapiens - single_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "9f76be49a6607613a64f760101bdddce", + "9f76be49a6607613a64f760101bdddce", + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:01:22.197991909" + }, + "homo_sapiens - paired_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "db9a8324b5163b025bcc0c33e848486", + "db9a8324b5163b025bcc0c33e848486", + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:02:06.988663857" + }, + "homo_sapiens - paired_end - multiple - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { 
+ "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:10:12.005468781" + }, + "homo_sapiens - paired_end - multiple": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "3e54e45f5dc3e9c1f2fc55bc41531a87", + "3e54e45f5dc3e9c1f2fc55bc41531a87", + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + [ + { + 
"id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:08:54.877286681" + }, + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:20.911466345" + }, + "homo_sapiens - paired_end - starfusion": { + "content": [ + "test.Log.final.out", + "test.Log.out", + 
"test.Log.progress.out", + "test.Chimeric.out.junction", + "caee9dcda13882d4913456973c25b57a", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:07:25.0639914" + }, + "homo_sapiens - paired_end - arriba": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + "1a3abe88fb2490589c58497d39921bcc", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + + ], + [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:04:00.685784211" + }, + "homo_sapiens - paired_end - starfusion - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "16": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_sorted_aligned": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-20T17:09:53.173671551" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 00000000..cf09323f --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 00000000..18bc2ee8 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' 
+ } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 index 00000000..7880bfcf --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,11 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 00000000..8beace16 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..7c57530a --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf new file mode 100644 index 
00000000..8f0c67e7 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -0,0 +1,119 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + + output: + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": 
+ star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 00000000..33c1f65f --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,56 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA 
sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] + identifier: biotools:star +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome +output: + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - star: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..4d619c47 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( 
+ { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions) + .match() } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match() } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..3db25678 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "fasta_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, 
chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]", + [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:37:47.410432728" + }, + "fasta_gtf_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:38:09.165234795" + }, + "fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:38:19.530862664" + }, + "fasta": { + "content": [ + "[Genome, 
Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]", + [ + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T20:37:58.667436398" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/stringtie/merge/environment.yml b/modules/nf-core/stringtie/merge/environment.yml new file mode 100644 index 00000000..0556de41 --- /dev/null +++ b/modules/nf-core/stringtie/merge/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::stringtie=2.2.1 diff --git a/modules/nf-core/stringtie/merge/main.nf b/modules/nf-core/stringtie/merge/main.nf new file mode 100644 index 00000000..c2568219 --- /dev/null +++ b/modules/nf-core/stringtie/merge/main.nf @@ -0,0 +1,46 @@ +process STRINGTIE_MERGE { + label 'process_medium' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : + 'biocontainers/stringtie:2.2.1--hecb563c_2' }" + + input: + path stringtie_gtf + path annotation_gtf + + output: + path "stringtie.merged.gtf", emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference = annotation_gtf ? 
"-G $annotation_gtf" : "" + """ + stringtie \\ + --merge $stringtie_gtf \\ + $reference \\ + -o stringtie.merged.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ + + stub: + """ + touch stringtie.merged.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stringtie/merge/meta.yml b/modules/nf-core/stringtie/merge/meta.yml new file mode 100644 index 00000000..cf6902b3 --- /dev/null +++ b/modules/nf-core/stringtie/merge/meta.yml @@ -0,0 +1,40 @@ +name: stringtie_merge +description: Merges the annotation gtf file and the stringtie output gtf files +keywords: + - merge + - gtf + - reference +tools: + - stringtie2: + description: | + Transcript assembly and quantification for RNA-Seq + homepage: https://ccb.jhu.edu/software/stringtie/index.shtml + documentation: https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual + licence: ["MIT"] + identifier: biotools:stringtie +input: + - - stringtie_gtf: + type: file + description: | + Stringtie transcript gtf output(s). + pattern: "*.gtf" + - - annotation_gtf: + type: file + description: | + Annotation gtf file (optional). 
+ pattern: "*.gtf" +output: + - gtf: + - stringtie.merged.gtf: + type: file + description: Merged gtf file + pattern: "stringtie.merged.gtf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" +maintainers: + - "@yuukiiwa" diff --git a/modules/nf-core/stringtie/merge/tests/main.nf.test b/modules/nf-core/stringtie/merge/tests/main.nf.test new file mode 100644 index 00000000..bcc648bc --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process STRINGTIE_MERGE" + script "../main.nf" + process "STRINGTIE_MERGE" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/merge" + tag "stringtie/stringtie" + + setup { + run("STRINGTIE_STRINGTIE") { + script "../../stringtie/main.nf" + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + """ + } + } + } + + test("homo_sapiens - forward strandedness") { + + when { + process { + """ + input[0] = STRINGTIE_STRINGTIE.out.transcript_gtf.map { it -> it[1] } + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gtf).match("fs_gtf") }, + { assert snapshot(process.out.versions).match("fs_versions") } + ) + } + } + + test("homo_sapiens - reverse strandedness") { + + when { + process { + """ + input[0] = STRINGTIE_STRINGTIE.out.transcript_gtf.map { it -> it[1] } + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out.gtf).match("rs_gtf") }, + { assert snapshot(process.out.versions).match("rs_versions") } + ) + } + } +} diff --git a/modules/nf-core/stringtie/merge/tests/main.nf.test.snap b/modules/nf-core/stringtie/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..e1040696 --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "rs_versions": { + "content": [ + [ + "versions.yml:md5,b73d45fdebf4c8c446bb01817db1665d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-11-23T14:14:39.697712988" + }, + "rs_gtf": { + "content": [ + [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-11-23T14:14:39.691894799" + }, + "fs_gtf": { + "content": [ + [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-07T16:43:48.130184" + }, + "fs_versions": { + "content": [ + [ + "versions.yml:md5,b73d45fdebf4c8c446bb01817db1665d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-11-23T14:14:20.883140097" + } +} \ No newline at end of file diff --git a/modules/nf-core/stringtie/merge/tests/tags.yml b/modules/nf-core/stringtie/merge/tests/tags.yml new file mode 100644 index 00000000..58cef46b --- /dev/null +++ b/modules/nf-core/stringtie/merge/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/merge: + - modules/nf-core/stringtie/merge/** diff --git a/modules/nf-core/stringtie/stringtie/environment.yml b/modules/nf-core/stringtie/stringtie/environment.yml new file mode 100644 index 00000000..906b7486 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::stringtie=2.2.3 diff --git 
a/modules/nf-core/stringtie/stringtie/main.nf b/modules/nf-core/stringtie/stringtie/main.nf new file mode 100644 index 00000000..4635c8c5 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/main.nf @@ -0,0 +1,68 @@ +process STRINGTIE_STRINGTIE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.3--h43eeafb_0' : + 'biocontainers/stringtie:2.2.3--h43eeafb_0' }" + + input: + tuple val(meta), path(bam) + path annotation_gtf + + output: + tuple val(meta), path("*.transcripts.gtf"), emit: transcript_gtf + tuple val(meta), path("*.abundance.txt") , emit: abundance + tuple val(meta), path("*.coverage.gtf") , optional: true, emit: coverage_gtf + tuple val(meta), path("*.ballgown") , optional: true, emit: ballgown + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = annotation_gtf ? "-G $annotation_gtf" : "" + def ballgown = annotation_gtf ? "-b ${prefix}.ballgown" : "" + def coverage = annotation_gtf ?
"-C ${prefix}.coverage.gtf" : "" + + def strandedness = '' + if (meta.strandedness == 'forward') { + strandedness = '--fr' + } else if (meta.strandedness == 'reverse') { + strandedness = '--rf' + } + """ + stringtie \\ + $bam \\ + $strandedness \\ + $reference \\ + -o ${prefix}.transcripts.gtf \\ + -A ${prefix}.gene.abundance.txt \\ + $coverage \\ + $ballgown \\ + -p $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.transcripts.gtf + touch ${prefix}.gene.abundance.txt + touch ${prefix}.coverage.gtf + touch ${prefix}.ballgown + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + stringtie: \$(stringtie --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/stringtie/stringtie/meta.yml b/modules/nf-core/stringtie/stringtie/meta.yml new file mode 100644 index 00000000..e55b2abf --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/meta.yml @@ -0,0 +1,79 @@ +name: stringtie_stringtie +description: Transcript assembly and quantification for RNA-Seq +keywords: + - transcript + - assembly + - quantification + - gtf +tools: + - stringtie2: + description: | + Transcript assembly and quantification for RNA-Seq + homepage: https://ccb.jhu.edu/software/stringtie/index.shtml + documentation: https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual + licence: ["MIT"] + identifier: biotools:stringtie +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file of aligned reads. + - - annotation_gtf: + type: file + description: | + Annotation gtf file (optional). +output: + - transcript_gtf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.transcripts.gtf": + type: file + description: transcript gtf + pattern: "*.{transcripts.gtf}" + - abundance: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.abundance.txt": + type: file + description: abundance + pattern: "*.{abundance.txt}" + - coverage_gtf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.coverage.gtf": + type: file + description: coverage gtf + pattern: "*.{coverage.gtf}" + - ballgown: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ballgown": + type: file + description: for running ballgown + pattern: "*.{ballgown}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test b/modules/nf-core/stringtie/stringtie/tests/main.nf.test new file mode 100644 index 00000000..2204e849 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test @@ -0,0 +1,213 @@ +nextflow_process { + + name "Test Process STRINGTIE_STRINGTIE" + script "../main.nf" + process "STRINGTIE_STRINGTIE" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "stringtie" + tag "stringtie/stringtie" + + test("sarscov2 [bam] - forward strandedness") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + 
test("sarscov2 [bam] - forward strandedness + reference annotation") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.ballgown, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness + reference annotation") { + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.abundance, + process.out.ballgown, + process.out.transcript_gtf, + process.out.versions + ).match() } + ) + } + } + + test("sarscov2 [bam] - forward strandedness - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + 
"genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 [bam] - forward strandedness + reference annotation - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'forward' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 [bam] - reverse strandedness + reference annotation - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', strandedness:'reverse' ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) ] + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gtf", checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap new file mode 100644 index 00000000..d4645de3 
--- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap @@ -0,0 +1,508 @@ +{ + "sarscov2 [bam] - forward strandedness + reference annotation": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,7d8bce7f2a922e367cedccae7267c22e" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,6b4cf69bc03f3f69890f972a0e8b7471", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + "i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ], + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,37154e7bda96544f24506ee902bb561d" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:56:50.294157199" + }, + "sarscov2 [bam] - forward strandedness": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,6087dfc9700a52d9e4a1ae3fcd1d1dfd" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:56:39.4249133" + }, + "sarscov2 [bam] - forward strandedness - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + 
"strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:23.008470065" + }, + "sarscov2 [bam] - forward strandedness + reference annotation - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": 
"forward" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "forward" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:33.622824981" + }, + "sarscov2 [bam] - reverse strandedness + reference annotation - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:55.803421433" + }, + "sarscov2 [bam] - reverse strandedness - stub": { + "content": [ + { + "0": [ + [ + { 
+ "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ], + "abundance": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ballgown": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.ballgown:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.coverage.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "transcript_gtf": [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:44.825389635" + }, + "sarscov2 [bam] - reverse strandedness + reference annotation": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,7385b870b955dae2c2ab78a70cf05cce" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + [ + "e2t.ctab:md5,e981c0038295ae54b63cedb1083f1540", + "e_data.ctab:md5,879b6696029d19c4737b562e9d149218", + "i2t.ctab:md5,8a117c8aa4334b4c2d4711932b006fb4", + "i_data.ctab:md5,be3abe09740603213f83d50dcf81427f", + "t_data.ctab:md5,3b66c065da73ae0dd41cc332eff6a818" + ] + ] + ], + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + 
"test.transcripts.gtf:md5,fbabb4e3888bbede67f11f692e484880" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:11.793664242" + }, + "sarscov2 [bam] - reverse strandedness": { + "content": [ + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.gene.abundance.txt:md5,d6f5c8cadb8458f1df0427cf790246e3" + ] + ], + [ + [ + { + "id": "test", + "strandedness": "reverse" + }, + "test.transcripts.gtf:md5,01d6da00a3c458420841e57427297183" + ] + ], + [ + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T09:57:01.166309777" + } +} \ No newline at end of file diff --git a/modules/nf-core/stringtie/stringtie/tests/nextflow.config b/modules/nf-core/stringtie/stringtie/tests/nextflow.config new file mode 100644 index 00000000..e3aaa099 --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'STRINGTIE_STRINGTIE' { + ext.args = '' + } +} diff --git a/modules/nf-core/stringtie/stringtie/tests/tags.yml b/modules/nf-core/stringtie/stringtie/tests/tags.yml new file mode 100644 index 00000000..da9b051c --- /dev/null +++ b/modules/nf-core/stringtie/stringtie/tests/tags.yml @@ -0,0 +1,2 @@ +stringtie/stringtie: + - modules/nf-core/stringtie/stringtie/** diff --git a/modules/nf-core/ucsc/gtftogenepred/environment.yml b/modules/nf-core/ucsc/gtftogenepred/environment.yml new file mode 100644 index 00000000..5c4f6c2f --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ucsc-gtftogenepred=447 diff --git a/modules/nf-core/ucsc/gtftogenepred/main.nf b/modules/nf-core/ucsc/gtftogenepred/main.nf new file mode 100644 index 00000000..afbb5f3f --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/main.nf 
@@ -0,0 +1,54 @@
+process UCSC_GTFTOGENEPRED { // Converts a GTF to genePred with gtfToGenePred; can additionally derive a refflat file from the result
+    tag "${meta.id}"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:447--h954228d_0':
+        'biocontainers/ucsc-gtftogenepred:447--h954228d_0' }"
+
+    input:
+    tuple val(meta), path(gtf) // sample meta map plus the GTF annotation to convert
+
+    output:
+    tuple val(meta), path("*.genepred"), emit: genepred
+    tuple val(meta), path("*.refflat") , emit: refflat , optional: true // optional: the script section writes it only when gen_refflat is "true"
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra gtfToGenePred flags from config; empty string when unset
+    def gen_refflat = args.contains("-genePredExt") && args.contains("-geneNameAsName2") ? "true" : "false" // string flag tested by the shell `if` below; requires BOTH options. The awk step then prints column 12 followed by columns 1-10, tab-separated (presumably the refFlat layout with the gene name first - confirm)
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; defaults to the sample id
+    def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    gtfToGenePred \\
+        $args \\
+        $gtf \\
+        ${prefix}.genepred
+
+    if [ "${gen_refflat}" == "true" ] ; then
+        awk 'BEGIN { OFS="\\t"} {print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.refflat
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ucsc: $VERSION
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '447' // hard-coded like in the script section; the stub touches both outputs regardless of args
+    """
+    touch ${prefix}.genepred
+    touch ${prefix}.refflat
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ucsc: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/ucsc/gtftogenepred/meta.yml b/modules/nf-core/ucsc/gtftogenepred/meta.yml
new file mode 100644
index 00000000..cf04154d
--- /dev/null
+++ b/modules/nf-core/ucsc/gtftogenepred/meta.yml
@@ -0,0 +1,56 @@
+name: ucsc_gtftogenepred
+description: compute average score of bigwig over bed file  # NOTE(review): does not match this module (the tool entry below says it converts GTF to GenePred) - looks copied from another module; confirm and fix upstream
+keywords:
+  - gtf
+  - genepred
+  - refflat
+  - ucsc
+  - gtftogenepred
+tools:
+  - ucsc:
+      description: Convert GTF
files to GenePred format
+      homepage: http://hgdownload.cse.ucsc.edu/admin/exe/
+      licence: ["varies; see http://genome.ucsc.edu/license"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - gtf:
+        type: file
+        description: GTF file
+        pattern: "*.{gtf}"
+output:
+  - genepred:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.genepred":
+          type: file
+          description: genepred file
+          pattern: "*.{genepred}"
+  - refflat:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.refflat":
+          type: file
+          description: refflat file
+          pattern: "*.{refflat}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@BarryDigby"
+  - "@anoronh4"
+maintainers:
+  - "@BarryDigby"
+  - "@anoronh4"
diff --git a/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test
new file mode 100644
index 00000000..e0396a63
--- /dev/null
+++ b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test
@@ -0,0 +1,36 @@
+
+nextflow_process { // nf-test suite for the UCSC_GTFTOGENEPRED module: one test on the sarscov2 GTF
+
+    name "Test Process UCSC_GTFTOGENEPRED"
+    script "../main.nf"
+    process "UCSC_GTFTOGENEPRED"
+    config "./nextflow.config" // this config sets ext.args to "-genePredExt -geneNameAsName2", so the refflat output is produced as well
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "ucsc"
+    tag "ucsc/gtftogenepred"
+
+    test("test-ucsc-gtftogenepred") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ]
+                ]
+
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success }, // the process must finish without error
+                { assert snapshot(process.out).match() } // every output channel is compared with the stored snapshot
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test.snap b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test.snap
new file mode
100644 index 00000000..f021f823 --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "test-ucsc-gtftogenepred": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.genepred:md5,779e4749efaf38da3443ddfde30cc76c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.refflat:md5,4101802f41d4cf7ee2667587da11bf42" + ] + ], + "2": [ + "versions.yml:md5,fd95365619a316eb451190365b1b799e" + ], + "genepred": [ + [ + { + "id": "test" + }, + "test.genepred:md5,779e4749efaf38da3443ddfde30cc76c" + ] + ], + "refflat": [ + [ + { + "id": "test" + }, + "test.refflat:md5,4101802f41d4cf7ee2667587da11bf42" + ] + ], + "versions": [ + "versions.yml:md5,fd95365619a316eb451190365b1b799e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-23T08:55:50.58172" + } +} \ No newline at end of file diff --git a/modules/nf-core/ucsc/gtftogenepred/tests/nextflow.config b/modules/nf-core/ucsc/gtftogenepred/tests/nextflow.config new file mode 100644 index 00000000..889bb6ce --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: UCSC_GTFTOGENEPRED { + ext.args = [ + "-genePredExt", + "-geneNameAsName2" + ].join(' ').trim() + } +} diff --git a/nextflow.config b/nextflow.config index a309f87e..f1e66fa4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,14 +9,13 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null - - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + references_only = false + cosmic_username = null + cosmic_passwd = null + qiagen = false + outdir = null // MultiQC options multiqc_config = null @@ -25,8 +24,90 @@ params { max_multiqc_email_size = '25.MB' 
multiqc_methods_description = null + // Genome + genome = 'GRCh38' + genomes_base = "${params.outdir}/references" + genome_gencode_version = 46 + read_length = 100 + starfusion_build = true + genomes = [:] + fusion_annot_lib = "https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz" // path to dat.gz CTAT genome lib // TODO: Update to latest with s3 link when available + species = "human" + + // Filtering + tools_cutoff = 1 + + // Trimming + fastp_trim = false + trim_tail = null + adapter_fasta = [] + + // Compression + cram = [] + + // Alignment options + star_ignore_sjdbgtf = false + seq_center = null + seq_platform = null + fusioncatcher_limitSjdbInsertNsj = 2000000 + fusioninspector_limitSjdbInsertNsj = 1000000 + + // Enable or disable tools + all = false + arriba = false + ctatsplicing = false + fusioncatcher = false + starindex = false + starfusion = false + stringtie = false + fusionreport = false + fusioninspector_only = false + + // Skip steps + skip_qc = false + skip_vis = false + skip_vcf = false + skip_salmon_index = false + + // Download references option + download_refs = false + + // Path to references + fasta = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa" + fai = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa.fai" + gtf = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.gtf" + refflat = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.gtf.refflat" + rrna_intervals = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.interval_list" + gencode_ref = "${params.genomes_base}/gencode" + no_cosmic = false + arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_cytobands = 
"${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv" + arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3" + fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}" + hgnc_ref = "${params.genomes_base}/hgnc/hgnc_complete_set.txt" + hgnc_date = "${params.genomes_base}/hgnc/HGNC-DB-timestamp.txt" + salmon_index = "${params.genomes_base}/salmon/salmon" + starfusion_ref = "${params.genomes_base}/starfusion/ctat_genome_lib_build_dir" + starindex_ref = "${params.genomes_base}/star" + fusionreport_ref = "${params.genomes_base}/fusion_report_db" + + + // Internal file presence checks + salmon_index_stub_check = "${params.genomes_base}/salmon/salmon/complete_ref_lens.bin" + starindex_ref_stub_check = "${params.genomes_base}/star/star/Genome" + fusionreport_ref_stub_check = "${params.genomes_base}/fusion_report_db/mitelman.db" + fusioncatcher_ref_stub_check = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}/ensembl_fully_overlapping_genes.txt" + starfusion_ref_stub_check = "${params.genomes_base}/starfusion/Pfam-A.hmm" + + // Path to fusion outputs + arriba_fusions = null + starfusion_fusions = null + fusioncatcher_fusions = null + fusioninspector_fusions = null + whitelist = null + // Boilerplate options - outdir = null publish_dir_mode = 'copy' email = null email_on_fail = null @@ -148,6 +229,19 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } + test { + includeConfig 'conf/test.config' + } + test_build { + includeConfig 'conf/test_build.config' + } + test_cosmic { + includeConfig 'conf/test_cosmic.config' + } + test_full { + includeConfig 'conf/test_full.config' + } + gitpod { executor.name = 'local' executor.cpus = 4 @@ -160,16 +254,13 @@ profiles { ] } } - test { includeConfig 'conf/test.config' } - test_full { 
includeConfig 'conf/test_full.config' } } // Load nf-core custom profiles from different Institutions includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/rnafusion custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/rnafusion.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/rnafusion.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -180,9 +271,6 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
diff --git a/nextflow_schema.json b/nextflow_schema.json index 69a4e5db..0f59e214 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,17 +5,42 @@ "description": "Nextflow rnafusion analysis pipeline, part of the nf-core community.", "type": "object", "$defs": { + "skip_steps": { + "title": "Skip steps", + "type": "object", + "description": "Skip analysis steps", + "default": "", + "properties": { + "skip_qc": { + "type": "boolean", + "description": "Skip QC steps" + }, + "skip_vcf": { + "type": "boolean", + "description": "Skip vcf creation step" + }, + "skip_vis": { + "type": "boolean", + "description": "Skip visualisation steps" + }, + "skip_salmon_index": { + "type": "boolean", + "description": "Skip salmon index generation step", + "hidden": true + } + }, + "fa_icon": "fas fa-fast-forward" + }, "input_output_options": { "title": "Input/output options", "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["genomes_base", "outdir"], "properties": { "input": { "type": "string", "format": "file-path", - "exists": true, "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", @@ -40,6 +65,253 @@ "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "cosmic_username": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "COSMIC username" + }, + "cosmic_passwd": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "COSMIC password" + }, + "genomes_base": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to reference folder" + }, + "genome_gencode_version": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "gencode version" + }, + "starfusion_build": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "If set, starfusion references are built from scratch instead of downloaded (default)" + }, + "read_length": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Read length", + "default": 100 + }, + "all": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run all references/analyses" + }, + "arriba": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run arriba references/analyses" + }, + "arriba_ref_blacklist": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference blacklist" + }, + "arriba_ref_cytobands": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference cytobands" + }, + "arriba_ref_known_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference known fusions" + }, + "arriba_ref_protein_domains": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference protein domain" + }, + "arriba_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba output" + }, + "download_refs": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Download references instead of building them 
(for fusioncatcher and starfusion)" + }, + "ctatsplicing": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Run CTAT-splicing to detect aberrant cancer splicing introns. Needs --arriba and/or --starfusion to run." + }, + "gencode_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to gencode references" + }, + "fusioncatcher": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run fusioncatcher references/analyses" + }, + "fusioncatcher_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusioncatcher output" + }, + "fusioncatcher_limitSjdbInsertNsj": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Use limitSjdbInsertNsj with int for fusioncatcher" + }, + "fusioncatcher_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusioncatcher references" + }, + "fusioncatcher_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in fusioncatcher references" + }, + "fusioninspector_limitSjdbInsertNsj": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Use limitSjdbInsertNsj with int for fusioninspector STAR process" + }, + "fusioninspector_only": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Skip fusion-report.
--fusioninspector_fusions PATH needed to provide a fusion list as input" + }, + "fusioninspector_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to a fusion list file built with format GENE1--GENE2" + }, + "fusionreport": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build fusionreport references" + }, + "fusionreport_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusionreport references" + }, + "fusionreport_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in fusionreport references" + }, + "hgnc_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to HGNC database file" + }, + "hgnc_date": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to HGNC timestamp file for database retrieval" + }, + "qiagen": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Use QIAGEN instead of SANGER to download COSMIC database" + }, + "salmon_index": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to salmon index" + }, + "salmon_index_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in salmon index" + }, + "starfusion": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run starfusion references/analyses" + }, + "starfusion_fusions": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to starfusion output" + }, + "starfusion_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to starfusion references" + }, + "starfusion_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in starfusion references" + }, + "starindex": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run starindex 
references/analyses" + }, + "starindex_ref": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to starindex references" + }, + "starindex_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in starindex references" + }, + "stringtie": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Run stringtie analysis" + }, + "tools_cutoff": { + "type": "integer", + "fa_icon": "far fa-file-code", + "description": "Discard fusions identified by fewer than INT tools" + }, + "whitelist": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to fusions to add to the input of fusioninspector" + } + } + }, + "read_trimming_options": { + "title": "Read trimming options", + "type": "object", + "fa_icon": "fas fa-cut", + "description": "Options to adjust read trimming criteria.", + "properties": { + "fastp_trim": { + "type": "boolean", + "description": "Perform fastp trimming of reads, default: false", + "fa_icon": "fas fa-cut" + }, + "trim_tail": { + "type": "integer", + "description": "Perform tail trimming of reads, default: null", + "fa_icon": "fas fa-cut" + }, + "adapter_fasta": { + "type": "string", + "description": "Path to adapter fasta file, default: []", + "fa_icon": "fas fa-cut" + } + } + }, + "compression_options": { + "title": "Alignment compression options", + "type": "object", + "fa_icon": "fas fa-cut", + "description": "Option to compress BAM files to CRAM.", + "properties": { + "cram": { + "type": "string", + "description": "List of tools for which to compress BAM file to CRAM, default: [], options: arriba, starfusion.
Leave no space between options", + "fa_icon": "fas fa-cut" } } }, @@ -49,36 +321,70 @@ "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "references_only": { + "type": "boolean", + "description": "Skip running the analysis, only builds the references", + "fa_icon": "fas fa-book" }, "fasta": { "type": "string", "format": "file-path", - "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, - "igenomes_ignore": { + "fai": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?ai(\\.gz)?$", + "description": "Path to FASTA genome index file.", + "fa_icon": "far fa-file-code" + }, + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book" + }, + "gtf": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf?(\\.gz)?$", + "description": "Path to GTF genome file.", + "fa_icon": "far fa-file-code" + }, + "refflat": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.refflat?$", + "description": "Path to refFlat genome annotation file.", + "fa_icon": "far fa-file-code" + }, + "rrna_intervals": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.interval_list?$", + "description": "Path to ribosomal interval list.", + "fa_icon": "far fa-file-code" + }, + "no_cosmic": { "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "fa_icon": "far fa-file-code", + "description": "Avoid using Cosmic DB (for example in clinical case applications where a paid license applies)."
}, - "igenomes_base": { + "fusion_annot_lib": { "type": "string", - "format": "directory-path", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" + "description": "Path to Fusion Annotation Library to be used in STARFUSION_BUILD.", + "fa_icon": "far fa-file-code" + }, + "species": { + "type": "string", + "description": "Which species dfam should automatically download, default: human.", + "fa_icon": "far fa-file-code" } } }, @@ -224,6 +530,27 @@ "fa_icon": "far calendar", "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true + }, + "seq_center": { + "type": "string", + "description": "Sequencing center", + "hidden": true, + "fa_icon": "fas fa-toolbox", + "help_text": "This will be reported in the BAM header as CN" + }, + "seq_platform": { + "type": "string", + "description": "Sequencing platform", + "hidden": true, + "fa_icon": "fas fa-toolbox", + "help_text": "This will be reported in the BAM header as PL."
+                },
+                "star_ignore_sjdbgtf": {
+                    "type": "boolean",
+                    "description": "Whether to ignore the GTF in STAR alignment",
+                    "hidden": true,
+                    "fa_icon": "fas fa-toolbox",
+                    "help_text": "Setting false will use GTF file for STAR alignment"
                 }
             }
         }
@@ -232,6 +559,15 @@
         {
             "$ref": "#/$defs/input_output_options"
         },
+        {
+            "$ref": "#/$defs/skip_steps"
+        },
+        {
+            "$ref": "#/$defs/read_trimming_options"
+        },
+        {
+            "$ref": "#/$defs/compression_options"
+        },
         {
             "$ref": "#/$defs/reference_genome_options"
         },
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 00000000..3ee9cc13
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,14 @@
+config {
+    // location for all nf-tests
+    testsDir "tests"
+
+    // nf-test directory including temporary files for each test
+    workDir ".nf-test"
+
+    // location of an optional nextflow.config file specific for executing tests
+    configFile "tests/nextflow.config"
+
+    plugins {
+        load "nft-utils@0.0.3"
+    }
+}
diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf
new file mode 100644
index 00000000..84ba3604
--- /dev/null
+++ b/subworkflows/local/arriba_workflow/main.nf
@@ -0,0 +1,111 @@
+include { ARRIBA_ARRIBA } from '../../../modules/nf-core/arriba/arriba/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA } from '../../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA } from '../../../modules/nf-core/samtools/view/main'
+include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../../modules/nf-core/star/align/main'
+
+include { CTATSPLICING_WORKFLOW } from '../ctatsplicing_workflow'
+
+workflow ARRIBA_WORKFLOW { // Aligns reads with STAR and calls fusions with arriba; optionally runs CTAT-splicing and converts the BAM to an indexed CRAM
+    take: // NOTE(review): tests/main.nf.test feeds 15 inputs (no ch_starfusion_ref / ctatsplicing, known_fusions before cytobands) while 17 are declared here - confirm the test matches this order
+    reads // channel [ meta, [ fastqs ] ]
+    ch_gtf // channel [ meta, path_gtf ]
+    ch_fasta // channel [ meta, path_fasta ]
+    ch_starindex_ref // channel [ meta, path_index ]
+    ch_arriba_ref_blacklist // channel [ meta, path_blacklist ]
+    ch_arriba_ref_cytobands // channel [ meta, path_cytobands ]
+    ch_arriba_ref_known_fusions // channel [ meta, path_known_fusions ]
+    ch_arriba_ref_protein_domains // channel [ meta, path_proteins ]
+    ch_starfusion_ref // channel [ meta, path_starfusion_ref ]
+    arriba // boolean
+    all // boolean
+    fusioninspector_only // boolean
+    star_ignore_sjdbgtf // boolean
+    ctatsplicing // boolean
+    seq_center // string
+    arriba_fusions // path
+    cram // array
+
+    main:
+
+    def ch_versions = Channel.empty()
+    def ch_cram_index = Channel.empty() // stays empty unless cram contains 'arriba'
+    def ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) // placeholder emitted whenever arriba itself is not run
+
+    if (( arriba || all ) && !fusioninspector_only) { // arriba branch: requested explicitly or via `all`, and not in fusioninspector-only mode
+
+        STAR_FOR_ARRIBA(
+            reads,
+            ch_starindex_ref,
+            ch_gtf,
+            star_ignore_sjdbgtf,
+            '', // presumably seq_platform, left empty - confirm against the STAR_ALIGN input list
+            seq_center
+        )
+        ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions)
+
+        if ( ctatsplicing || all ) { // CTAT-splicing reuses the STAR outputs produced for arriba
+            CTATSPLICING_WORKFLOW(
+                STAR_FOR_ARRIBA.out.spl_junc_tab,
+                STAR_FOR_ARRIBA.out.junction,
+                STAR_FOR_ARRIBA.out.bam,
+                ch_starfusion_ref
+            )
+            ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions)
+        }
+
+        if ( arriba_fusions ) { // a fusion file was supplied: pair it with every sample instead of running arriba (STAR above still runs)
+
+            ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) )
+                .map { it -> [ it[0], it[2] ] } // keep meta (index 0) and the combined file (index 2); the reads at index 1 are dropped
+            ch_arriba_fusion_fail = ch_dummy_file // NOTE(review): plain file object here, but a channel in the else branch below
+
+        } else {
+
+            ARRIBA_ARRIBA (
+                STAR_FOR_ARRIBA.out.bam,
+                ch_fasta,
+                ch_gtf,
+                ch_arriba_ref_blacklist,
+                ch_arriba_ref_known_fusions, // NOTE(review): passed before cytobands, unlike the take: order - confirm against the module's input declaration
+                ch_arriba_ref_cytobands,
+                ch_arriba_ref_protein_domains
+            )
+
+            ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions)
+
+            ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions
+            ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ it -> return it[1] } // drop element 0 (presumably meta), keep the discarded-fusions file
+        }
+
+        if ( cram.contains('arriba') ) { // optional: sort the STAR BAM, convert it to CRAM and index it
+
+            SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam, ch_fasta)
+            ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions )
+
+            SAMTOOLS_VIEW_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, []) // empty list fills the third tuple slot (presumably the index) - confirm
+            ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions )
+
+            SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram)
+            ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions )
+
+            // Join cram and index files
+            ch_cram_index = SAMTOOLS_VIEW_FOR_ARRIBA.out.cram.join(SAMTOOLS_INDEX_FOR_ARRIBA.out.crai)
+        }
+
+    } else { // arriba not run: every sample gets the dummy file so downstream steps still receive a fusions entry
+
+        ch_arriba_fusions = reads
+            .combine(Channel.value( file(ch_dummy_file, checkIfExists: true ) ) ) // ch_dummy_file is already a file object; it is wrapped in file() again here
+            .map { it -> [ it[0], it[2] ] }
+
+        ch_arriba_fusion_fail = ch_dummy_file
+    }
+
+    emit:
+    fusions = ch_arriba_fusions // channel [ meta, path_fusions ]
+    fusions_fail = ch_arriba_fusion_fail // channel [ path_fusions_failed ] when arriba ran; otherwise the dummy file object itself
+    cram_index = ch_cram_index // channel [ meta, cram, crai ]
+    versions = ch_versions // channel [ versions ]
+}
+
diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test
new file mode 100644
index 00000000..e49a3768
--- /dev/null
+++ b/subworkflows/local/arriba_workflow/tests/main.nf.test
@@ -0,0 +1,390 @@
+nextflow_workflow {
+
+    name "Test Subworkflow ARRIBA_WORKFLOW"
+    script "../main.nf"
+    workflow "ARRIBA_WORKFLOW"
+    tag "subworkflow"
+    tag "arriba"
+    tag "arriba/arriba"
+    tag "samtools"
+    tag "samtools/index"
+    tag "samtools/sort"
+    tag "samtools/view"
+    tag "star"
+    tag "star/genomegenerate"
+    tag "star/align"
+
+
+    // Test #1 Indexing
+    test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4") {
+
+        setup {
+            // Create genome index for STAR
+            run("STAR_GENOMEGENERATE") {
+                script "../../../../modules/nf-core/star/genomegenerate/main.nf"
+                process {
+                    """
+                    // FASTA
+                    input[0] = Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true
+                    )
+                    .map{ [[id: it.getName() ], it ]}
+
+                    // GTF
+                    input[1] = Channel.fromPath(
+
"https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // 
ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (path) + input[13] = null + + // cram (array) + input[14] = [ 'arriba' ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + file(fusions[0][1]), + file(fusions_fail[0]), + file(cram_index[0][1]).name, + file(cram_index[0][2]).name, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + + + // Test #2 With arriba_fusions file + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - External fusion file") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false 
+ + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (string path) + input[13] = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/test_fastqs.arriba.fusions.tsv" + + // cram (array) + input[14] = [ 'arriba' ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + fusions[0].size() == 2, + fusions_fail.size() == 1, + file(cram_index[0][1]).name, + file(cram_index[0][2]).name, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + + // TEST #3 WITHOUT INDEXING + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } 
+ + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (path) + input[13] = null + + // cram (array) + input[14] = [ ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + file(fusions[0][1]), + file(fusions_fail[0]), + cram_index.size() == 0, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + 
+} diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..2057827a --- /dev/null +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap @@ -0,0 +1,57 @@ +{ + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - External fusion file": { + "content": [ + true, + true, + "test_fastqs_star_for_arriba_sorted.cram", + "test_fastqs_star_for_arriba_sorted.cram.crai", + [ + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T15:53:59.18258718" + }, + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": { + "content": [ + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d", + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98", + "test_fastqs_star_for_arriba_sorted.cram", + "test_fastqs_star_for_arriba_sorted.cram.crai", + [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T15:43:48.053656601" + }, + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []": { + "content": [ + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d", + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98", + true, + [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": 
"2024-12-11T16:07:37.079418154" + } +} \ No newline at end of file diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf new file mode 100644 index 00000000..56fd8eb2 --- /dev/null +++ b/subworkflows/local/build_references.nf @@ -0,0 +1,187 @@ +/* +======================================================================================== + IMPORT LOCAL MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +include { GENCODE_DOWNLOAD } from '../../modules/local/gencode_download/main' +include { FUSIONCATCHER_BUILD } from '../../modules/local/fusioncatcher/build/main' +include { FUSIONREPORT_DOWNLOAD } from '../../modules/local/fusionreport/download/main' +include { HGNC_DOWNLOAD } from '../../modules/local/hgnc/main' +include { STARFUSION_BUILD } from '../../modules/local/starfusion/build/main' +include { GTF_TO_REFFLAT } from '../../modules/local/uscs/custom_gtftogenepred/main' +include { GET_RRNA_TRANSCRIPTS } from '../../modules/local/get_rrna_transcript/main' + +/* +======================================================================================== + IMPORT NF-CORE MODULES/SUBWORKFLOWS +======================================================================================== +*/ +include { ARRIBA_DOWNLOAD } from '../../modules/nf-core/arriba/download/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/gatk4/createsequencedictionary/main' +include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { SALMON_INDEX } from '../../modules/nf-core/salmon/index/main' +include { GFFREAD } from '../../modules/nf-core/gffread/main' + +/* +======================================================================================== + RUN MAIN 
WORKFLOW
+========================================================================================
+*/
+
+// BUILD_REFERENCES resolves every reference asset the pipeline needs.
+// For each asset the path configured in `params` is reused when that file exists
+// and is non-empty; otherwise the matching download/build process is run.
+// The STAR index and the arriba, fusioncatcher, starfusion and fusionreport assets
+// are only built when the corresponding tool flag (or `params.all`) is set.
+// Takes no inputs; emits one channel per asset plus the collected `versions`.
+workflow BUILD_REFERENCES {
+    main:
+    ch_versions = Channel.empty()
+
+    if (!file(params.fasta).exists() || file(params.fasta).isEmpty() ||
+            !file(params.gtf).exists() || file(params.gtf).isEmpty()){
+        GENCODE_DOWNLOAD(params.genome_gencode_version, params.genome)  // either file missing: fetch both
+        ch_versions = ch_versions.mix(GENCODE_DOWNLOAD.out.versions)
+        ch_fasta = GENCODE_DOWNLOAD.out.fasta.map { that -> [[id:that.Name], that] }
+        ch_gtf = GENCODE_DOWNLOAD.out.gtf.map { that -> [[id:that.Name], that] }
+    } else {
+        ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] }  // [ [id: file name], path ]
+        ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] }
+    }
+
+    if (!file(params.fai).exists() || file(params.fai).isEmpty()){
+        SAMTOOLS_FAIDX(ch_fasta, [[],[]])
+        ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
+        ch_fai = SAMTOOLS_FAIDX.out.fai
+    } else {
+        ch_fai = Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] }
+    }
+
+    if ((!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() ||
+            !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) && !params.skip_vcf){  // no download when skip_vcf is set
+        HGNC_DOWNLOAD( )
+        ch_versions = ch_versions.mix(HGNC_DOWNLOAD.out.versions)
+        ch_hgnc_ref = HGNC_DOWNLOAD.out.hgnc_ref
+        ch_hgnc_date = HGNC_DOWNLOAD.out.hgnc_date
+    } else {
+        ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { that -> [[id:that.Name], that] }
+        ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] }
+    }
+
+    if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){
+        GATK4_CREATESEQUENCEDICTIONARY(ch_fasta)
+        ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
+        GET_RRNA_TRANSCRIPTS(ch_gtf)
+        ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions)
+        GATK4_BEDTOINTERVALLIST(GET_RRNA_TRANSCRIPTS.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict )  // rRNA BED + sequence dict -> interval list
+        ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions)
+        ch_rrna_interval = GATK4_BEDTOINTERVALLIST.out.interval_list
+    } else {
+        ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] }
+    }
+
+    if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){
+        GTF_TO_REFFLAT(ch_gtf)
+        ch_versions = ch_versions.mix(GTF_TO_REFFLAT.out.versions)
+        ch_refflat = GTF_TO_REFFLAT.out.refflat.map { that -> [[id:that.Name], that] }
+    } else {
+        ch_refflat = Channel.fromPath(params.refflat).map { that -> [[id:that.Name], that] }
+    }
+
+    if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() ||
+            !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc
+        GFFREAD(ch_gtf, ch_fasta.map{ it -> it[1] })  // it[1]: the path without its meta map
+        ch_versions = ch_versions.mix(GFFREAD.out.versions)
+        SALMON_INDEX(ch_fasta.map{ it -> it[1] }, GFFREAD.out.gffread_fasta.map{ it -> it[1] })
+        ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
+        ch_salmon_index = SALMON_INDEX.out.index
+    } else {
+        ch_salmon_index = Channel.fromPath(params.salmon_index)  // pass the path itself, not a closure wrapping it
+    }
+
+    if ((params.starindex || params.all || params.starfusion || params.arriba) &&
+            (!file(params.starindex_ref).exists() || file(params.starindex_ref).isEmpty() ||
+            !file(params.starindex_ref_stub_check).exists() || file(params.starindex_ref_stub_check).isEmpty() )) {
+        STAR_GENOMEGENERATE(ch_fasta, ch_gtf)
+        ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
+        ch_starindex_ref = STAR_GENOMEGENERATE.out.index
+    } else {
+        ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] }
+    }
+
+    // NOTE(review): arriba_ref_cytobands is not part of this existence check, although the else branch reads it - confirm this is intended
+    if ((params.arriba || params.all) &&
+            (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() ||
+            !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() ||
+            !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) {
+        ARRIBA_DOWNLOAD(params.genome)
+        ch_versions = ch_versions.mix(ARRIBA_DOWNLOAD.out.versions)
+        ch_arriba_ref_blacklist = ARRIBA_DOWNLOAD.out.blacklist
+        ch_arriba_ref_cytobands = ARRIBA_DOWNLOAD.out.cytobands
+        ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions
+        ch_arriba_ref_protein_domains = ARRIBA_DOWNLOAD.out.protein_domains
+    } else {
+        ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist)  // plain paths here, no meta map added
+        ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands)
+        ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions)
+        ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains)
+    }
+
+    if ((params.fusioncatcher || params.all) &&
+            (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() ||
+            !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) {
+        FUSIONCATCHER_BUILD(params.genome_gencode_version)
+        ch_versions = ch_versions.mix(FUSIONCATCHER_BUILD.out.versions)
+        ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference
+    } else {
+        ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref)
+    }
+
+    if ((params.starfusion || params.all) &&
+            (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() ||
+            !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) {
+        STARFUSION_BUILD(ch_fasta, ch_gtf, params.fusion_annot_lib, params.species)
+        ch_versions = ch_versions.mix(STARFUSION_BUILD.out.versions)
+        ch_starfusion_ref = STARFUSION_BUILD.out.reference
+    } else {
+        ch_starfusion_ref = Channel.fromPath(params.starfusion_ref)
+    }
+
+    if ((params.fusionreport || params.all) &&
+            (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() ||
+            !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) {
+        if (!params.no_cosmic && (!params.cosmic_username || !params.cosmic_passwd)) { exit 1, 'COSMIC username and/or password missing' }  // stop before the download is attempted
+        FUSIONREPORT_DOWNLOAD()
+        ch_versions = ch_versions.mix(FUSIONREPORT_DOWNLOAD.out.versions)
+        ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref
+    } else {
+        ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] }
+    }
+
+    emit:
+    ch_fasta
+    ch_gtf
+    ch_fai
+    ch_hgnc_ref
+    ch_hgnc_date
+    ch_rrna_interval
+    ch_refflat
+    ch_salmon_index
+    ch_starindex_ref
+    ch_arriba_ref_blacklist
+    ch_arriba_ref_cytobands
+    ch_arriba_ref_known_fusions
+    ch_arriba_ref_protein_domains
+    ch_fusioncatcher_ref
+    ch_starfusion_ref
+    ch_fusionreport_ref
+    versions = ch_versions
+}
+
+/*
+========================================================================================
+    THE END
+========================================================================================
+*/
diff --git a/subworkflows/local/ctatsplicing_workflow/main.nf b/subworkflows/local/ctatsplicing_workflow/main.nf
new file mode 100644
index 00000000..8b279033
--- /dev/null
+++ b/subworkflows/local/ctatsplicing_workflow/main.nf
@@ -0,0 +1,33 @@
+include { CTATSPLICING_STARTOCANCERINTRONS } from '../../../modules/local/ctatsplicing/startocancerintrons'
+
+// Runs CTATSPLICING_STARTOCANCERINTRONS on the per-sample inputs when `params.ctatsplicing`
+// or `params.all` is set; otherwise nothing runs and `versions` stays an empty channel.
+workflow CTATSPLICING_WORKFLOW {
+    take:
+    split_junctions     // [ val(meta), path(split_junctions.SJ.out.tab) ]
+    junctions           // [ val(meta), path(junctions.Chimeric.out.junction) ]
+    aligned_bams        // [ val(meta), path(aligned_bams.Aligned.sortedByCoord.out.bam) ]
+    ctat_genome_lib     // [ val(meta2), path(path/to/ctat_genome_lib) ]
+
+    main:
+    def ch_versions = Channel.empty()
+
+    if (params.ctatsplicing || params.all) {
+        def ch_ctatsplicing_input = split_junctions
+            .join(junctions, failOnMismatch:true, failOnDuplicate:true)
+            .join(aligned_bams, failOnMismatch:true, failOnDuplicate:true)
+            .map { meta, split_junction, junction, bam ->
+                [ meta, split_junction, junction, bam, [] ]  // fifth element is passed as an empty list
+            }
+
+        CTATSPLICING_STARTOCANCERINTRONS(
+            ch_ctatsplicing_input,
+            ctat_genome_lib
+        )
+        ch_versions = ch_versions.mix(CTATSPLICING_STARTOCANCERINTRONS.out.versions.first())
+
+    }
+
+    emit:
+    versions = ch_versions
+}
diff --git a/subworkflows/local/fusioncatcher_workflow/main.nf b/subworkflows/local/fusioncatcher_workflow/main.nf
new file mode 100644
index 00000000..fd9ce34b
--- /dev/null
+++ b/subworkflows/local/fusioncatcher_workflow/main.nf
@@ -0,0 +1,45 @@
+include { FUSIONCATCHER_DETECT } from '../../../modules/local/fusioncatcher/detect/main'
+
+// TODO: Remove fusioncatcher_fusions as parameter.
+// TODO: remove dummy file. Work with Channel.empty()
+// TODO: if the files were already produced and the user wants to skip the module because of this, they should be taken from the sample sheet
+// TODO: harmonize `run_fusioncatcher` and `fusioncatcher_only` parameters at main workflow level to activate/skip this one.
+
+// Emits one [ meta, fusions ] item per sample: a dummy placeholder when the tool is
+// not run, a user-supplied fusion file when given, else FUSIONCATCHER_DETECT output.
+workflow FUSIONCATCHER_WORKFLOW {
+    take:
+    reads                   // channel [ meta, [ fastqs ] ]
+    fusioncatcher_ref       // channel [ meta, path ]
+    run_fusioncatcher       // boolean
+    all                     // boolean
+    fusioninspector_only    // boolean
+    fusioncatcher_fusions   // path, string
+
+    main:
+    ch_versions = Channel.empty()
+    ch_dummy_file = file("$baseDir/assets/dummy_file_fusioncatcher.txt", checkIfExists: true)
+
+    if (!( run_fusioncatcher || all) || fusioninspector_only ) {
+        // Tool not requested (or fusioninspector-only run): pair every sample with the dummy file
+        ch_fusioncatcher_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true)))
+            .map { meta, _fastqs, placeholder -> [ meta, placeholder ] }
+    } else if (fusioncatcher_fusions) {
+        // Pre-computed fusion file supplied: pair it with every sample instead of running detection
+        ch_fusioncatcher_fusions = reads.combine(Channel.value(file(fusioncatcher_fusions, checkIfExists:true)))
+            .map { meta, _fastqs, supplied -> [ meta, supplied ] }
+    } else {
+        FUSIONCATCHER_DETECT (
+            reads,
+            fusioncatcher_ref
+        )
+        ch_fusioncatcher_fusions = FUSIONCATCHER_DETECT.out.fusions
+        ch_versions = ch_versions.mix(FUSIONCATCHER_DETECT.out.versions)
+    }
+
+    emit:
+    fusions = ch_fusioncatcher_fusions  // channel [ meta, fusions ]
+    versions = ch_versions              // channel [ versions ]
+    }
+
diff --git a/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test b/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test
new file mode 100644
index 00000000..ef6cecc7
--- /dev/null
+++ b/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test
@@ -0,0 +1,66 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FUSIONCATCHER_WORKFLOW"
+    script "../main.nf"
+    workflow "FUSIONCATCHER_WORKFLOW"
+    tag "subworkflow"
+    tag "fusioncatcher"
+    tag "fusioncatcher/build"
+    tag "fusioncatcher/detect"
+
+    // Test
+    test("FUSIONCATCHER_WORKFLOW - Homo sapiens - FASTQs chr4") {
+
+        setup {
+            // Download reference files for fusioncatcher
+            run("FUSIONCATCHER_BUILD") {
+                script "../../../../modules/local/fusioncatcher/build/main.nf"
+                process {
+                    """
+                    input[0] = Channel.value('46')
+                    """
+                }
+            }
+        }
+
+        // TODO: get smaller reference files for fusioncatcher
+        when {
+            workflow {
+                """
+                // ch_reads
+                input[0] = Channel.of(
+                    [
+                        [ id: "test_fastqs", single_end: false ],
+                        [
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true),
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true)
+                        ]
+                    ] )
+
+                // ch_references
+                input[1] = FUSIONCATCHER_BUILD.out.reference
+
+                // fusioncatcher (boolean)
+                input[2] = true
+
+                // all (boolean)
+                input[3] = true
+
+                // fusioninspector_only (boolean)
+                input[4] = false
+
+                // fusioncatcher_fusions (string path)
+                input[5] = null
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test.snap b/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test.snap
new file mode 100644
index 00000000..47ffc5af
--- /dev/null
+++ b/subworkflows/local/fusioncatcher_workflow/tests/main.nf.test.snap
@@ -0,0 +1,37 @@
+{
+    "FUSIONCATCHER_WORKFLOW - Homo sapiens - FASTQs chr4": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_fastqs",
+                            "single_end": false
+                        },
+                        "test_fastqs.fusioncatcher.fusion-genes.txt:md5,c826a24c49abfcec8164c478e1e74892"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,05bd93a243728a293211ce52e5f97282"
+                ],
+                "fusions": [
+                    [
+                        {
+                            "id": "test_fastqs",
+                            "single_end": false
+                        },
+                        "test_fastqs.fusioncatcher.fusion-genes.txt:md5,c826a24c49abfcec8164c478e1e74892"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,05bd93a243728a293211ce52e5f97282"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.3"
+        },
+        "timestamp": "2025-01-03T19:29:54.767628"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/local/fusioninspector_workflow.nf b/subworkflows/local/fusioninspector_workflow.nf
new file mode 100644
index 00000000..f521ae8c
--- /dev/null
+++ 
b/subworkflows/local/fusioninspector_workflow.nf
@@ -0,0 +1,66 @@
+include { AGAT_CONVERTSPGFF2TSV } from '../../modules/nf-core/agat/convertspgff2tsv/main'
+include { ARRIBA_VISUALISATION } from '../../modules/local/arriba/visualisation/main'
+include { CAT_CAT } from '../../modules/nf-core/cat/cat/main'
+include { VCF_COLLECT } from '../../modules/local/vcf_collect/main'
+include { FUSIONINSPECTOR } from '../../modules/local/fusioninspector/main'
+
+// Runs FUSIONINSPECTOR on the non-empty fusion lists (optionally concatenated with
+// `params.whitelist`), collects the results with VCF_COLLECT and optionally plots them.
+workflow FUSIONINSPECTOR_WORKFLOW {
+    take:
+    reads
+    fusion_list
+    fusion_list_filtered
+    fusionreport_out
+    fusionreport_csv
+    bam_sorted_indexed
+    ch_gtf
+    ch_arriba_ref_protein_domains
+    ch_arriba_ref_cytobands
+    ch_hgnc_ref
+    ch_hgnc_date
+
+    main:
+    ch_versions = Channel.empty()
+    ch_arriba_visualisation = Channel.empty()
+    index ="${params.starfusion_ref}"
+
+    // The filtered list is used only when more than one tool must agree (tools_cutoff > 1)
+    ch_fusion_list = ( params.tools_cutoff > 1 ? fusion_list_filtered : fusion_list )
+        .branch{
+            no_fusions: it[1].size() == 0
+            fusions: it[1].size() > 0
+        }
+
+    if (params.whitelist) {
+        ch_whitelist = ch_fusion_list.fusions.combine(Channel.value(file(params.whitelist, checkIfExists:true)))
+            .map { meta, fusions, whitelist -> [ meta, [fusions, whitelist] ] }
+        CAT_CAT(ch_whitelist) // fusioninspector takes care of possible duplicates
+        ch_versions = ch_versions.mix(CAT_CAT.out.versions)
+        ch_reads_fusion = reads.join(CAT_CAT.out.file_out )
+    } else {
+        ch_reads_fusion = reads.join(ch_fusion_list.fusions )
+    }
+
+    FUSIONINSPECTOR( ch_reads_fusion, index)
+    ch_versions = ch_versions.mix(FUSIONINSPECTOR.out.versions)
+
+    AGAT_CONVERTSPGFF2TSV(FUSIONINSPECTOR.out.out_gtf)
+    ch_versions = ch_versions.mix(AGAT_CONVERTSPGFF2TSV.out.versions)
+
+    fusion_data = FUSIONINSPECTOR.out.tsv_coding_effect.join(AGAT_CONVERTSPGFF2TSV.out.tsv).join(fusionreport_out).join(fusionreport_csv)
+    VCF_COLLECT(fusion_data, ch_hgnc_ref, ch_hgnc_date)
+    ch_versions = ch_versions.mix(VCF_COLLECT.out.versions)
+
+    if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only && !params.skip_vis) {
+        ch_bam_sorted_indexed_fusions = bam_sorted_indexed.join(FUSIONINSPECTOR.out.tsv)
+        ARRIBA_VISUALISATION(ch_bam_sorted_indexed_fusions, ch_gtf, ch_arriba_ref_protein_domains, ch_arriba_ref_cytobands)
+        ch_versions = ch_versions.mix(ARRIBA_VISUALISATION.out.versions)
+        ch_arriba_visualisation = ARRIBA_VISUALISATION.out.pdf
+    }
+
+    emit:
+    ch_arriba_visualisation
+    versions = ch_versions
+}
+
diff --git a/subworkflows/local/fusionreport_workflow/main.nf b/subworkflows/local/fusionreport_workflow/main.nf
new file mode 100644
index 00000000..eac40e32
--- /dev/null
+++ b/subworkflows/local/fusionreport_workflow/main.nf
@@ -0,0 +1,45 @@
+include { FUSIONREPORT } from '../../../modules/local/fusionreport/detect/main'
+
+// Runs FUSIONREPORT on the joined per-sample fusion calls, or, when `params.fusioninspector_only`
+// is set, pairs every sample with the file given in `params.fusioninspector_fusions`.
+workflow FUSIONREPORT_WORKFLOW {
+    take:
+    reads
+    fusionreport_ref
+    arriba_fusions
+    starfusion_fusions
+    fusioncatcher_fusions
+
+    main:
+    ch_versions = Channel.empty()
+    ch_report = Channel.empty()
+    ch_csv = Channel.empty()
+
+    if (!params.fusioninspector_only) {
+        reads_fusions = reads
+            .join(arriba_fusions, failOnMismatch:true, failOnDuplicate:true)
+            .join(starfusion_fusions, failOnMismatch:true, failOnDuplicate:true)
+            .join(fusioncatcher_fusions, failOnMismatch:true, failOnDuplicate:true)
+
+        FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff)
+        ch_fusion_list = FUSIONREPORT.out.fusion_list
+        ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered
+        ch_versions = ch_versions.mix(FUSIONREPORT.out.versions)
+        ch_report = FUSIONREPORT.out.report
+        ch_csv = FUSIONREPORT.out.csv
+    } else {
+        ch_fusion_list = reads.combine(Channel.value(file(params.fusioninspector_fusions, checkIfExists:true)))
+            .map { it -> [ it[0], it[-1] ] }  // combine appends the fusion file last; it[1] would be the reads
+
+        ch_fusion_list_filtered = ch_fusion_list  // no filtering in this mode: both outputs carry the same list
+    }
+
+    emit:
+    versions = ch_versions
+    fusion_list = ch_fusion_list
+    fusion_list_filtered = ch_fusion_list_filtered
+    report = ch_report.ifEmpty(null)
+    csv = ch_csv.ifEmpty(null)
+
+}
+
diff --git 
a/subworkflows/local/fusionreport_workflow/tests/main.nf.test b/subworkflows/local/fusionreport_workflow/tests/main.nf.test
new file mode 100644
index 00000000..61aaf023
--- /dev/null
+++ b/subworkflows/local/fusionreport_workflow/tests/main.nf.test
@@ -0,0 +1,77 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FUSIONREPORT_WORKFLOW"
+    script "../main.nf"
+    workflow "FUSIONREPORT_WORKFLOW"
+
+    tag "subworkflow"
+    tag "fusionreport_workflow"
+    tag "fusionreport"
+
+    // Runs the workflow on remote test inputs with COSMIC disabled (no_cosmic = true) and snapshots its outputs
+    test("FUSIONREPORT_WORKFLOW - Full Test") {
+
+        setup {  // run FUSIONREPORT_DOWNLOAD first; its fusionreport_ref output feeds input[1]
+            run("FUSIONREPORT_DOWNLOAD") {
+                script "../../../../modules/local/fusionreport/download/main.nf"
+                process {  // the input script is left empty
+                    """
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {  // input[0]: reads, input[1]: reference, input[2..4]: arriba / starfusion / fusioncatcher calls
+                """
+                // Input channels
+                input[0] = Channel.of(
+                    [ [ id:'test_sample' ],
+                    file("https://github.com/nf-core/test-datasets/raw/rnafusion/testdata/human/reads_1.fq.gz") ]
+                )
+
+                input[1] = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref
+
+                input[2] = Channel.of(
+                    [[ id:'test_sample' ],
+                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/arriba.tsv") ]
+                )
+
+                input[3] = Channel.of(
+                    [[ id:'test_sample' ],
+                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/starfusion.tsv") ]
+                )
+
+                input[4] = Channel.of(
+                    [[ id:'test_sample' ],
+                    file("https://github.com/Clinical-Genomics/fusion-report/raw/master/tests/test_data/fusioncatcher.txt") ]
+                )
+
+                """
+            }
+            params {  // fusioninspector_only = false selects the branch that runs FUSIONREPORT
+                fusioninspector_only = false
+                tools_cutoff = 1
+                arriba = true
+                starfusion = true
+                fusioncatcher = true
+                no_cosmic = true
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.fusion_list,
+                    workflow.out.fusion_list_filtered,
+                    workflow.out.report,
+                    workflow.out.csv,
+                    workflow.out.versions
+                ).match() }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/local/fusionreport_workflow/tests/main.nf.test.snap b/subworkflows/local/fusionreport_workflow/tests/main.nf.test.snap
new file mode 100644
index 00000000..2c384e5a --- /dev/null +++ b/subworkflows/local/fusionreport_workflow/tests/main.nf.test.snap @@ -0,0 +1,46 @@ +{ + "FUSIONREPORT_WORKFLOW - Full Test": { + "content": [ + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport.tsv:md5,3593b7021f26cc5427fdc96f0d1c72f0" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusionreport_filtered.tsv:md5,3593b7021f26cc5427fdc96f0d1c72f0" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample_fusionreport_index.html:md5,3513bcaa58446399c0957db69402d3bd" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.fusions.csv:md5,49f378c2112d7e0b3b17d9095c79e6bd" + ] + ], + [ + "versions.yml:md5,90749dbf8e3e7b259c935eabb8c6ce1e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-07T13:31:41.215356596" + } +} \ No newline at end of file diff --git a/subworkflows/local/qc_workflow/main.nf b/subworkflows/local/qc_workflow/main.nf new file mode 100644 index 00000000..4b635242 --- /dev/null +++ b/subworkflows/local/qc_workflow/main.nf @@ -0,0 +1,39 @@ +// +// Extract descriptive values from BAMs +// + +include { PICARD_COLLECTRNASEQMETRICS } from '../../../modules/nf-core/picard/collectrnaseqmetrics' +include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates' +include { PICARD_COLLECTINSERTSIZEMETRICS } from '../../../modules/nf-core/picard/collectinsertsizemetrics' + +workflow QC_WORKFLOW { + take: + ch_bam_sorted // channel [ meta, bam ] + ch_refflat // channel [ meta, refflat ] + ch_fasta // channel [ meta, fasta ] + ch_fai // channel [ meta, fai ] + ch_rrna_interval // channel [ meta, interval ] + + main: + ch_versions = Channel.empty() + + PICARD_COLLECTRNASEQMETRICS(ch_bam_sorted, ch_refflat.map{ meta, refflat -> [ refflat ] }, ch_fasta.map{ meta, fasta -> [ fasta ] }, ch_rrna_interval.map{ meta, intervals -> [ intervals ] }.ifEmpty([]) ) // Some chromosome or annotation may not have rRNA genes + 
ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions) + ch_rnaseq_metrics = PICARD_COLLECTRNASEQMETRICS.out.metrics + + GATK4_MARKDUPLICATES(ch_bam_sorted, ch_fasta.map { meta, fasta -> [ fasta ]}, ch_fai.map { meta, fasta_fai -> [ fasta_fai ]}) + ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions) + ch_duplicate_metrics = GATK4_MARKDUPLICATES.out.metrics + + PICARD_COLLECTINSERTSIZEMETRICS(ch_bam_sorted) + ch_versions = ch_versions.mix(PICARD_COLLECTINSERTSIZEMETRICS.out.versions) + ch_insertsize_metrics = PICARD_COLLECTINSERTSIZEMETRICS.out.metrics + + emit: + versions = ch_versions // channel [ path ] + rnaseq_metrics = ch_rnaseq_metrics // channel [ meta, path ] + duplicate_metrics = ch_duplicate_metrics // channel [ meta, path ] + insertsize_metrics = ch_insertsize_metrics // channel [ meta, path ] + +} + diff --git a/subworkflows/local/qc_workflow/test/main.nf.test b/subworkflows/local/qc_workflow/test/main.nf.test new file mode 100644 index 00000000..cfb48e06 --- /dev/null +++ b/subworkflows/local/qc_workflow/test/main.nf.test @@ -0,0 +1,127 @@ +nextflow_workflow { + + name "Test Subworkflow QC_WORKFLOW" + script "../main.nf" + config "./nextflow.config" + workflow "QC_WORKFLOW" + tag "qc" + tag "subworkflow" + + test("QC_WORKFLOW - Homo sapiens chr22") { + + // Generate refflat file + setup { + + // Create refflat reference + run("UCSC_GTFTOGENEPRED") { + script "../../../../modules/nf-core/ucsc/gtftogenepred/main.nf" + process { + """ + input[0] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.gtf", + checkIfExists: true + ) + .map{ [ [id:it.Name], it ] } + """ + } + } + + // Filter GTF to extract rRNA genes + run("RRNATRANSCRIPTS") { + script "../../../../modules/nf-core/rrnatranscripts/main.nf" + process { + """ + input[0] = 
Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true) + """ + } + } + + // Convert rRNA GTF to BED + run("BEDOPS_CONVERT2BED") { + script "../../../../modules/nf-core/bedops/convert2bed/main.nf" + process { + """ + input[0] = RRNATRANSCRIPTS.out.rrna_gtf.map{ it -> [ [id:it.Name], it ] } + """ + } + } + + // Convert rRNA BED to interval list (the necessary file) + run("GATK4_BEDTOINTERVALLIST") { + script "../../../../modules/nf-core/gatk4/bedtointervallist/main.nf" + process { + """ + input[0] = BEDOPS_CONVERT2BED.out.bed + input[1] = Channel.of( + [ + [id: 'chr22_dic'], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.dic", checkIfExists: true) + ] + ) + """ + } + } + + } + + when { + // Params to activate modules ext.when condition + params { + skip_qc = false + fusioninspector_only = false + starfusion = true + all = true + } + + workflow { + """ + // ch_bam_sorted + input[0] = Channel.of( + [ + [id: "chr22_bam"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + + // ch_chrgtf + input[1] = Channel.of( + [ + [ id: "chr22_gtf" ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true) + ]) + + // ch_refflat + input[2] = UCSC_GTFTOGENEPRED.out.refflat + + // ch_fasta + input[3] = Channel.of( + [ + [ id: "test_ref" ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true) + ] ) + + // ch_fai + input[4] = Channel.of( + [ + [ id: "test_ref" ], + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) + ] ) + + // ch rRNA interval list + input[5] = GATK4_BEDTOINTERVALLIST.out.interval_list + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file( workflow.out.versions[0] )).match('versions' ) }, + { assert snapshot(file( workflow.out.rnaseq_metrics[0][1] ).readLines()[4..-1]).md5().match('rnaseq_metrics' ) }, + { assert snapshot(file( workflow.out.duplicate_metrics[0][1] ).readLines()[4..-1]).md5().match('duplicate_metrics' ) }, + { assert snapshot(file( workflow.out.insertsize_metrics[0][1] ).readLines()[4..-1]).md5().match('insertsize_metrics') } + ) + } + } + +} diff --git a/subworkflows/local/qc_workflow/test/main.nf.test.snap b/subworkflows/local/qc_workflow/test/main.nf.test.snap new file mode 100644 index 00000000..b56c12b9 --- /dev/null +++ b/subworkflows/local/qc_workflow/test/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "duplicate_metrics": { + "content": "651d8a4702f9f9871e94afbce3e50e34", + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.711230835" + }, + "versions": { + "content": [ + "versions.yml:md5,3f13b395c67e317f74194b3b6c89f139" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.686297468" + }, + "rnaseq_metrics": { + "content": "84a348c3735ed2f6c47f346eeed661f4", + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.704632753" + }, + "insertsize_metrics": { + "content": "160db81b19843c4d46fe74ac61f9f013", + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T19:29:09.717929716" + } +} \ No newline at end of file diff --git a/subworkflows/local/qc_workflow/test/nextflow.config b/subworkflows/local/qc_workflow/test/nextflow.config new file mode 100644 index 00000000..498c456f --- 
/dev/null +++ b/subworkflows/local/qc_workflow/test/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: PICARD_COLLECTRNASEQMETRICS { + ext.args = "--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND" + } + + withName: GATK4_BEDTOINTERVALLIST { + ext.args = "--KEEP_LENGTH_ZERO_INTERVALS true" + } + +} diff --git a/subworkflows/local/starfusion_workflow/main.nf b/subworkflows/local/starfusion_workflow/main.nf new file mode 100644 index 00000000..938832c9 --- /dev/null +++ b/subworkflows/local/starfusion_workflow/main.nf @@ -0,0 +1,83 @@ +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_STARFUSION_CRAM } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_STARFUSION } from '../../../modules/nf-core/samtools/view/main' +include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../../modules/nf-core/star/align/main' +include { STARFUSION } from '../../../modules/local/starfusion/detect/main' +include { CTATSPLICING_WORKFLOW } from '../ctatsplicing_workflow' + +workflow STARFUSION_WORKFLOW { + take: + reads + ch_gtf + ch_starindex_ref + ch_fasta + ch_starfusion_ref + + main: + def ch_versions = Channel.empty() + def ch_align = Channel.empty() + def ch_starfusion_fusions = Channel.empty() + def bam_sorted_indexed = Channel.empty() + // Default to empty so the emitted channels exist even when params.starfusion_fusions skips STAR + def ch_star_stats = Channel.empty() + def ch_star_gene_count = Channel.empty() + + ch_dummy_file = file("$baseDir/assets/dummy_file_starfusion.txt", checkIfExists: true) + + if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) { + if (params.starfusion_fusions){ + ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists:true))) + .map { it -> [ it[0], it[2] ] } + } else { + STAR_FOR_STARFUSION( reads, 
ch_starindex_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') + ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions) + ch_align = 
STAR_FOR_STARFUSION.out.bam_sorted // TODO: This does not seem to be captured and used as the output is bam_sorted_aligned and not bam_sorted + + + SAMTOOLS_INDEX_FOR_STARFUSION(STAR_FOR_STARFUSION.out.bam_sorted) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION.out.versions) + bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai) + reads_junction = reads.join(STAR_FOR_STARFUSION.out.junction ) // TODO: This join is not needed as STARFUSION can simply read from the junction file: https://github.com/STAR-Fusion/STAR-Fusion/wiki#alternatively-kickstart-mode-running-star-yourself-and-then-running-star-fusion-using-the-existing-outputs + + if (params.ctatsplicing || params.all) { + CTATSPLICING_WORKFLOW( + STAR_FOR_STARFUSION.out.spl_junc_tab, + STAR_FOR_STARFUSION.out.junction, + STAR_FOR_STARFUSION.out.bam, + ch_starfusion_ref + ) + ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions) + } + + if (params.cram.contains('starfusion')){ + SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_STARFUSION.out.versions) + + SAMTOOLS_INDEX_FOR_STARFUSION_CRAM (SAMTOOLS_VIEW_FOR_STARFUSION.out.cram) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION_CRAM.out.versions) + } + if (params.starfusion || params.all){ + STARFUSION( reads_junction, ch_starfusion_ref.map { it -> it[1] }) + ch_versions = ch_versions.mix(STARFUSION.out.versions) + ch_starfusion_fusions = STARFUSION.out.fusions + } + + ch_star_stats = STAR_FOR_STARFUSION.out.log_final + ch_star_gene_count = STAR_FOR_STARFUSION.out.read_per_gene_tab + } + } + else { + ch_starfusion_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true))) + .map { it -> [ it[0], it[2] ] } + ch_star_stats = Channel.empty() + ch_star_gene_count = Channel.empty() + } + emit: + fusions = ch_starfusion_fusions + star_stats = ch_star_stats + star_gene_count = ch_star_gene_count + 
ch_bam_sorted = ch_align + ch_bam_sorted_indexed = bam_sorted_indexed + versions = ch_versions + } + diff --git a/subworkflows/local/starfusion_workflow/tests/main.nf.test b/subworkflows/local/starfusion_workflow/tests/main.nf.test new file mode 100644 index 00000000..ac0e7798 --- /dev/null +++ b/subworkflows/local/starfusion_workflow/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_workflow { + + name "Test Subworkflow STARFUSION_WORKFLOW" + script "../main.nf" + workflow "STARFUSION_WORKFLOW" + tag "subworkflow" + tag "star" + tag "star/genomegenerate" + tag "star/align" + + + test("STARFUSION_WORKFLOW - Homo sapiens - FASTQs minigenome") { + config './nextflow.config' + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + + run("STARFUSION_BUILD") { + script "../../../../modules/local/starfusion/build/main.nf" + process { + """ + input[0] = [ + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") + ] + input[1] = [ + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") + ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_1.fastq.gz", checkIfExists: true), + 
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/v0.0.1/rnaseq_2.fastq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[2] = STAR_GENOMEGENERATE.out.index + + // ch_fasta + input[3] = + Channel.fromPath( + "https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starfusion_ref + input[4] = STARFUSION_BUILD.out.reference + + """ + } + params { + starfusion = true + cram = 'starfusion' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.fusions, + file(workflow.out.star_stats[0][1]).name, + workflow.out.star_gene_count, + workflow.out.ch_bam_sorted, + workflow.out.ch_bam_sorted_indexed, + workflow.out.versions + ).match() } + ) + } + } + + + + +} diff --git a/subworkflows/local/starfusion_workflow/tests/main.nf.test.snap b/subworkflows/local/starfusion_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..5bc897e7 --- /dev/null +++ b/subworkflows/local/starfusion_workflow/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "STARFUSION_WORKFLOW - Homo sapiens - FASTQs minigenome": { + "content": [ + [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.starfusion.fusion_predictions.tsv:md5,abe17134a231642edf9351e4964e8a97" + ] + ], + "test_fastqs.Log.final.out", + [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.ReadsPerGene.out.tab:md5,8e0d42deeea09924d5c7ba3147bbfd78" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,afde4f2fd6056df81e322b3c35ab7a8a", + "versions.yml:md5,e53f1ec32bc78a33f99892e42274833a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2024-12-23T18:25:52.838411738" + } +} \ No newline at end of file diff --git 
a/subworkflows/local/starfusion_workflow/tests/nextflow.config b/subworkflows/local/starfusion_workflow/tests/nextflow.config new file mode 100644 index 00000000..718add69 --- /dev/null +++ b/subworkflows/local/starfusion_workflow/tests/nextflow.config @@ -0,0 +1,37 @@ +process { + withName: 'STAR_FOR_STARFUSION' { + ext.args = '--twopassMode Basic \ + --outReadsUnmapped None \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMstrandField intronMotif \ + --outSAMunmapped Within \ + --chimSegmentMin 12 \ + --chimJunctionOverhangMin 8 \ + --chimOutJunctionFormat 1 \ + --alignSJDBoverhangMin 10 \ + --alignMatesGapMax 100000 \ + --alignIntronMax 100000 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --chimMultimapScoreRange 3 \ + --chimScoreJunctionNonGTAG -4 \ + --chimMultimapNmax 20 \ + --chimNonchimScoreDropMin 10 \ + --peOverlapNbasesMin 12 \ + --peOverlapMMp 0.1 \ + --alignInsertionFlush Right \ + --alignSplicedMateMapLminOverLmate 0 \ + --alignSplicedMateMapLmin 30 \ + --chimOutType Junctions \ + --quantMode GeneCounts' + } + + withName: 'SAMTOOLS_INDEX_FOR_STARFUSION_CRAM' { + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + } + + withName: 'SAMTOOLS_VIEW_FOR_STARFUSION' { + ext.args = { "--output-fmt cram" } + ext.prefix = { "${meta.id}.star_for_starfusion.Aligned.sortedByCoord.out" } + } +} diff --git a/subworkflows/local/stringtie_workflow.nf b/subworkflows/local/stringtie_workflow.nf new file mode 100644 index 00000000..d66ec0ee --- /dev/null +++ b/subworkflows/local/stringtie_workflow.nf @@ -0,0 +1,36 @@ +include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie/main' +include { STRINGTIE_MERGE } from '../../modules/nf-core/stringtie/merge/main' + + +workflow STRINGTIE_WORKFLOW { + take: + bam_sorted + ch_gtf + + main: + ch_versions = Channel.empty() + ch_stringtie_gtf = Channel.empty() + + if ((params.stringtie || params.all) && !params.fusioninspector_only) { + 
STRINGTIE_STRINGTIE(bam_sorted, ch_gtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + STRINGTIE_STRINGTIE + .out + .transcript_gtf + .map { it -> it[1] } + .set { stringtie_gtf } + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + + STRINGTIE_MERGE (stringtie_gtf, ch_gtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions) + ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf + } + + emit: + stringtie_gtf = ch_stringtie_gtf.ifEmpty(null) + versions = ch_versions + + } + diff --git a/subworkflows/local/stringtie_workflow/main.nf b/subworkflows/local/stringtie_workflow/main.nf new file mode 100644 index 00000000..25ff26ee --- /dev/null +++ b/subworkflows/local/stringtie_workflow/main.nf @@ -0,0 +1,36 @@ +include { STRINGTIE_STRINGTIE } from '../../../modules/nf-core/stringtie/stringtie/main' +include { STRINGTIE_MERGE } from '../../../modules/nf-core/stringtie/merge/main' + + +workflow STRINGTIE_WORKFLOW { + take: + bam_sorted // channel: [meta, bam] + ch_chrgtf // channel: [meta, gtf] + + main: + ch_versions = Channel.empty() + ch_stringtie_gtf = Channel.empty() + + if ((params.stringtie || params.all) && !params.fusioninspector_only) { + STRINGTIE_STRINGTIE(bam_sorted, ch_chrgtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + STRINGTIE_STRINGTIE + .out + .transcript_gtf + .map { it -> it[1] } + .set { stringtie_gtf } + ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) + + + STRINGTIE_MERGE (stringtie_gtf, ch_chrgtf.map { meta, gtf -> [ gtf ]}) + ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions) + ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf + } + + emit: + stringtie_gtf = ch_stringtie_gtf.ifEmpty(null) // channel: [meta, gtf] + versions = ch_versions // channel: [ path(versions.yml) ] + + } + diff --git a/subworkflows/local/stringtie_workflow/tests/main.nf.test 
b/subworkflows/local/stringtie_workflow/tests/main.nf.test new file mode 100644 index 00000000..da423ed5 --- /dev/null +++ b/subworkflows/local/stringtie_workflow/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_workflow { + + name "Test STRINGTIE_WORKFLOW" + script "../main.nf" + workflow "STRINGTIE_WORKFLOW" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "stringtie" + + test("Should run stringtie workflow with stringtie = true") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', strandedness:'reverse' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true) + ]) + """ + } + params { + stringtie = true + outdir = "$outputDir" + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + ) + } + } + + test("Should run stringtie workflow with params.all = true") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', strandedness:'reverse' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'test' ], // meta map + file("${params.test_data_base}/genomics/homo_sapiens/genome/genome.gtf", checkIfExists: true) + ]) + """ + } + params { + all = true + outdir = "$outputDir" + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + ) + } + } + +} diff --git a/subworkflows/local/stringtie_workflow/tests/main.nf.test.snap b/subworkflows/local/stringtie_workflow/tests/main.nf.test.snap new file mode 100644 index 
00000000..791c85e4 --- /dev/null +++ b/subworkflows/local/stringtie_workflow/tests/main.nf.test.snap @@ -0,0 +1,56 @@ +{ + "Should run stringtie workflow with params.all = true": { + "content": [ + { + "0": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "1": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ], + "stringtie_gtf": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "versions": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:51:10.73612885" + }, + "Should run stringtie workflow with stringtie = true": { + "content": [ + { + "0": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "1": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ], + "stringtie_gtf": [ + "stringtie.merged.gtf:md5,6da479298d73d5b3216d4e1576a2bdf4" + ], + "versions": [ + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,16bf76d2f56399280213cfa721195317", + "versions.yml:md5,be2acf8efb04dc33562c9d00df7c3a50" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:50:57.437102535" + } +} \ No newline at end of file diff --git a/subworkflows/local/trim_workflow/main.nf b/subworkflows/local/trim_workflow/main.nf new file mode 100644 index 00000000..d548c8ae --- /dev/null +++ b/subworkflows/local/trim_workflow/main.nf @@ -0,0 +1,46 @@ + + +include { FASTP } from '../../../modules/nf-core/fastp/main' +include { FASTQC as FASTQC_FOR_FASTP } from '../../../modules/nf-core/fastqc/main' + +workflow 
TRIM_WORKFLOW { + + take: + reads // channel [ meta, [ fastq files ] ] + adapter_fasta // channel [ path ] + fastp_trim // boolean + + main: + ch_versions = Channel.empty() + ch_fastp_html = Channel.empty() + ch_fastp_json = Channel.empty() + ch_fastqc_trimmed = Channel.empty() + + if ( fastp_trim ) { + FASTP(reads, adapter_fasta.ifEmpty( [] ), false, false, false) + ch_versions = ch_versions.mix(FASTP.out.versions) + + FASTQC_FOR_FASTP(FASTP.out.reads) + ch_versions = ch_versions.mix(FASTQC_FOR_FASTP.out.versions) + + ch_reads_all = FASTP.out.reads + ch_reads_fusioncatcher = ch_reads_all + ch_fastp_html = FASTP.out.html + ch_fastp_json = FASTP.out.json + ch_fastqc_trimmed = FASTQC_FOR_FASTP.out.zip + + } + else { + ch_reads_all = reads + ch_reads_fusioncatcher = reads + } + + emit: + ch_reads_all // Channel [ meta, [reads] ] + ch_reads_fusioncatcher // Channel [ meta, [reads] ] + ch_fastp_html // Channel [ meta, path_html ] + ch_fastp_json // Channel [ meta, path_json ] + ch_fastqc_trimmed // Channel [ meta, path_zip ] + versions = ch_versions // Channel [ versions ] + } + diff --git a/subworkflows/local/trim_workflow/tests/main.nf.test b/subworkflows/local/trim_workflow/tests/main.nf.test new file mode 100644 index 00000000..2f7568d6 --- /dev/null +++ b/subworkflows/local/trim_workflow/tests/main.nf.test @@ -0,0 +1,125 @@ +nextflow_workflow { + + name "Test Subworkflow TRIM_WORKFLOW" + script "../main.nf" + workflow "TRIM_WORKFLOW" + tag "qc" + tag "subworkflow" + tag "fastqc" + tag "fastp" + + test("TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true") { + + when { + + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [id: "test_fastq"], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", 
checkIfExists: true) + ] + ]) + input[1] = Channel.empty() + input[2] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + versions.collect{ file(it) }, + ch_reads_all[0][1].collect { file(it) }, + ch_reads_fusioncatcher[0][1].collect { file(it) }, + file(ch_fastp_html[0][1]).name, + file(ch_fastp_json[0][1]), + ch_fastqc_trimmed[0][1].collect { file(it).name } + ).match() } + } + ) + } + } + + test("TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true + fasp_adaptors") { + + when { + + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [id: "test_fastq"], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/delete_me/fastp/adapters.fasta', checkIfExists: true) + input[2] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + versions.collect{ file(it) }, + ch_reads_all[0][1].collect { file(it) }, + ch_reads_fusioncatcher[0][1].collect { file(it) }, + file(ch_fastp_html[0][1]).name, + file(ch_fastp_json[0][1]), + ch_fastqc_trimmed[0][1].collect { file(it).name } + ).match() } + } + ) + } + } + + test("TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == false") { + + when { + + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [id: "test_fastq"], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ]) + input[1] = Channel.empty() + input[2] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + versions.size() == 0, + ch_reads_all[0][1].size() == 2, + ch_reads_fusioncatcher[0][1].size() == 2, + ch_fastp_html.size() == 0, + ch_fastp_json.size() == 0, + ch_fastqc_trimmed.size() == 0 + ).match() } + } + ) + } + } + +} diff --git a/subworkflows/local/trim_workflow/tests/main.nf.test.snap b/subworkflows/local/trim_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..ae9f9e2e --- /dev/null +++ b/subworkflows/local/trim_workflow/tests/main.nf.test.snap @@ -0,0 +1,71 @@ +{ + "TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true": { + "content": [ + [ + "versions.yml:md5,16187796d989b6260f572247e7dc0fc6", + "versions.yml:md5,ea42abe9875f41f8362a55ee7533f102" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,0c436583301dea48755a5252a2675b64", + "test_fastq_2.fastp.fastq.gz:md5,f7f38138255e63b33286b819b6177612" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,0c436583301dea48755a5252a2675b64", + "test_fastq_2.fastp.fastq.gz:md5,f7f38138255e63b33286b819b6177612" + ], + "test_fastq.fastp.html", + "test_fastq.fastp.json:md5,62066ad48c3d5981045cdd43e354cb2b", + [ + "test_fastq_trimmed_1_fastqc.zip", + "test_fastq_trimmed_2_fastqc.zip" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T16:21:52.926289296" + }, + "TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == false": { + "content": [ + true, + true, + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T16:22:45.877168833" + }, + "TRIM_WORKFLOW - Homo sapiens - FASTQs - fastp == true + fasp_adaptors": { + "content": [ + [ + 
"versions.yml:md5,16187796d989b6260f572247e7dc0fc6", + "versions.yml:md5,ea42abe9875f41f8362a55ee7533f102" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,adc67a7b4d0bf3520866d7599a4ba814", + "test_fastq_2.fastp.fastq.gz:md5,9ee7d6c5230442970997477464255e67" + ], + [ + "test_fastq_1.fastp.fastq.gz:md5,adc67a7b4d0bf3520866d7599a4ba814", + "test_fastq_2.fastp.fastq.gz:md5,9ee7d6c5230442970997477464255e67" + ], + "test_fastq.fastp.html", + "test_fastq.fastp.json:md5,feb3483311bfa4ded60146f1cbc13fd5", + [ + "test_fastq_trimmed_1_fastqc.zip", + "test_fastq_trimmed_2_fastqc.zip" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-05T16:22:26.29488483" + } +} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf index 9ec640a6..7e036228 100644 --- a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf @@ -2,6 +2,8 @@ // Subworkflow with functionality specific to the nf-core/rnafusion pipeline // +import groovy.json.JsonSlurper + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS @@ -28,10 +30,8 @@ workflow PIPELINE_INITIALISATION { take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet main: @@ -75,11 +75,11 @@ workflow PIPELINE_INITIALISATION { Channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) .map { - meta, fastq_1, fastq_2 -> + meta, fastq_1, fastq_2, strandedness -> if (!fastq_2) { - return [ meta.id, 
meta + [ single_end:true ], [ fastq_1 ] ] + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ], strandedness ] } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ], strandedness ] } } .groupTuple() @@ -130,7 +130,7 @@ workflow PIPELINE_COMPLETION { plaintext_email, outdir, monochrome_logs, - multiqc_reports.getVal(), + multiqc_reports.getVal() ) } @@ -150,11 +150,21 @@ workflow PIPELINE_COMPLETION { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + // // Check and validate pipeline parameters // def validateInputParameters() { genomeExistsError() + + if (params.no_cosmic) { + log.warn("Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT`") + } + + if (params.starfusion_build && !params.fusion_annot_lib) { + error("No fusion annotation library provided. `STARFUSION_BUILD` is unable to run.") + } + } // @@ -163,6 +173,12 @@ def validateInputParameters() { def validateInputSamplesheet(input) { def (metas, fastqs) = input[1..2] + // Check that multiple runs of the same sample are of the same strandedness + def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1 + if (!strandedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}") + } + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 if (!endedness_ok) { @@ -171,6 +187,7 @@ def validateInputSamplesheet(input) { return [ metas[0], fastqs ] } + // // Get attribute from genome config file e.g. fasta // @@ -200,7 +217,6 @@ def genomeExistsError() { // Generate methods description for MultiQC // def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. 
// Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ @@ -214,7 +230,6 @@ def toolCitationText() { } def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ @@ -249,11 +264,10 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["tool_citations"] = "" meta["tool_bibliography"] = "" - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") // meta["tool_bibliography"] = toolBibliographyText() - def methods_text = mqc_methods_yaml.text def engine = new groovy.text.SimpleTemplateEngine() @@ -262,3 +276,24 @@ def methodsDescriptionText(mqc_methods_yaml) { return description_html.toString() } +// +// Function to generate an error if contigs in genome fasta file > 512 Mbp +// +def checkMaxContigSize(fai_file) { + def max_size = 512000000 + fai_file.eachLine { line -> + def lspl = line.split('\t') + def chrom = lspl[0] + def size = lspl[1] + if (size.toInteger() > max_size) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Contig longer than ${max_size}bp found in reference genome!\n\n" + + " ${chrom}: ${size}\n\n" + + " Provide the '--bam_csi_index' parameter to use a CSI instead of BAI index.\n\n" + + " Please see:\n" + + " https://github.com/nf-core/rnaseq/issues/744\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } + } +} diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..cb651aed --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,37 @@ +pipeline_info/*.{html,json,txt,yml} +references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz +references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz +references/ensembl/Homo_sapiens.GRCh38.102.dna.primary_assembly.fa +references/hgnc/HGNC-DB-timestamp.txt 
+references/hgnc/hgnc_complete_set.txt +references/fusion_report_db/DB-timestamp.txt +references/fusion_report_db/fusiongdb2.db +references/fusion_report_db/mitelman.db +references/salmon/salmon/ctable.bin +references/salmon/salmon/pos.bin +references/salmon/salmon/pre_indexing.log +references/salmon/salmon/ref_indexing.log +references/salmon/salmon/seq.bin +references/star/Log.out +starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/ +starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz +starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz +starfusion/ctat_genome_lib_build_dir/blast_pairs.idx +starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx +starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm +starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa +starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx +starfusion/ctat_genome_lib_build_dir/ref_annot.cds +starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa +starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx +starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans +starfusion/ctat_genome_lib_build_dir/ref_annot.pep +starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm +starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin +starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs +starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out +starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm +references/star/genomeParameters.txt +starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt +references/fusion_report_db/mitelman.db +references/fusion_report_db/fusiongdb2.db diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..abb1ba9c --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,8 @@ +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '4.h' + ] +} diff --git 
a/tests/test_build.nf.test b/tests/test_build.nf.test new file mode 100644 index 00000000..f6665777 --- /dev/null +++ b/tests/test_build.nf.test @@ -0,0 +1,40 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test_build" + tag "pipeline" + tag "pipeline_rnafusion" + tag "test_build" + + test("Test build references") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + +} diff --git a/tests/test_build.nf.test.snap b/tests/test_build.nf.test.snap new file mode 100644 index 00000000..30d80c73 --- /dev/null +++ b/tests/test_build.nf.test.snap @@ -0,0 +1,284 @@ +{ + "Test build references": { + "content": [ + 12, + { + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + 
"HGNC_DOWNLOAD": { + "wget": null + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_GENOMEGENERATE": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "gatk4", + "gatk4/minigenome.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "pipeline_info", + "pipeline_info/nf_core_rnafusion_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/minigenome.fa.fai", + "references/gencode/minigenome.gtf.interval_list", + "references/gencode/minigenome.gtf.refflat", + "references/gffread", + "references/gffread/minigenome.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + 
"references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + 
"starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz" + ], + [ + "minigenome.dict:md5,f2dfb8df1d1f860050fc0dba1399fd9e", + 
"rrna.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "rrna.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c", + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978", + "fusion_report.log:md5,d41d8cd98f00b204e9800998ecf8427e", + "minigenome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "minigenome.gtf.interval_list:md5,9111fe876b17e5c18314206e9f985b8c", + "minigenome.gtf.refflat:md5,ed76bc063c98087f1100a928923758c7", + "minigenome.gtf.fasta:md5,349d42b5dbd73e163cdbad3453d8cd3a", + "complete_ref_lens.bin:md5,3b6d71ddb3eb8239be6623ab350a636d", + "ctg_offsets.bin:md5,174ead59c00c5264300d3ff1de673074", + "duplicate_clusters.tsv:md5,d30ab58d133298ed9ba51f1ba20ce89c", + "info.json:md5,d91d8a954afcb4be9fe7bfb3e84b5be6", + "mphf.bin:md5,b364192c6c73de4c38ec2ed0750f5155", + "rank.bin:md5,3fe9c2dc9da7faff6362510c4f29e87d", + "refAccumLengths.bin:md5,26d9405e7591ec9a606caa36a094941a", + "reflengths.bin:md5,187a4a0dee316c5dbf440fa3c58d9eec", + "refseq.bin:md5,712c4a8d988c15016ea746db7022e549", + "versionInfo.json:md5,d2c799050e81aa6e282ac8a73e773941", + "Genome:md5,1621486e3b854fefa37c0b4e77275233", + "SA:md5,9b6b936f3a8a1232c4fc5533daf9401d", + "SAindex:md5,8b2025227b78fdc83e3e5b17c0f538f7", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "sjdbInfo.txt:md5,f044cb1cd38af66c832f3afc4b1655cb", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", + "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + 
"_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + 
"ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-14T13:08:24.564679638" + } +} \ No newline at end of file diff --git a/tests/test_cosmic.nf.test b/tests/test_cosmic.nf.test new file mode 100644 index 00000000..b4f2a363 --- /dev/null +++ b/tests/test_cosmic.nf.test @@ -0,0 +1,101 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test_cosmic" + tag "pipeline" + tag "pipeline_rnafusion" + tag "test_cosmic" + + test("test cosmic with fastp trim") { + + when { + params { + outdir = "$outputDir" + fastp_trim = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps 
in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("test cosmic no fastp trim") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("test cosmic no fastp trim build") { + + when { + params { + outdir = "$outputDir" + build_references = true + } + } + + then { + // stable_name: All 
files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/test_cosmic.nf.test.snap b/tests/test_cosmic.nf.test.snap new file mode 100644 index 00000000..c920e032 --- /dev/null +++ b/tests/test_cosmic.nf.test.snap @@ -0,0 +1,58 @@ +{ + "test cosmic no fastp trim build": { + "content": [ + 0, + { + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_rnafusion_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T12:42:49.558429" + }, + "test cosmic no fastp trim": { + "content": [ + 0, + null, + [ + "pipeline_info" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T15:09:53.130295" + }, + "test cosmic with fastp trim": { + "content": [ + 0, + null, + [ + "pipeline_info" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T15:09:29.277399" + } +} diff --git a/tests/test_stub.nf.test b/tests/test_stub.nf.test new file mode 100644 index 00000000..065723e2 --- /dev/null +++ 
b/tests/test_stub.nf.test @@ -0,0 +1,73 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test" + options "-stub" // TODO remove once reference files are available + tag "pipeline" + tag "pipeline_rnafusion" + tag "test_stub" + + test("stub test with fastp trim") { + + when { + params { + outdir = "$outputDir" + fastp_trim = true + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + // def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') //TODO uncomment once -stub is removed + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + // stable_path // TODO uncomment once -stub is removed + ).match() } + ) + } + } + + test("stub test no fastp trim") { + + when { + params { + outdir = "$outputDir" + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + // Ignore files with timestamps in their names + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // 
stable_path: All files in ${params.outdir}/ with stable content + // def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') //TODO uncomment once -stub is removed + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnafusion_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + // stable_path // TODO uncomment once -stub is removed + ).match() } + ) + } + } +} diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap new file mode 100644 index 00000000..3a425748 --- /dev/null +++ b/tests/test_stub.nf.test.snap @@ -0,0 +1,737 @@ +{ + "stub test no fastp trim": { + "content": [ + 31, + { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "1.33" + }, + "FUSIONCATCHER_DETECT": { + "fusioncatcher": "1.33" + }, + "FUSIONREPORT": { + "fusion_report": "2.1.5" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + 
}, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.14.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_FOR_ARRIBA": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "ctatsplicing", + "ctatsplicing/arriba", + "ctatsplicing/arriba/test.cancer.introns", + "ctatsplicing/arriba/test.cancer.introns.prelim", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/arriba/test.chckpts", + "ctatsplicing/arriba/test.ctat-splicing.igv.html", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/arriba/test.igv.tracks", + "ctatsplicing/arriba/test.introns", + "ctatsplicing/arriba/test.introns.for_IGV.bed", + "ctatsplicing/starfusion", + "ctatsplicing/starfusion/test.cancer.introns", + "ctatsplicing/starfusion/test.cancer.introns.prelim", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/starfusion/test.chckpts", + "ctatsplicing/starfusion/test.ctat-splicing.igv.html", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/starfusion/test.igv.tracks", + 
"ctatsplicing/starfusion/test.introns", + "ctatsplicing/starfusion/test.introns.for_IGV.bed", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", + "pipeline_info", + "pipeline_info/nf_core_rnafusion_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + 
"references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", 
+ "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + "star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + 
"starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + 
"starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-07T13:16:02.754052" + }, + "stub test with fastp trim": { + "content": [ + 33, + { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, + "FASTP": { + "fastp": "0.23.4" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FASTQC_FOR_FASTP": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "1.33" + }, + "FUSIONCATCHER_DETECT": { + "fusioncatcher": "1.33" + }, + "FUSIONREPORT": { + "fusion_report": "2.1.5" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" 
+ }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.14.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_FOR_ARRIBA": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": "2.7.11b", + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "ctatsplicing", + "ctatsplicing/arriba", + "ctatsplicing/arriba/test.cancer.introns", + "ctatsplicing/arriba/test.cancer.introns.prelim", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/arriba/test.chckpts", + "ctatsplicing/arriba/test.ctat-splicing.igv.html", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/arriba/test.igv.tracks", + "ctatsplicing/arriba/test.introns", + "ctatsplicing/arriba/test.introns.for_IGV.bed", + "ctatsplicing/starfusion", + "ctatsplicing/starfusion/test.cancer.introns", + 
"ctatsplicing/starfusion/test.cancer.introns.prelim", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/starfusion/test.chckpts", + "ctatsplicing/starfusion/test.ctat-splicing.igv.html", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/starfusion/test.igv.tracks", + "ctatsplicing/starfusion/test.introns", + "ctatsplicing/starfusion/test.introns.for_IGV.bed", + "fastp", + "fastp/test.fastp.html", + "fastp/test.fastp.json", + "fastp/test.fastp.log", + "fastp/test_1.fastp.fastq.gz", + "fastp/test_2.fastp.fastq.gz", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fastqc_for_fastp", + "fastqc_for_fastp/test_trimmed.html", + "fastqc_for_fastp/test_trimmed.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", + "pipeline_info", + "pipeline_info/nf_core_rnafusion_software_mqc_versions.yml", + "references", + 
"references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + 
"references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + "star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + 
"star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + 
"starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-07T13:13:38.690939" + } +} \ No newline at 
end of file diff --git a/tower.yml b/tower.yml index 787aedfe..2edf5a7f 100644 --- a/tower.yml +++ b/tower.yml @@ -1,5 +1,31 @@ reports: multiqc_report.html: display: "MultiQC HTML report" - samplesheet.csv: - display: "Auto-created samplesheet with collated metadata and FASTQ paths" + "**/arriba/*.arriba.fusions.tsv": + display: "Arriba identified fusion TSV report" + "**/arriba_visualisation/*_combined_fusions_arriba_visualisation.pdf": + display: "PDF visualisation of the transcripts involved in predicted fusions" + "**/fastp/*fastp.html": + display: "Post fastp trimming HTML report" + "**/fusioncatcher/*.fusioncatcher.fusion-genes.txt": + display: "FusionCatcher identified fusion TXT report" + "**/fusioninspector/*.FusionInspector.fusions.abridged.tsv": + display: "FusionInspector TSV report" + "**/fusionreport/*/*_fusionreport_index.html": + display: "Fusion-report HTML report" + "**/vcf/*_fusion_data.vcf.gz": + display: "Collected statistics on each fusion fed to FusionInspector in VCF format" + "**/picard/*.MarkDuplicates.metrics.txt": + display: "GATK4: Metrics from MarkDuplicates" + "**/picard/*_rna_metrics.txt": + display: "Picard: Metrics from CollectRnaSeqMetrics" + "**/picard/*insert*size*metrics.txt": + display: "Picard: Metrics from CollectInsertSizeMetrics" + "**/picard/*pdf": + display: "Picard: InsertSizeMetrics histogram" + "**/star_for_starfusion/*ReadsPerGene.out.tab": + display: "Number of reads per gene" + "**/starfusion/*.starfusion.fusion_predictions.tsv": + display: "STAR-Fusion identified fusion TSV report" + "**/stringtie/*/*stringtie.merged.gtf": + display: "Merged GTFs from StringTie with annotations" diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index e80985a2..ddd006ba 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -3,12 +3,26 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from
'../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' + +include { BUILD_REFERENCES } from '../subworkflows/local/build_references' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow/main' +include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' +include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' +include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' +include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow/main' +include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' +include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' +include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -18,21 +32,150 @@ include { methodsDescriptionText } from 
'../subworkflows/local/utils_nfcore_rnaf workflow RNAFUSION { + take: ch_samplesheet // channel: samplesheet read in from --input + main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + def ch_versions = Channel.empty() + def ch_multiqc_files = Channel.empty() + // - // MODULE: Run FastQC + // Create references if necessary // - FASTQC ( - ch_samplesheet + + BUILD_REFERENCES() + ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) + + if (!params.references_only) { // TODO: Remove this temporary parameter when we have a full-working GitHub nf-test + + // + // QC from FASTQ files + // + FASTQC ( + ch_samplesheet + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQC.out.versions) + + + // + // Trimming + // + TRIM_WORKFLOW ( + ch_samplesheet, + Channel.value(params.adapter_fasta), + params.fastp_trim + ) + ch_reads = TRIM_WORKFLOW.out.ch_reads_all + ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + + SALMON_QUANT( ch_reads, BUILD_REFERENCES.out.ch_salmon_index.map{ it -> it[1] }, BUILD_REFERENCES.out.ch_gtf.map{ it -> it[1] }, [], false, 'A') + ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.json_info.collect{it[1]}) + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) + + + // + // SUBWORKFLOW: Run STAR alignment and Arriba + // + + // TODO: add params.seq_platform and pass it as argument to arriba_workflow + // TODO: improve how params.arriba_fusions would avoid running arriba module. Maybe imputed from samplesheet? + // TODO: same as above, but with ch_arriba_fusion_fail. 
It is currently replaced by a dummy file + + ARRIBA_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_starindex_ref, + BUILD_REFERENCES.out.ch_arriba_ref_blacklist, + BUILD_REFERENCES.out.ch_arriba_ref_cytobands, + BUILD_REFERENCES.out.ch_arriba_ref_known_fusions, + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, + BUILD_REFERENCES.out.ch_starfusion_ref, + params.arriba, // boolean + params.all, // boolean + params.fusioninspector_only, // boolean + params.star_ignore_sjdbgtf, // boolean + params.ctatsplicing, // boolean + params.seq_center ?: '', // string + params.arriba_fusions, // path + params.cram // array + ) + ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) + + + //Run STAR fusion + STARFUSION_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_starindex_ref, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_starfusion_ref + ) + ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) + + + + //Run fusioncatcher + FUSIONCATCHER_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_fusioncatcher_ref, // channel [ meta, path ] + params.run_fusioncatcher, + params.all, + params.fusioninspector_only, + params.fusioncatcher_fusions ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) + + //Run stringtie + STRINGTIE_WORKFLOW ( + STARFUSION_WORKFLOW.out.ch_bam_sorted, + BUILD_REFERENCES.out.ch_gtf + ) + ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) + + + //Run fusion-report + FUSIONREPORT_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_fusionreport_ref, + ARRIBA_WORKFLOW.out.fusions, + STARFUSION_WORKFLOW.out.fusions, + FUSIONCATCHER_WORKFLOW.out.fusions + ) + ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) + + //Run FusionInspector + FUSIONINSPECTOR_WORKFLOW ( +
ch_reads, + FUSIONREPORT_WORKFLOW.out.fusion_list, + FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, + FUSIONREPORT_WORKFLOW.out.report, + FUSIONREPORT_WORKFLOW.out.csv, + STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, + BUILD_REFERENCES.out.ch_arriba_ref_cytobands, + BUILD_REFERENCES.out.ch_hgnc_ref, + BUILD_REFERENCES.out.ch_hgnc_date + ) + ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) + + + //QC + QC_WORKFLOW ( + STARFUSION_WORKFLOW.out.ch_bam_sorted, + BUILD_REFERENCES.out.ch_refflat, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_fai, + BUILD_REFERENCES.out.ch_rrna_interval + ) + ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) + + } // // Collate and save software versions // @@ -76,6 +219,19 @@ workflow RNAFUSION { ) ) + if (!params.references_only) { // TODO: Remove this temporary parameter when we have a full-working GitHub nf-test + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_html.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastqc_trimmed.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = 
ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([])) + } + MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), @@ -85,7 +241,10 @@ workflow RNAFUSION { [] ) - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + + + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] }