Skip to content

Commit

Permalink
Add bamToFastq module
Browse files Browse the repository at this point in the history
  • Loading branch information
emi80 committed Jun 21, 2024
1 parent 5e307a9 commit 14afcfa
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 0 deletions.
87 changes: 87 additions & 0 deletions modules/bamToFastq/samtools/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
include { parseJSON } from "../../functions"

// Module-level parameters: tool versions, container images and bamstats options
// used by the processes defined in this file.

// samtools image tag; interpolated into params.samtoolsContainer below.
params.samtoolsVersion = '1.19.2--h50ea8bc_1'
// samtools image, resolved against params.containerRepo (not defined in this file).
params.samtoolsContainer = "${params.containerRepo}/samtools:${params.samtoolsVersion}"
// bamstats image tag; interpolated into params.bamStatsContainer below.
params.bamstatsVersion = '0.3.5--he881be0_0'
// NOTE(review): unlike the samtools image, this one is pinned to quay.io/biocontainers
// rather than params.containerRepo -- confirm this is intentional.
params.bamStatsContainer = "quay.io/biocontainers/bamstats:${params.bamstatsVersion}"
// Value passed to `bamstats --max-buf` in getProtocol.
params.bamStatsMaxBuf = '1000000'
// Value passed to `bamstats --loglevel` in getProtocol.
params.bamStatsLogLevel = 'info'

/*
 * Runs `bamstats` on a BAM file and emits the tool's standard output.
 *
 * Input : tuple (sample, id, bam, type, view); `type` and `view` are accepted
 *         so the tuple shape matches the alignment channel but are not used here.
 * Output: tuple (sample, id, stdout) where stdout is whatever bamstats printed
 *         (the bamToFastq workflow parses it as JSON and reads `general.protocol`).
 */
process getProtocol {

    tag "${sample}"
    container params.bamStatsContainer

    input:
    tuple val(sample), val(id), path(bam), val(type), val(view)

    output:
    tuple val(sample), val(id), stdout

    script:
    // The former `prefix = "${sample}"` assignment was never read by the
    // command or by the output declaration, so it has been dropped.
    // `-n 1000` is kept as in the original command line
    // (presumably a cap on the number of reads inspected -- TODO confirm).
    """
    bamstats -c ${task.cpus} \\
    -i ${bam} \\
    -n 1000 \\
    --max-buf ${params.bamStatsMaxBuf} \\
    --loglevel ${params.bamStatsLogLevel}
    """
}

/*
 * Converts a BAM file to gzipped FASTQ with `samtools fastq`.
 *
 * Input : tuple (sample, id, bam, type, view, protocol).
 * Output: tuple (sample, id, fastq file(s), type, view) where `type` is
 *         overwritten with "fastq" and `view` with "FqRd" (single output file)
 *         or ['FqRd1', 'FqRd2'] (paired-end, two output files).
 *
 * When `protocol` is "PairedEnd" the BAM is first passed through
 * `samtools collate`, streamed to `samtools fastq` via a named pipe.
 */
process toFastq {

    tag "${sample}"
    container params.samtoolsContainer

    input:
    tuple val(sample), val(id), path(bam), val(type), val(view), val(protocol)

    output:
    tuple val(sample), val(id), path("${prefix}*.fastq.gz"), val(type), val(view)

    script:
    // These three are deliberately NOT declared with `def`: the output
    // declaration above reads `prefix`, `type` and `view` from the task context.
    prefix = "${id}"
    type = "fastq"
    view = "FqRd"

    def cmd = []
    def inputBam = bam
    def outParams = "-o ${prefix}.fastq.gz"
    def pairedEnd = ( protocol == "PairedEnd" )

    if ( pairedEnd ) {
        view = ['FqRd1', 'FqRd2']
        inputBam = "${prefix}.collate.bam"
        outParams = "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz"
        // Stream the collated BAM through a FIFO instead of writing it to disk.
        cmd << "mkfifo ${prefix}.collate.bam"
        cmd << "samtools collate -o ${prefix}.collate.bam --threads ${task.cpus} ${bam} &"
        // Remember the background job so its exit status can be checked below.
        cmd << 'collate_pid=$!'
    }

    cmd << "samtools fastq -N -O -c 9 --threads ${task.cpus} ${outParams} ${inputBam}"

    if ( pairedEnd ) {
        // Propagate a failure of the backgrounded collate: without this the task
        // could succeed even though collate exited with an error.
        cmd << 'wait "$collate_pid"'
    }

    cmd.join('\n')
}

/*
 * Converts genome alignments (BAM) to FASTQ, detecting the sequencing
 * protocol of each BAM first.
 *
 * take: genomeAlignments -- channel of tuples (sample, id, bam, type, view).
 * emit: the output channel of toFastq.
 */
workflow bamToFastq {
    take:
    genomeAlignments

    main:
    getProtocol( genomeAlignments )

    // Reduce each (sample, id, bamstats-stdout) tuple to (sample, id, protocol).
    getProtocol.out
        .map { row ->
            // `def` keeps the parsed report local to this closure call; an
            // undeclared variable here would be script-global and shared
            // between concurrently processed items.
            def stats = parseJSON(row[-1])
            row[0..1] + [ stats.general.protocol ]
        }
        .set { protocol }

    // Append the protocol to the matching alignment tuple, keyed on (sample, id).
    genomeAlignments
        .join( protocol, by: [0,1] )
        .set { bams }

    toFastq( bams )

    emit:
    toFastq.out
}
26 changes: 26 additions & 0 deletions tests/modules/bamToFastq/samtools/main.getprotocol.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// nf-test spec for the getProtocol process: runs it on one BAM from the
// project's data directory and compares the process output with the stored snapshot.
nextflow_process {

    name "Test Process getProtocol"
    script "modules/bamToFastq/samtools/main.nf"
    process "getProtocol"

    test("Should get sequencing protocol from BAM file") {

        when {
            process {
                // Channel.fromList replaces the deprecated Channel.from factory;
                // it emits each element of the list, i.e. the single input tuple.
                """
                input[0] = Channel.fromList([
                [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments" ]
                ])
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()
        }

    }

}
20 changes: 20 additions & 0 deletions tests/modules/bamToFastq/samtools/main.getprotocol.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"Should get sequencing protocol from BAM file": {
"content": [
{
"0": [
[
"sample3",
"test3",
"{\n\t\"general\": {\n\t\t\"protocol\": \"PairedEnd\",\n\t\t\"reads\": {\n\t\t\t\"total\": 1000,\n\t\t\t\"mapped\": {\n\t\t\t\t\"1\": 998,\n\t\t\t\t\"2\": 2\n\t\t\t},\n\t\t\t\"mappings\": {\n\t\t\t\t\"ratio\": 1.002,\n\t\t\t\t\"count\": 1002\n\t\t\t}\n\t\t},\n\t\t\"pairs\": {\n\t\t\t\"total\": 489,\n\t\t\t\"mapped\": {\n\t\t\t\t\"1\": 488,\n\t\t\t\t\"2\": 1\n\t\t\t},\n\t\t\t\"insert_sizes\": {\n\t\t\t\t\"74\": 1,\n\t\t\t\t\"102\": 1,\n\t\t\t\t\"104\": 1,\n\t\t\t\t\"107\": 1,\n\t\t\t\t\"111\": 2,\n\t\t\t\t\"112\": 1,\n\t\t\t\t\"113\": 2,\n\t\t\t\t\"123\": 1,\n\t\t\t\t\"124\": 2,\n\t\t\t\t\"125\": 1,\n\t\t\t\t\"126\": 3,\n\t\t\t\t\"127\": 2,\n\t\t\t\t\"129\": 2,\n\t\t\t\t\"133\": 3,\n\t\t\t\t\"135\": 2,\n\t\t\t\t\"137\": 2,\n\t\t\t\t\"141\": 2,\n\t\t\t\t\"143\": 2,\n\t\t\t\t\"144\": 1,\n\t\t\t\t\"150\": 1,\n\t\t\t\t\"152\": 2,\n\t\t\t\t\"153\": 3,\n\t\t\t\t\"154\": 2,\n\t\t\t\t\"155\": 1,\n\t\t\t\t\"156\": 2,\n\t\t\t\t\"157\": 1,\n\t\t\t\t\"158\": 1,\n\t\t\t\t\"159\": 1,\n\t\t\t\t\"160\": 4,\n\t\t\t\t\"161\": 5,\n\t\t\t\t\"162\": 3,\n\t\t\t\t\"163\": 4,\n\t\t\t\t\"164\": 8,\n\t\t\t\t\"165\": 4,\n\t\t\t\t\"166\": 7,\n\t\t\t\t\"167\": 6,\n\t\t\t\t\"168\": 11,\n\t\t\t\t\"169\": 9,\n\t\t\t\t\"170\": 12,\n\t\t\t\t\"171\": 4,\n\t\t\t\t\"172\": 3,\n\t\t\t\t\"173\": 6,\n\t\t\t\t\"174\": 13,\n\t\t\t\t\"175\": 12,\n\t\t\t\t\"176\": 9,\n\t\t\t\t\"177\": 12,\n\t\t\t\t\"178\": 12,\n\t\t\t\t\"179\": 11,\n\t\t\t\t\"180\": 15,\n\t\t\t\t\"181\": 10,\n\t\t\t\t\"182\": 11,\n\t\t\t\t\"183\": 11,\n\t\t\t\t\"184\": 13,\n\t\t\t\t\"185\": 10,\n\t\t\t\t\"186\": 14,\n\t\t\t\t\"187\": 10,\n\t\t\t\t\"188\": 10,\n\t\t\t\t\"189\": 8,\n\t\t\t\t\"190\": 4,\n\t\t\t\t\"191\": 4,\n\t\t\t\t\"192\": 11,\n\t\t\t\t\"193\": 11,\n\t\t\t\t\"194\": 9,\n\t\t\t\t\"195\": 11,\n\t\t\t\t\"196\": 10,\n\t\t\t\t\"197\": 8,\n\t\t\t\t\"198\": 3,\n\t\t\t\t\"199\": 5,\n\t\t\t\t\"200\": 8,\n\t\t\t\t\"201\": 5,\n\t\t\t\t\"202\": 7,\n\t\t\t\t\"203\": 5,\n\t\t\t\t\"204\": 6,\n\t\t\t\t\"205\": 5,\n\t\t\t\t\"206\": 
2,\n\t\t\t\t\"208\": 2,\n\t\t\t\t\"209\": 6,\n\t\t\t\t\"210\": 4,\n\t\t\t\t\"211\": 2,\n\t\t\t\t\"212\": 1,\n\t\t\t\t\"213\": 2,\n\t\t\t\t\"214\": 3,\n\t\t\t\t\"215\": 2,\n\t\t\t\t\"216\": 1,\n\t\t\t\t\"217\": 2,\n\t\t\t\t\"218\": 2,\n\t\t\t\t\"219\": 4,\n\t\t\t\t\"220\": 1,\n\t\t\t\t\"221\": 2,\n\t\t\t\t\"222\": 1,\n\t\t\t\t\"223\": 1,\n\t\t\t\t\"224\": 2,\n\t\t\t\t\"233\": 1,\n\t\t\t\t\"235\": 2,\n\t\t\t\t\"237\": 1,\n\t\t\t\t\"7228\": 1,\n\t\t\t\t\"11975\": 1,\n\t\t\t\t\"20827\": 1,\n\t\t\t\t\"59774\": 1,\n\t\t\t\t\"59790\": 2,\n\t\t\t\t\"59793\": 1,\n\t\t\t\t\"59797\": 1,\n\t\t\t\t\"59799\": 1,\n\t\t\t\t\"59800\": 1,\n\t\t\t\t\"59805\": 1,\n\t\t\t\t\"59810\": 1,\n\t\t\t\t\"59812\": 1,\n\t\t\t\t\"59816\": 1,\n\t\t\t\t\"59817\": 1,\n\t\t\t\t\"59828\": 1,\n\t\t\t\t\"59829\": 1,\n\t\t\t\t\"59830\": 1,\n\t\t\t\t\"68769\": 1,\n\t\t\t\t\"80023\": 1\n\t\t\t}\n\t\t}\n\t}\n}"
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-21T16:29:27.186894"
}
}
26 changes: 26 additions & 0 deletions tests/modules/bamToFastq/samtools/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// nf-test spec for the bamToFastq workflow: feeds one alignment tuple through
// the workflow and compares the workflow output with the stored snapshot.
nextflow_workflow {

    name "Test Workflow bamToFastq"
    script "modules/bamToFastq/samtools/main.nf"
    workflow "bamToFastq"

    test("Should convert BAM to FastQ (detecting protocol)") {

        when {
            workflow {
                // Channel.fromList replaces the deprecated Channel.from factory;
                // it emits each element of the list, i.e. the single input tuple.
                """
                input[0] = Channel.fromList([
                [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments" ]
                ])
                """
            }
        }

        then {
            assert workflow.success
            assert snapshot(workflow.out).match()
        }

    }

}
28 changes: 28 additions & 0 deletions tests/modules/bamToFastq/samtools/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"Should convert BAM to FastQ (detecting protocol)": {
"content": [
{
"0": [
[
"sample3",
"test3",
[
"test3_1.fastq.gz:md5,acac9b626a3bba40b309c4fb880360f7",
"test3_2.fastq.gz:md5,877a915ea5eb84a315f41a1fa54bf5f1"
],
"fastq",
[
"FqRd1",
"FqRd2"
]
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-21T16:29:39.476232"
}
}
26 changes: 26 additions & 0 deletions tests/modules/bamToFastq/samtools/main.tofastq.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// nf-test spec for the toFastq process: runs it on one BAM with the protocol
// given explicitly as "PairedEnd" and compares the output with the stored snapshot.
nextflow_process {

    name "Test Process toFastq"
    script "modules/bamToFastq/samtools/main.nf"
    process "toFastq"

    test("Should convert BAM to FastQ") {

        when {
            process {
                // Channel.fromList replaces the deprecated Channel.from factory;
                // it emits each element of the list, i.e. the single input tuple.
                """
                input[0] = Channel.fromList([
                [ "sample3", "test3", file("${baseDir}/data/sample3_m4_n10_toGenome.bam"), "bam", "GenomeAlignments", "PairedEnd"]
                ])
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()
        }

    }

}
28 changes: 28 additions & 0 deletions tests/modules/bamToFastq/samtools/main.tofastq.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"Should convert BAM to FastQ": {
"content": [
{
"0": [
[
"sample3",
"test3",
[
"test3_1.fastq.gz:md5,acac9b626a3bba40b309c4fb880360f7",
"test3_2.fastq.gz:md5,877a915ea5eb84a315f41a1fa54bf5f1"
],
"fastq",
[
"FqRd1",
"FqRd2"
]
]
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-21T16:29:17.900642"
}
}

0 comments on commit 14afcfa

Please sign in to comment.