feature naming option addition - combine gene_name & gene_id

mdibl · Sep 10, 2024 · 75c42b1 · 75c42b1
1 parent 5049e74
commit 75c42b1
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 3 deletions.
diff --git a/modules/local/feature_naming.nf b/modules/local/feature_naming.nf
@@ -0,0 +1,71 @@
+/*
+ * FEATURE_NAMING
+ *
+ * Rewrites column 2 of <sample_files>/features.tsv.gz so that every feature whose
+ * column 1 differs from column 2 is renamed to "<col2>::<col1>", and writes the
+ * third column as the literal "Expression". It then builds AuxGeneList.csv, which
+ * holds, per column of the supplied gene list (1..4, headed MTgenes, G2Mgenes,
+ * Sgenes, RMgenes), the renamed features that match the listed names.
+ *
+ * Inputs:
+ *   1. tuple(meta,  sample_files) - directory containing features.tsv.gz
+ *   2. tuple(meta2, gene_list)    - comma-separated gene list with a header row
+ *
+ * Outputs:
+ *   features - tuple(meta, sample_files, AuxGeneList.csv)
+ *   versions - versions.yml
+ */
+process FEATURE_NAMING {
+
+    tag "${meta.id}"
+    label 'process_low'
+
+    // The script edits features.tsv.gz in place; copy the staged inputs so the
+    // edit never reaches the upstream files through the default symlinks.
+    stageInMode 'copy'
+
+    // NOTE(review): the script needs perl and gzip, while the Singularity branch
+    // points at a python image and the Docker branch at a gzip image - confirm
+    // that both images actually ship perl, or switch to one that does.
+    conda "conda-forge::python=3.9.5"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9--1' :
+        'quay.io/biocontainers/gzip:1.11' }"
+
+    input:
+    tuple val(meta), path(sample_files)
+    // Distinct name so the second tuple does not overwrite `meta` from the first.
+    tuple val(meta2), path(gene_list)
+
+    output:
+    tuple val(meta), path (sample_files), path ("AuxGeneList.csv"), emit: features
+    path("versions.yml"), emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Every `$` meant for bash/perl is written `\$` and every backslash `\\`,
+    // otherwise Groovy interpolates or rejects it before the shell sees it.
+    """
+    #!/bin/bash
+
+    # Rename features: "<name>::<id>" when id and name differ, unchanged otherwise.
+    gzip -dc ${sample_files}/features.tsv.gz | perl -ane 'if (\$F[0] ne \$F[1]) { print "\$F[0]\\t\$F[1]::\$F[0]\\tExpression\\n"; } else { print "\$F[0]\\t\$F[1]\\tExpression\\n"; }' | gzip > features_new.tsv.gz
+
+    rm ${sample_files}/features.tsv.gz
+    mv features_new.tsv.gz ${sample_files}/features.tsv.gz
+
+    # build_gene_column <gene_list column> <header> <output csv>
+    # Turns each listed gene into the literal pattern "<TAB>gene::" and collects
+    # the renamed features (column 2) that contain it under the given header.
+    build_gene_column() {
+        local column="\$1"
+        local header="\$2"
+        local out_csv="\$3"
+        local patterns="orig\${out_csv}"
+
+        echo "\$header" > "\$out_csv"
+        # `|| true`: an empty category is valid and must not abort the task
+        # when the shell runs with errexit/pipefail.
+        cut -f"\$column" -d "," ${gene_list} | grep -v '^\$' | tail -n +2 | perl -pe 's/^/\\t/;s/\$/::/' > "\$patterns" || true
+        # -F: gene names are literal strings, not regular expressions.
+        gzip -dc ${sample_files}/features.tsv.gz | grep -F -f "\$patterns" | cut -f2 >> "\$out_csv" || true
+        rm -f "\$patterns"
+    }
+
+    build_gene_column 1 "MTgenes"  MT.csv
+    build_gene_column 2 "G2Mgenes" G2M.csv
+    build_gene_column 3 "Sgenes"   S.csv
+    build_gene_column 4 "RMgenes"  RM.csv
+
+    paste -d ',' MT.csv G2M.csv S.csv RM.csv > AuxGeneList.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        BASH: \$(echo \$(bash --version) )
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    # Create the declared output so stub runs satisfy the output block.
+    touch AuxGeneList.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        BASH: \$(echo \$(bash --version) )
+    END_VERSIONS
+    """
+
+}
+
diff --git a/workflows/scscape.nf b/workflows/scscape.nf
@@ -48,6 +48,7 @@ include { INTEGRATION          } from '../modules/local/integration.nf'
 include { FIND_NN_CLUSTER      } from '../modules/local/find_NN_clusters.nf'
 include { DISPLAY_REDUCTION    } from '../modules/local/plotting.nf'
 include { GZIP                 } from '../modules/local/gzip.nf'
+include { FEATURE_NAMING       } from '../modules/local/feature_naming.nf'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -127,10 +128,17 @@ workflow SCSCAPE {
                 .map { it.reverse() }
                 .set { ch_updated_meta }
 
-    ch_updated_meta.view()
+    // Optionally rename features to "<gene_name>::<gene_id>" before building the
+    // Seurat objects. ch_updated_features always carries
+    // [meta, sample_files, gene_list] so MAKE_SEURAT can consume it either way.
+    if (params.gene_identifier == "COMBINE"){
+        FEATURE_NAMING(
+            ch_updated_meta.map { [it[0], it[1]] },
+            ch_updated_meta.map { [it[0], it[2]] }
+        )
+        // The process has two outputs; the first is the
+        // [meta, sample_files, AuxGeneList.csv] tuple channel.
+        ch_updated_features = FEATURE_NAMING.out[0]
+    } else {
+        // Without renaming, pass the original channel through unchanged.
+        ch_updated_features = ch_updated_meta
+    }
+
+    ch_updated_features.view()
     ch_init_rds = MAKE_SEURAT (
-        ch_updated_meta.map { [it[0], it[1]] },
-        ch_updated_meta.map { [it[0], it[2]] },
+        ch_updated_features.map { [it[0], it[1]] },
+        ch_updated_features.map { [it[0], it[2]] },
         params.min_cells,
         params.min_features,
         params.gene_identifier