Skip to content

Commit

Permalink
feature naming option addition - combine gene_name & gene_id
Browse files Browse the repository at this point in the history
  • Loading branch information
Riley-Grindle committed Sep 10, 2024
1 parent 5049e74 commit 75c42b1
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 3 deletions.
71 changes: 71 additions & 0 deletions modules/local/feature_naming.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
 * FEATURE_NAMING
 *
 * Rewrites <sample_files>/features.tsv.gz so that the second column becomes
 * "<col2>::<col1>" whenever the two columns differ (third column is set to
 * "Expression"), then builds AuxGeneList.csv: a four-column CSV
 * (MTgenes, G2Mgenes, Sgenes, RMgenes) holding the renamed features whose
 * original name appears in the corresponding column (1-4) of `gene_list`.
 *
 * Inputs:
 *   sample_files - directory containing features.tsv.gz
 *   gene_list    - comma-separated file with a header row; columns 1-4 are
 *                  read as the MT, G2M, S and RM gene sets respectively
 *
 * Outputs:
 *   [ meta, sample_files (with rewritten features.tsv.gz), AuxGeneList.csv ]
 *   versions.yml (emit: versions)
 */
process FEATURE_NAMING {

    tag "${meta.id}"
    label 'process_low'

    // NOTE(review): the Singularity branch pulls a python image while the Docker
    // branch pulls a gzip image, and the script below also needs perl. Confirm
    // that both images (and the conda env) provide gzip, perl, grep, cut and paste.
    conda "conda-forge::python=3.9.5"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/python:3.9--1' :
        'quay.io/biocontainers/gzip:1.11' }"

    input:
    tuple val(meta), path(sample_files)
    tuple val(meta), path(gene_list)

    output:
    tuple val(meta), path (sample_files), path ("AuxGeneList.csv")
    path("versions.yml"), emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Every shell/Perl dollar sign is escaped (\$) so Groovy does not try to
    // interpolate it; only ${sample_files}, ${gene_list} and ${task.process}
    // are Nextflow variables. No shebang is given so that the shell options
    // configured for the pipeline apply and failing commands abort the task.
    """
    # The staged input is normally a symlink into the upstream task directory.
    # Replace it with a private, dereferenced copy so the upstream output is
    # never modified in place (keeps re-runs and -resume reproducible).
    cp -rL "${sample_files}" feature_naming_copy
    rm -rf "${sample_files}"
    mv feature_naming_copy "${sample_files}"

    features="${sample_files}/features.tsv.gz"

    # Column 2 becomes "<col2>::<col1>" when it differs from column 1.
    gzip -dc "\$features" |
        perl -ane 'my \$name = (\$F[0] ne \$F[1]) ? \$F[1] . "::" . \$F[0] : \$F[1]; print join("\\t", \$F[0], \$name, "Expression"), "\\n";' |
        gzip > features_new.tsv.gz
    mv features_new.tsv.gz "\$features"

    # One pass per gene set; the position in this list is the gene_list column.
    column=0
    for gene_set in MT G2M S RM; do
        column=\$((column + 1))
        echo "\${gene_set}genes" > "\${gene_set}.csv"

        # Turn each gene name into the fixed string "<TAB>name::" so it can only
        # match the start of a renamed second column. Carriage returns are
        # dropped so CRLF gene lists still match. "|| true" keeps an empty
        # column from aborting the task when pipefail is enabled.
        cut -d ',' -f "\$column" "${gene_list}" |
            tr -d '\\r' |
            { grep -v '^\$' || true; } |
            tail -n +2 |
            perl -pe 's/^/\\t/; s/\$/::/' > "orig\${gene_set}.csv"

        # -F: gene names are literal strings, not regular expressions.
        gzip -dc "\$features" |
            { grep -F -f "orig\${gene_set}.csv" || true; } |
            cut -f2 >> "\${gene_set}.csv"

        rm "orig\${gene_set}.csv"
    done

    paste -d ',' MT.csv G2M.csv S.csv RM.csv > AuxGeneList.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        BASH: \$(bash --version | head -n 1 | sed 's/^.*version //; s/ .*//')
    END_VERSIONS
    """

    stub:
    // Create the declared AuxGeneList.csv output so stub runs can complete;
    // sample_files is already present as the staged input.
    """
    touch AuxGeneList.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        BASH: \$(bash --version | head -n 1 | sed 's/^.*version //; s/ .*//')
    END_VERSIONS
    """

}

14 changes: 11 additions & 3 deletions workflows/scscape.nf
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ include { INTEGRATION } from '../modules/local/integration.nf'
include { FIND_NN_CLUSTER } from '../modules/local/find_NN_clusters.nf'
include { DISPLAY_REDUCTION } from '../modules/local/plotting.nf'
include { GZIP } from '../modules/local/gzip.nf'
include { FEATURE_NAMING } from '../modules/local/feature_naming.nf'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -127,10 +128,17 @@ workflow SCSCAPE {
.map { it.reverse() }
.set { ch_updated_meta }

ch_updated_meta.view()
if (params.gene_identifier == "COMBINE"){
ch_updated_features = FEATURE_NAMING(
ch_updated_meta.map( it[0], it[1] ),
ch_updated_meta.map( it[0], it[2] )
)
}

ch_updated_features.view()
ch_init_rds = MAKE_SEURAT (
ch_updated_meta.map { [it[0], it[1]] },
ch_updated_meta.map { [it[0], it[2]] },
ch_updated_features.map { [it[0], it[1]] },
ch_updated_features.map { [it[0], it[2]] },
params.min_cells,
params.min_features,
params.gene_identifier
Expand Down

0 comments on commit 75c42b1

Please sign in to comment.