diff --git a/Samples.csv b/Samples.csv
index 675adf6..764cd20 100644
--- a/Samples.csv
+++ b/Samples.csv
@@ -1,2 +1,3 @@
-id,data_directory,genes_2_rm,mito_genes
-test,
+id,data_directory,features_2_rm,mito_genes
+samp_1,/Users/rgrindle/Desktop/mdibl/wd/scRNA-seq/test_pipe/Samp1,,test_mito.csv
+samp_2,/Users/rgrindle/Desktop/mdibl/wd/scRNA-seq/test_pipe/Samp2,,test_mito.csv
diff --git a/conf/modules.config b/conf/modules.config
index 39e8138..8798d4f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -12,11 +12,65 @@ process {

-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
+    withName: 'DISPLAY_REDUCTION' {
+        publishDir = [
+            path: { "${params.outdir}/Group_Analysis/${meta}/Dimensional_Reduction" },
+            mode: params.publish_dir_mode
+        ]
+        ext.args = params.integration_method
+    }
+
+    withName: 'FIND_NN_CLUSTER' {
+        publishDir = [
+            path: { "${params.outdir}/Group_Analysis/${meta.group}/Nearest_Neighbor-Clusters" },
+            mode: params.publish_dir_mode
+        ]
+    }
+
+    withName: 'INTEGRATION' {
+        publishDir = [
+            path: { "${params.outdir}/Group_Analysis/${meta}/Integration" },
+            mode: params.publish_dir_mode
+        ]
+    }
+
+    withName: 'RUN_PCA' {
+        publishDir = [
+            path: { "${params.outdir}/Group_Analysis/${meta}/PCA" },
+            mode: params.publish_dir_mode
+        ]
+        ext.args = params.pcMax
+    }
+
+    withName: 'MERGE_SO' {
+        publishDir = [
+            path: { "${params.outdir}/Group_Analysis/${meta}/Merge" },
+            mode: params.publish_dir_mode
+        ]
+        ext.args2 = params.n_features_2_scale
+        ext.args3 = params.scale_method
+    }
+
+    withName: 'FIND_DOUBLETS' {
+        publishDir = [
+            path: { "${params.outdir}/Sample_Processing/${meta.id}/Doublet_Detection" },
+            mode: params.publish_dir_mode
+        ]
+    }
+
+    withName: 'NORMALIZE_QC' {
+        publishDir = [
+            path: { "${params.outdir}/Sample_Processing/${meta.id}/Normalize_QC" },
+            mode: params.publish_dir_mode
+        ]
+    }
+
+    withName: 'MAKE_SEURAT' {
+        publishDir = [
+            path: { "${params.outdir}/Sample_Processing/${meta.id}/Initial_SO" },
+            mode: params.publish_dir_mode
+        ]
+    }

     withName: SAMPLESHEET_CHECK {
         publishDir = [
@@ -26,9 +80,6 @@ process {
         ]
     }

-    withName: FASTQC {
-        ext.args = '--quiet'
-    }

     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
diff --git a/modules/local/doubletfinder.nf b/modules/local/doubletfinder.nf
index 5f2f8fc..32329cd 100644
--- a/modules/local/doubletfinder.nf
+++ b/modules/local/doubletfinder.nf
@@ -1,7 +1,7 @@
 process FIND_DOUBLETS {
     tag "${meta.id}"

-    label 'process_medium'
+    label 'process_small'

     conda "conda-forge::python=3.9.5"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/python:3.9--1' :
         'docker.io/mdiblbiocore/seurat:latest' }"
@@ -12,11 +12,12 @@ process FIND_DOUBLETS {
     tuple val(meta), path (rds)
     tuple val(meta), path (data_directory)
     val vars_2_regress
-
+
     output:
-    tuple val(meta), path ("*_DoubletsRemoved.rds"), emit: rds
-    path("*.validation.log"), emit: log
+    tuple val(meta), path ("*_DoubletsRemoved.rds"), emit: rds
+    tuple val(meta), path("*.validation.log"), emit: log
+    path("*.pdf")
     //path ("versions.yml"), emit: versions

     when:
@@ -30,12 +31,8 @@ process FIND_DOUBLETS {
         $vars_2_regress \\
         $data_directory \\
         ${meta.id} \\
-        ${args}
+        ${args}

-    //cat <<-END_VERSIONS > versions.yml
-    //"${task.process}":
-    //Seurat: \$(echo \$( version) | sed "s/, version //g" )
-    //END_VERSIONS
     """

     stub:
@@ -46,4 +43,4 @@
     END_VERSIONS
     """

-}
+}
diff --git a/modules/local/find_NN_clusters.nf b/modules/local/find_NN_clusters.nf
index 06dab60..8bce753 100644
--- a/modules/local/find_NN_clusters.nf
+++ b/modules/local/find_NN_clusters.nf
@@ -1,7 +1,7 @@
 process FIND_NN_CLUSTER {

-    tag "${meta.id}"
-    label 'process_medium'
+    tag "${meta.group}"
+    label 'process_small'

     conda "conda-forge::python=3.9.5"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -10,35 +10,37 @@ process FIND_NN_CLUSTER {

     input:
     tuple val(meta), path (rds)
-    path validation_log
+    tuple val(meta), path (validation_log)
     val resolutions
-    val integration_method
+    val integration_tool

     output:
     tuple val(meta), path ("*_Clustered.rds"), emit: rds
-    path("*validation.log"), emit: log
+    tuple val(meta), path("*validation.log"), emit: log
+    path("markers")
+    path("*.pdf")
     //path ("versions.yml"), emit: versions

     when:
     task.ext.when == null || task.ext.when

     script:
-    def args = task.ext.args ?: ''
+    if (meta.integrated) {
+        integration_method = integration_tool
+    } else {
+        integration_method = "NULL"
+    }
+    def args = task.ext.args ?: ''
     """
-    pcMax=\$(paste -s <(grep PC $validation_log| grep -E -o "[0-9]") | sed 's|\t||')
+    pcMax=\$(paste -s <(grep PC $validation_log| grep -E -o "[0-9]") | sed 's|\\t||')

     FindNeighborsClustersMarkers.R \\
         $rds\\
         $resolutions \\
         \$pcMax \\
-        $integration \\
+        ${integration_method} \\
         ${meta.group} \\
-        ${args}
-
-    //cat <<-END_VERSIONS > versions.yml
-    //"${task.process}":
-    //Seurat: \$(echo \$( version) | sed "s/, version //g" )
-    //END_VERSIONS
+        ${args}
     """

     stub:
@@ -49,4 +51,4 @@
     END_VERSIONS
     """

-}
+}
diff --git a/modules/local/integration.nf b/modules/local/integration.nf
index b5187cf..3d746c7 100644
--- a/modules/local/integration.nf
+++ b/modules/local/integration.nf
@@ -1,7 +1,7 @@
 process INTEGRATION {

-    tag "${meta.group}"
-    label 'process_medium'
+    tag "${meta}"
+    label 'process_small'

     conda "conda-forge::python=3.9.5"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/python:3.9--1' :
         'docker.io/mdiblbiocore/seurat:latest' }"
@@ -11,10 +11,10 @@ process INTEGRATION {
     input:
     tuple val(meta), path (rds)
     val integration_method
-
+
     output:
     tuple val(meta), path ("*_Integrated.rds"), emit: rds
-    path("*validation.log"), emit: log
+    tuple val(meta), path("*validation.log"), emit: log
     //path ("versions.yml"), emit: versions

     when:
@@ -26,13 +26,9 @@
     Integration.R \\
         $rds \\
         $integration_method \\
-        ${meta.group} \\
-        ${args}
+        ${meta} \\
+        ${args}

-    //cat <<-END_VERSIONS > versions.yml
-    //"${task.process}":
-    //Seurat: \$(echo \$( version) | sed "s/, version //g" )
-    //END_VERSIONS
     """

     stub:
@@ -43,4 +39,4 @@
     END_VERSIONS
     """

-}
+}
diff --git a/modules/local/makeseurat.nf b/modules/local/makeseurat.nf
index 4c6ab7c..0fb58ee 100644
--- a/modules/local/makeseurat.nf
+++ b/modules/local/makeseurat.nf
@@ -16,9 +16,9 @@ process MAKE_SEURAT {
     val gene_identifier

     output:
-    tuple val(meta), path ("*_SO.rds"), emit: rds
-    path("*.validation.log"), emit: log
-    path ("versions.yml"), emit: versions
+    tuple val(meta), path ("*_SO.rds"), emit: rds
+    tuple val(meta), path("*_Validation.log"), emit: log
+    path ("versions.yml"), emit: versions

     when:
     task.ext.when == null || task.ext.when
@@ -34,11 +34,13 @@ process MAKE_SEURAT {
         $gene_identifier \\
         $genes_2_rm \\
         ${meta.id} \\
-        ${meta.group} \\
+        "${meta.groups}" \\
         $min_cells \\
         $min_features \\
         ${args}

+    perl -i -pe 's/"//g;s/\\[\\d\\d?\\d?\\] //g' fileName.log *_Validation.log
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
     Seurat: \$(echo \$( version) | sed "s/, version //g" )
diff --git a/modules/local/merge.nf b/modules/local/merge.nf
index e303baa..bcdaf48 100644
--- a/modules/local/merge.nf
+++ b/modules/local/merge.nf
@@ -1,7 +1,7 @@
 process MERGE_SO {

-    tag "${meta.group}"
-    label 'process_medium'
+    tag "${meta}"
+    label 'process_small'

     conda "conda-forge::python=3.9.5"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -15,23 +15,24 @@ process MERGE_SO {
     output:
     tuple val(meta), path ("*Merged_SO.rds"), emit: rds
     path("*validation.log"), emit: log
+    path("*.pdf")
     //path ("versions.yml"), emit: versions

     when:
     task.ext.when == null || task.ext.when

     script:
+    def n_features = task.ext.args2 ?: 'ALL'
+    def scale_method = task.ext.args3 ?: 'SCT'
     def args = task.ext.args ?: ''
     """
     Merge.R \\
-        ${Meta.group} \\
+        ${meta} \\
         $vars_2_regress \\
-        ${args}
+        $n_features \\
+        $scale_method \\
+        ${args}

-    //cat <<-END_VERSIONS > versions.yml
-    //"${task.process}":
-    //Seurat: \$(echo \$( version) | sed "s/, version //g" )
-    //END_VERSIONS
     """

     stub:
@@ -42,4 +43,4 @@
     END_VERSIONS
     """

-}
+}
diff --git a/modules/local/normalize_qc.nf b/modules/local/normalize_qc.nf
index cea75df..4f588a0 100644
--- a/modules/local/normalize_qc.nf
+++ b/modules/local/normalize_qc.nf
@@ -1,7 +1,7 @@
 process NORMALIZE_QC {
     tag "${meta.id}"

-    label 'process_medium'
+    label 'process_small'

     conda "conda-forge::python=3.9.5"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/python:3.9--1' :
         'docker.io/mdiblbiocore/seurat:latest' }"
@@ -16,11 +16,11 @@ process NORMALIZE_QC {
     val ncount_lower
     val ncount_upper
     val max_mito_pct
-
+
     output:
-    tuple val(meta), path ("*_QC.rds"), emit: rds
-    path("*.validation.log"), emit: log
+    tuple val(meta), path ("*_QC.rds"), emit: rds
+    tuple val(meta), path("*.validation.log"), emit: log
     //path ("versions.yml"), emit: versions

     when:
@@ -28,6 +28,9 @@ process NORMALIZE_QC {
     script:
     def args = task.ext.args ?: ''
+    if (mito_genes.getClass().name == "nextflow.util.BlankSeparatedList") {
+        mito_genes = 'NULL'
+    }
     """
     Normalize_QC.R \\
         $mito_genes \\
@@ -38,12 +41,12 @@ process NORMALIZE_QC {
         $max_mito_pct \\
         $rds \\
         ${meta.id} \\
-        ${args}
+        ${args}

-    //cat <<-END_VERSIONS > versions.yml
-    //"${task.process}":
-    //Seurat: \$(echo \$( version) | sed "s/, version //g" )
-    //END_VERSIONS
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+    Seurat: \$(echo \$( version) | sed "s/, version //g" )
+    END_VERSIONS
     """

     stub:
@@ -54,4 +57,4 @@
     END_VERSIONS
     """

-}
+}
diff --git a/modules/local/plotting.nf b/modules/local/plotting.nf
new file mode 100644
index 0000000..3bea17b
--- /dev/null
+++ b/modules/local/plotting.nf
@@ -0,0 +1,60 @@
+process DISPLAY_REDUCTION {
+
+    tag "${meta}"
+    label 'process_small'
+
+    conda "conda-forge::python=3.9.5"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9--1' :
+        'docker.io/mdiblbiocore/seurat:latest' }"
+
+    input:
+    tuple val(meta), path (rds_file)
+    tuple val(meta), path(validation_log)
+    val integrated
+    val resolutions
+    val makeLoupe
+    val integration_tool
+
+    output:
+    tuple val(meta), path ("*Final.rds"), emit: rds
+    path("*validation.log"), emit: log
+    path("*.loupe")
+    path("*.pdf")
+    //path ("versions.yml"), emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    if (integrated) {
+        integration_method = integration_tool
+    } else {
+        integration_method = "NULL"
+    }
+    def args = task.ext.args1 ?: 'NULL'
+    """
+    pcMax=\$(paste -s <(grep PC $validation_log| grep -E -o "[0-9]") | sed 's|\\t||')
+
+    Plotting.R \\
+        $rds_file \\
+        $resolutions \\
+        \$pcMax \\
+        $integration_method \\
+        $meta \\
+        $makeLoupe \\
+        ${args}
+    """
+
+    stub:
+    """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+    Seurat: \$(echo \$(Seurat --version) | sed "s/Seurat, version //g" )
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/local/runpca.nf b/modules/local/runpca.nf
index c1ea984..35fd773 100644
--- a/modules/local/runpca.nf
+++ b/modules/local/runpca.nf
@@ -1,7 +1,7 @@
 process RUN_PCA {

-    tag "${meta.group}"
-    label 'process_medium'
+    tag "${meta}"
+    label 'process_small'

     conda "conda-forge::python=3.9.5"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/python:3.9--1' :
         'docker.io/mdiblbiocore/seurat:latest' }"
@@ -10,29 +10,26 @@ process RUN_PCA {

     input:
     tuple val(meta), path (rds)
-    val pcMax

     output:
     tuple val(meta), path ("*_PCA.rds"), emit: rds
-    path("*.validation.log"), emit: log
+    tuple val(meta), path("*.validation.log"), emit: log
+    path("*.pdf")
     //path ("versions.yml"), emit: versions

     when:
     task.ext.when == null || task.ext.when

     script:
-    def args = task.ext.args ?: ''
+    def pcMax = task.ext.args ?: 'null'
+    def args = task.ext.args2 ?: ''
     """
     RunPCA.R \\
         $rds \\
-        $pcMax \\
-        ${meta.group} \\
-        ${args}
-
-    //cat <<-END_VERSIONS > versions.yml
-    //"${task.process}":
-    //Seurat: \$(echo \$( version) | sed "s/, version //g" )
-    //END_VERSIONS
+        ${pcMax} \\
+        ${meta} \\
+        ${args}
+
     """

     stub:
@@ -43,4 +40,4 @@
     END_VERSIONS
     """

-}
+}
diff --git a/nextflow.config b/nextflow.config
index b85c644..ef2f8e0 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -13,7 +13,8 @@ params {
     // Input options
     input                      = null
     sample_sheet               = null
-    design_sheet               = null
+    contrast_sheet             = null
+    n_variables                = null

     // MakeSeurat
     gene_identifier            = null
@@ -26,20 +27,30 @@ params {
     ncount_lower               = null
     ncount_upper               = null
     max_mito_pct               = null
-
+
     //DoubletFinder
     vars_2_regress             = null

+    //Merge
+    n_features_2_scale         = null
+    scale_method               = null
+
     //RunPCA
     pcMax                      = null
-
+
     //Integration
     integration_method         = null

     //Find_NN_Cluster
-    resolutions = null
+    resolutions                = null

-    // References
+    //Plotting - UMAP/TSNE
+    makeLoupe                  = true
+
+    // TEST
+    meta_test                  = null
+    rds_test                   = null
+    log_test                   = null

     // MultiQC options
     multiqc_config             = null
@@ -66,7 +77,7 @@ params {
     custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
     config_profile_contact     = null
     config_profile_url         = null
-
+
     // Max resource options
     // Defaults only, expecting to be overwritten
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b165334..45cb166 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -289,19 +289,19 @@
             "type": "integer"
         },
         "nfeature_lower": {
-            "type": "string"
+            "type": "integer"
         },
         "nfeature_upper": {
-            "type": "string"
+            "type": "integer"
         },
         "ncount_lower": {
-            "type": "string"
+            "type": "integer"
        },
         "ncount_upper": {
-            "type": "string"
+            "type": "integer"
         },
         "max_mito_pct": {
-            "type": "string"
+            "type": "integer"
         },
         "vars_2_regress": {
             "type": "string"
diff --git a/workflows/scscape.nf b/workflows/scscape.nf
index f9e9cd4..576ebec 100644
--- a/workflows/scscape.nf
+++ b/workflows/scscape.nf
@@ -36,14 +36,17 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { INPUT_CHECK } from '../subworkflows/local/input_check'
-include { MAKE_SEURAT } from '../modules/local/makeseurat.nf'
-include { NORMALIZE_QC } from '../modules/local/normalize_qc.nf'
-include { FIND_DOUBLETS } from '../modules/local/doubletfinder.nf'
-include { MERGE_SO } from '../modules/local/merge.nf'
-include { RUN_PCA } from '../modules/local/runpca.nf'
-include { INTEGRATION } from '../modules/local/integration.nf'
-include { FIND_NN_CLUSTER } from '../modules/local/find_NN_clusters.nf'
+include { INPUT_CHECK            } from '../subworkflows/local/input_check'
+include { MAKE_SEURAT            } from '../modules/local/makeseurat.nf'
+include { NORMALIZE_QC           } from '../modules/local/normalize_qc.nf'
+include { FIND_DOUBLETS          } from '../modules/local/doubletfinder.nf'
+include { MERGE_SO               } from '../modules/local/merge.nf'
+include { MERGE_SO as SCALE_SO   } from '../modules/local/merge.nf'
+include { RUN_PCA as PCA_MULT    } from '../modules/local/runpca.nf'
+include { RUN_PCA as PCA_SING    } from '../modules/local/runpca.nf'
+include { INTEGRATION            } from '../modules/local/integration.nf'
+include { FIND_NN_CLUSTER        } from '../modules/local/find_NN_clusters.nf'
+include { DISPLAY_REDUCTION      } from '../modules/local/plotting.nf'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -72,20 +75,174 @@ workflow SCSCAPE {

     ch_versions = Channel.empty()

     ch_samples = Channel.fromList(samplesheetToList(params.sample_sheet, "./assets/schema_input.json"))
-    ch_samples.view()
+    ch_contrasts_file = Channel.from(file(params.contrast_sheet))
+    ch_contrasts_file.splitCsv ( header:true, sep:(params.contrast_sheet.endsWith('tsv') ? '\t' : ','))
+        .flatMap().filter { !(it.toString().toUpperCase().contains("FALSE")) }
+        .map { it ->
+            if (it.toString().contains("id")){
+                lhm = new LinkedHashMap()
+                lhm['id'] = it.toString().split("=")[1]
+                return lhm
+            } else {
+                return it.toString().split("=")[0]
+            }
+        }
+        .collect().map { it.reverse() }.flatMap()
+        .buffer { it instanceof LinkedHashMap }
+        .map { it.reverse() }
+        .set { ch_contrasts }
+
+    ch_contrasts.join(ch_samples).flatMap()
+        .map { it ->
+            if ( it instanceof LinkedHashMap ){
+                group_ls = new ArrayList()
+                return it + [ groups: group_ls ]
+            } else if (it instanceof java.lang.String ){
+                group_ls.add(it)
+                return
+            } else if (it instanceof java.util.ArrayList ){
+                return it
+            } else {
+                return it
+            }
+        }
+        .map { it ->
+            if (it instanceof LinkedHashMap){
+                return [id: it.id, groups: it.groups.sort()]
+            } else {
+                return it
+            }
+        }.collect(flat: false).map { it.reverse() }.flatMap()
+        .buffer { it instanceof LinkedHashMap }
+        .map { it.reverse() }
+        .set { ch_updated_meta }

     ch_init_rds = MAKE_SEURAT (
-        ch_samples.map { [it[0], it[1]] },
-        ch_samples.map { [it[0], it[2]] },
+        ch_updated_meta.map { [it[0], it[1]] },
+        ch_updated_meta.map { [it[0], it[2]] },
         params.min_cells,
         params.min_features,
         params.gene_identifier
     )

+    ch_normalized_qc_rds = NORMALIZE_QC (
+        ch_init_rds.rds,
+        ch_updated_meta.map { [it[0], it[3]] },
+        params.nfeature_lower,
+        params.nfeature_upper,
+        params.ncount_lower,
+        params.ncount_upper,
+        params.max_mito_pct
+    )
+
+    ch_doublet_filtered_rds = FIND_DOUBLETS (
+        ch_normalized_qc_rds.rds,
+        ch_updated_meta.map { [it[0], it[1]] },
+        params.vars_2_regress
+    )
+
+    ch_doublet_filtered_rds.rds.map { meta, rds -> [ rds, meta.groups ]}
+        .transpose()
+        .groupTuple(by: 1)
+        .map { data, group -> [ group, data ] }
+        .set { ch_merged_groups }
+
+    ch_merged_groups.branch { group, data ->
+        single: data.size() == 1
+        multiple: data.size() > 1
+        none: data.size() < 1
+    }
+    .set { ch_merged_groups }
+
+    ch_merged_so = MERGE_SO (
+        ch_merged_groups.multiple,
+        params.vars_2_regress
+    )
+
+    ch_scaled_so = SCALE_SO (
+        ch_merged_groups.single,
+        params.vars_2_regress
+    )
+
+    ch_pca_multiple = PCA_MULT (
+        ch_merged_so.rds
+    )
+    ch_pca_single = PCA_SING (
+        ch_scaled_so.rds
+    )
+
+    ch_pca_single.rds.join(ch_pca_single.log).set { ch_pca_single_updated }
+    ch_pca_multiple.rds.join(ch_pca_multiple.log).set { ch_pca_multiple_updated }
+
+    ch_pca_single_updated.map { meta, rds, logs ->
+        if (true) {
+            new_meta = new LinkedHashMap()
+            new_meta['group'] = meta
+            new_meta['integrated'] = false
+            return [ new_meta, rds, logs ]
+        } else {
+            return [ meta, rds, logs ]
+        }
+    }
+    .set { ch_pca_single_updated }
+
+    if (params.integration_method){
+        ch_integrated = INTEGRATION (
+            ch_pca_multiple_updated.map { meta, rds, logs -> [ meta, rds ] },
+            params.integration_method
+        )
+        ch_integrated.rds.join(ch_pca_multiple.log).set { ch_integrated_all }
+        ch_integrated_all.map { meta, rds, logs ->
+            if (true) {
+                new_meta = new LinkedHashMap()
+                new_meta['group'] = meta
+                new_meta['integrated'] = true
+                return [ new_meta, rds, logs ]
+            } else {
+                return [ meta, rds, logs ]
+            }
+        }.set { ch_dimensions_def }
+    } else {
+        ch_pca_multiple_updated.map { meta, rds, logs ->
+            if (true) {
+                new_meta = new LinkedHashMap()
+                new_meta['group'] = meta
+                new_meta['integrated'] = false
+                return [ new_meta, rds, logs ]
+            } else {
+                return [ meta, rds, logs ]
+            }
+        }
+        .set { ch_dimensions_def }
+    }
+
+    ch_dim_def_all = Channel.empty()
+    ch_dim_def_all.mix(ch_pca_single_updated).set { ch_dim_def_all }
+    ch_dim_def_all.mix(ch_dimensions_def).set { ch_dim_def_all }
+    //ch_dim_def_all.view()
+
+    ch_nn_clusters = FIND_NN_CLUSTER (
+        ch_dim_def_all.map {meta, rds, log -> [meta, rds]},
+        ch_dim_def_all.map {meta, rds, log -> [meta, log]},
+        params.resolutions,
+        params.integration_method
+    )
+    ch_nn_clusters.rds.map { meta, rds -> [ meta.group, meta.integrated, rds ] }
+        .join( ch_pca_multiple.log, by: [0, 0])
+        .join( ch_pca_single.log, by: [0, 0], remainder: true)
+        .set { ch_nn_clusters_w_log }

-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    ch_nn_clusters_w_log.view()
+    DISPLAY_REDUCTION (
+        ch_nn_clusters_w_log.map { [ it[0], it[2] ] },
+        ch_nn_clusters_w_log.map { [ it[0], it[3] ] },
+        ch_nn_clusters_w_log.map { it[1] },
+        params.resolutions,
+        params.makeLoupe,
+        params.integration_method
     )
+    //CUSTOM_DUMPSOFTWAREVERSIONS (
+    //    ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    //)

     //
     // MODULE: MultiQC
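
Notes on the mechanics introduced above, with illustrative sketches that are not part of the patch itself.

The per-process publishDir entries in conf/modules.config wrap the path in a closure, so meta is resolved per task from that task's inputs and every sample or group lands in its own output folder. A minimal sketch of how one entry resolves, assuming --outdir results (the outdir value is illustrative) and the samp_1 id from Samples.csv:

    // conf/modules.config entry, as added in the patch
    withName: 'NORMALIZE_QC' {
        publishDir = [
            path: { "${params.outdir}/Sample_Processing/${meta.id}/Normalize_QC" },
            mode: params.publish_dir_mode
        ]
    }
    // for meta.id == 'samp_1' the task's outputs are published to
    //   results/Sample_Processing/samp_1/Normalize_QC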
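
Several values now reach the modules through task.ext instead of dedicated val inputs; RUN_PCA is the clearest case, since its pcMax input was removed. A sketch of the wiring, using only names that appear in the patch:

    // conf/modules.config: feed the CLI parameter into the process scope
    withName: 'RUN_PCA' {
        ext.args = params.pcMax          // e.g. nextflow run ... --pcMax 20
    }

    // modules/local/runpca.nf, script block: read it back with a fallback
    def pcMax = task.ext.args  ?: 'null' // literal 'null' consumed by RunPCA.R
    def args  = task.ext.args2 ?: ''     // args2 stays free for extra options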
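
Re-emitting each validation log as tuple val(meta), path(...) rather than a bare path is what lets the workflow pair a log with its rds by key instead of by arrival order, which is not deterministic across parallel tasks. The pattern, as used in the workflow:

    // [ meta, rds ] join [ meta, log ] on element 0 (meta) -> [ meta, rds, log ]
    ch_pca_single.rds
        .join(ch_pca_single.log)
        .set { ch_pca_single_updated }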
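
The pcMax extraction used in FIND_NN_CLUSTER and DISPLAY_REDUCTION rebuilds a multi-digit number from single-digit grep matches. A worked example, assuming the validation log (file name here is illustrative) contains a line such as "PC 15":

    pcMax=$(paste -s <(grep PC Samp_validation.log | grep -E -o "[0-9]") | sed 's|\t||')
    # grep PC            -> "PC 15"
    # grep -E -o "[0-9]" -> "1" and "5" on separate lines
    # paste -s           -> "1<TAB>5"
    # sed 's|\t||'       -> "15"
    # note: without the g flag, sed deletes only the first tab, so this
    # assumes pcMax has at most two digits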
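
The guard added to NORMALIZE_QC handles samples that provide no mito_genes file: the staged value of the optional input then arrives as a nextflow.util.BlankSeparatedList rather than a path, and the guard turns it into the literal 'NULL' that Normalize_QC.R expects. An equivalent, arguably more direct, form of the same check (a sketch, not part of the patch):

    // same effect as comparing getClass().name against the class-name string
    if (mito_genes instanceof nextflow.util.BlankSeparatedList) {
        mito_genes = 'NULL'   // sentinel consumed by Normalize_QC.R
    }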
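
The regrouping after FIND_DOUBLETS fans each sample out to one element per entry in meta.groups, then gathers all samples that share a group. A worked example of what flows through transpose/groupTuple, using hypothetical group names 'ctrl' and 'treat' alongside a shared 'all':

    // in : [ samp_1.rds, ['all','ctrl'] ], [ samp_2.rds, ['all','treat'] ]
    // transpose()       -> [samp_1.rds,'all'], [samp_1.rds,'ctrl'],
    //                      [samp_2.rds,'all'], [samp_2.rds,'treat']
    // groupTuple(by: 1) -> [[samp_1.rds,samp_2.rds],'all'],
    //                      [[samp_1.rds],'ctrl'], [[samp_2.rds],'treat']
    // final map         -> ['all',[samp_1.rds,samp_2.rds]],
    //                      ['ctrl',[samp_1.rds]], ['treat',[samp_2.rds]]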
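
A Nextflow process can be invoked only once per workflow, which is why merge.nf and runpca.nf are each included twice under aliases: branch routes one-sample groups through SCALE_SO/PCA_SING (scaling only) and multi-sample groups through MERGE_SO/PCA_MULT, and the two paths are mixed back together before clustering. The relevant pieces from the diff:

    include { MERGE_SO             } from '../modules/local/merge.nf'
    include { MERGE_SO as SCALE_SO } from '../modules/local/merge.nf'

    // one-sample groups skip the merge proper; `none` catches empty
    // groups so they silently drop out of the analysis
    ch_merged_groups.branch { group, data ->
        single:   data.size() == 1
        multiple: data.size() > 1
        none:     data.size() < 1
    }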