Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into remove_hail_docker
Browse files Browse the repository at this point in the history
# Conflicts:
#	inputs/values/dockers.json
#	inputs/values/dockers_azure.json
  • Loading branch information
VJalili committed Apr 29, 2024
2 parents d779ae8 + 076529c commit e1a4dd1
Show file tree
Hide file tree
Showing 30 changed files with 2,538 additions and 163 deletions.
22 changes: 9 additions & 13 deletions dockerfiles/melt/run_MELT_2.0.5.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ set -euo pipefail
##### Usage statement
usage(){
cat <<EOF
usage: runMELT.sh bam ref cov read_len mean_is MELT_DIR RUN_DIR REF_VER
usage: runMELT.sh bam ref cov read_len mean_is MELT_DIR REF_VER
Runs MELT for mobile element detection. Requires paired-end deep (>10X) WGS data mapped with BWA.
Positional arguments (all required):
bam Full path to mapped bam file (bam.bai assumed accompanying the BAM)
Expand All @@ -28,15 +28,14 @@ cat <<EOF
read_len Mean read length of library
mean_is Mean insert size of library
MELT_DIR Full path to MELT install directory
RUN_DIR Full path to directory for MELT output
REF_VER Reference version (19|38)
EOF
}

##### Args parsing and validation
if [[ "$#" -eq 0 ]]; then
usage && exit 0;
elif [[ "$#" -lt 8 ]]; then
elif [[ "$#" -lt 7 ]]; then
echo "At least one of the required parameters is not properly set by the given command:"
temp_args="$@" && echo "$0 ${temp_args}" && exit 1;
fi
Expand All @@ -51,11 +50,10 @@ cov=$3
read_len=$4
mean_is=$5
MELT_DIR=$6
RUN_DIR=$7
REF_VER=$8
REF_VER=$7

##### Check for required input (unset or empty)
if [ -z "${bam}" ] || [ -z "${ref}" ] || [ -z "${cov}" ] || [ -z "${read_len}" ] || [ -z "${mean_is}" ] || [ -z "${MELT_DIR}" ] || [ -z "${RUN_DIR}" ] || [ -z "${REF_VER}" ]; then
if [ -z "${bam}" ] || [ -z "${ref}" ] || [ -z "${cov}" ] || [ -z "${read_len}" ] || [ -z "${mean_is}" ] || [ -z "${MELT_DIR}" ] || [ -z "${REF_VER}" ]; then
echo "At least one of the required parameters is not properly set by the given command:"
temp_args="$@" && echo "$0 ${temp_args}" && exit 1; # non-zero exit because it indicates user error
fi
Expand All @@ -76,20 +74,18 @@ read_len=$( echo "${read_len}" | cut -f1 -d\. )
mean_is=$( echo "${mean_is}" | cut -f1 -d\. )

##### remove trailing slash just to make sure
RUN_DIR="${RUN_DIR%/}"
MELT_DIR="${MELT_DIR%/}"

##### Create transposons reference list
if [[ "$REF_VER" == "38" ]]; then
ls "${MELT_DIR}"/me_refs/Hg38/*zip | sed 's/\*//g' > "${RUN_DIR}/transposon_reference.list"
ls "${MELT_DIR}"/me_refs/Hg38/*zip | sed 's/\*//g' > "transposon_reference.list"
GENE_BED_FILE="${MELT_DIR}/add_bed_files/Hg38/Hg38.genes.bed"
elif [[ "$REF_VER" == "19" ]]; then
ls "${MELT_DIR}"/me_refs/1KGP_Hg19/*zip | sed 's/\*//g' > "${RUN_DIR}/transposon_reference.list"
ls "${MELT_DIR}"/me_refs/1KGP_Hg19/*zip | sed 's/\*//g' > "transposon_reference.list"
GENE_BED_FILE="${MELT_DIR}/add_bed_files/1KGP_Hg19/hg19.genes.bed"
fi

##### Create output directory if it doesn't exist. then run MELT Single
mkdir -p "${RUN_DIR}" && cd "${RUN_DIR}"
##### Run MELT Single
java -Xmx"${JVM_MAX_MEM}" \
-jar "${MELT_DIR}/MELT.jar" \
Single \
Expand All @@ -99,6 +95,6 @@ java -Xmx"${JVM_MAX_MEM}" \
-r "${read_len}" \
-e "${mean_is}" \
-d "${MIN_CHR_LENGTH}" \
-t "${RUN_DIR}/transposon_reference.list" \
-t "transposon_reference.list" \
-n "${GENE_BED_FILE}" \
-w "${RUN_DIR}"
-w "."
23 changes: 5 additions & 18 deletions dockerfiles/sv-pipeline-virtual-env/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN apt-get -qqy update --fix-missing && \
# install conda packages
# NOTE: need to use scipy=1.7.3 instead of scipy=1.8.0 because it makes hail angry
ARG CONDA_PKGS="cython=0.29.28 numpy=1.22.3 pandas=1.4.2 scipy=1.7.3 scikit-learn=1.0.2 intervaltree=3.1.0 \
matplotlib=3.5.1 natsort=8.1.0 google-cloud-dataproc=4.0.2"
matplotlib=3.5.1 natsort=8.1.0 google-cloud-dataproc=4.0.2 seaborn=0.12.2"
RUN mamba install -qy --freeze-installed -n $CONDA_ENV_NAME -c conda-forge -c bioconda $CONDA_PKGS

# copy in HTSLIB install so that pysam uses same version as is available in pipeline
Expand Down Expand Up @@ -48,9 +48,9 @@ RUN export SETUPTOOLS_VERSION=$(python -c 'import setuptools; print(setuptools._
pip install setuptools==$SETUPTOOLS_VERSION

# pybedtools needs to be installed via pip because it doesn't like the updated python
# hail's latest version is only available via pip or local build
# hail's latest version is only available via pip or local build. Run cache purge in case the base is out of date.
ARG PIP_PKGS="pybedtools==0.9.0 hail==0.2.93"
RUN pip3 --no-cache-dir install $PIP_PKGS
RUN pip3 cache purge && pip3 --no-cache-dir install $PIP_PKGS

# clean unneeded stuff
RUN conda clean -ay --force-pkgs-dirs
Expand Down Expand Up @@ -79,7 +79,7 @@ RUN export NEW_PACKAGES=$(diff_of_lists.sh "$RUN_DEPS" $APT_REQUIRED_PACKAGES) &

# install R packages
ARG R_PACKAGES="assertthat beeswarm BH BSDA caret cli crayon DAAG data.table devtools digest dplyr e1071 fansi fpc \
generics gert glue HardyWeinberg hash latticeExtra magrittr Matrix metap mnormt nlme nloptr nnet \
generics gert glue HardyWeinberg hash latticeExtra magrittr metap mnormt nlme nloptr nnet \
numDeriv perm pillar pkgconfig plogr plyr purrr pwr R6 RColorBrewer Rcpp reshape reshape2 rlang ROCR \
rpart stringi stringr survival tibble tidyr tidyselect utf8 vioplot withr zoo"
ARG BIOCONDUCTOR_PKGS="SNPRelate multtest"
Expand All @@ -89,6 +89,7 @@ RUN export APT_TRANSIENT_PACKAGES=$(diff_of_lists.sh "$BUILD_DEPS" $APT_REQUIRED
apt-get -qqy install --no-install-recommends $BUILD_DEPS $(fix_spaces.sh $APT_REQUIRED_PACKAGES) && \
install_bioconductor_packages.R $BIOCONDUCTOR_PKGS && \
install_deprecated_R_package.sh "https://cran.r-project.org/src/contrib/Archive/MASS/MASS_7.3-58.tar.gz" && \
install_deprecated_R_package.sh "https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.6-5.tar.gz" && \
install_R_packages.R $R_PACKAGES && \
apt-get -qqy remove --purge $APT_TRANSIENT_PACKAGES && \
apt-get -qqy autoremove --purge && \
Expand All @@ -100,18 +101,4 @@ RUN export APT_TRANSIENT_PACKAGES=$(diff_of_lists.sh "$BUILD_DEPS" $APT_REQUIRED
/usr/share/man/?? \
/usr/share/man/??_*

# Install plink2 & KING (for relatedness inference)
ARG KING_URL="https://www.kingrelatedness.com/executables/Linux-king215.tar.gz"
RUN mkdir -p /opt/bin && \
cd /opt/bin && \
wget -q $KING_URL && \
tar -xzf Linux-king215.tar.gz && \
rm -f Linux-king215.tar.gz

ARG PLINK2_URL="https://github.com/chrchang/plink-ng/releases/download/2019/plink2_linux_x86_64_20190107.zip"
RUN cd /opt/bin && \
wget -q $PLINK2_URL && \
unzip plink2_linux_x86_64_20190107.zip && \
rm -f plink2_linux_x86_64_20190107.zip

ENV PATH=/opt/bin:$PATH
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"FilterGenotypes.vcf": {{ test_batch.concordance_vcf | tojson }},
"FilterGenotypes.output_prefix": {{ test_batch.name | tojson }},
"FilterGenotypes.ploidy_table": {{ test_batch.ploidy_table | tojson }},
"FilterGenotypes.truth_json": {{ test_batch.recalibrate_gq_truth_json | tojson }},

"FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }},
"FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }},
"FilterGenotypes.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }},
"FilterGenotypes.recalibrate_gq_args": [
"--keep-homvar false",
"--keep-homref true",
"--keep-multiallelic true",
"--skip-genotype-filtering true",
"--min-samples-to-estimate-allele-frequency -1"
],

"FilterGenotypes.ped_file": {{ test_batch.ped_file | tojson }},
"FilterGenotypes.site_level_comparison_datasets": [
{{ reference_resources.ccdg_abel_site_level_benchmarking_dataset | tojson }},
{{ reference_resources.gnomad_v2_collins_site_level_benchmarking_dataset | tojson }},
{{ reference_resources.hgsv_byrska_bishop_site_level_benchmarking_dataset | tojson }},
{{ reference_resources.thousand_genomes_site_level_benchmarking_dataset | tojson }}
],
"FilterGenotypes.sample_level_comparison_datasets": [
{{ reference_resources.hgsv_byrska_bishop_sample_level_benchmarking_dataset | tojson }}
],
"FilterGenotypes.sample_renaming_tsv": {{ reference_resources.hgsv_byrska_bishop_sample_renaming_tsv | tojson }},
"FilterGenotypes.runtime_override_per_sample_benchmark_plot": {
"mem_gb": 30,
"disk_gb": 50
},

"FilterGenotypes.linux_docker": {{ dockers.linux_docker | tojson }},
"FilterGenotypes.gatk_docker": {{ dockers.gq_recalibrator_docker | tojson }},
"FilterGenotypes.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
"FilterGenotypes.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"MakeGqRecalibratorTrainingSetFromPacBio.vcfs": [{{ test_batch.concordance_vcf | tojson }}],
"MakeGqRecalibratorTrainingSetFromPacBio.training_sample_ids": {{ test_batch.pacbio_samples_list | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.output_prefix": {{ test_batch.name | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.ploidy_table": {{ test_batch.ploidy_table | tojson }},

"MakeGqRecalibratorTrainingSetFromPacBio.pacbio_sample_ids": {{ test_batch.pacbio_samples | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.vapor_files": {{ test_batch.vapor_files | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.pbsv_vcfs": {{ test_batch.pacbio_pbsv_vcfs | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.pav_vcfs": {{ test_batch.pacbio_pav_vcfs | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.sniffles_vcfs": {{ test_batch.pacbio_sniffles_vcfs | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.reference_dict": {{ reference_resources.reference_dict | tojson }},

"MakeGqRecalibratorTrainingSetFromPacBio.sv_utils_docker" : {{ dockers.sv_utils_docker | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.gatk_docker" : {{ dockers.gatk_docker | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.sv_base_mini_docker" : {{ dockers.sv_base_mini_docker | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.sv_pipeline_docker" : {{ dockers.sv_pipeline_docker | tojson }},
"MakeGqRecalibratorTrainingSetFromPacBio.linux_docker" : {{ dockers.linux_docker | tojson }}
}
16 changes: 8 additions & 8 deletions inputs/values/dockers.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
"name": "dockers",
"cnmops_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/cnmops:2024-01-24-v0.28.4-beta-9debd6d7",
"condense_counts_docker": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
"gatk_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/gatk:2024-01-15-4.5.0.0-3-gb68fadc5f-NIGHTLY-SNAPSHOT",
"gatk_docker": "us.gcr.io/broad-dsde-methods/eph/gatk:2024-02-16-4.5.0.0-8-gcfd4d87ec-NIGHTLY-SNAPSHOT",
"gatk_docker_pesr_override": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
"genomes_in_the_cloud_docker": "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135",
"linux_docker": "marketplace.gcr.io/google/ubuntu1804",
"manta_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/manta:2023-09-14-v0.28.3-beta-3f22f94d",
"melt_docker": "us.gcr.io/talkowski-sv-gnomad/melt:3159ce1",
"melt_docker": "us.gcr.io/talkowski-sv-gnomad/melt:a85c92f",
"scramble_docker": "us.gcr.io/broad-dsde-methods/markw/scramble:mw-scramble-99af4c50",
"samtools_cloud_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/samtools-cloud:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_base_mini_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_updates_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_rdtest_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"sv_pipeline_updates_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"sv_pipeline_rdtest_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"wham_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/wham:2024-01-24-v0.28.4-beta-9debd6d7",
"igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
"duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
Expand All @@ -24,10 +24,10 @@
"pangenie_docker": "us.gcr.io/broad-dsde-methods/vjalili/pangenie:vj-127571f",
"sv-base-virtual-env": "us.gcr.io/broad-dsde-methods/vjalili/sv-base-virtual-env:5994670",
"cnmops-virtual-env": "us.gcr.io/broad-dsde-methods/vjalili/cnmops-virtual-env:5994670",
"sv-pipeline-virtual-env": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline-virtual-env:2024-01-24-v0.28.4-beta-9debd6d7",
"sv-pipeline-virtual-env": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline-virtual-env:mw-train-genotype-filtering-a9479501",
"samtools-cloud-virtual-env": "us.gcr.io/broad-dsde-methods/vjalili/samtools-cloud-virtual-env:5994670",
"sv-utils-env": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils-env:2023-02-01-v0.26.8-beta-9b25c72d",
"sv_utils_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_utils_docker": "us.gcr.io/broad-dsde-methods/markw/sv-utils:mw-train-genotype-filtering-a9479501",
"gq_recalibrator_docker": "us.gcr.io/broad-dsde-methods/markw/gatk:mw-tb-form-sv-filter-training-data-899360a",
"str": "us.gcr.io/broad-dsde-methods/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6"
}
12 changes: 6 additions & 6 deletions inputs/values/dockers_azure.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
"name": "dockers",
"cnmops_docker": "vahid.azurecr.io/gatk-sv/cnmops:2024-01-24-v0.28.4-beta-9debd6d7",
"condense_counts_docker": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
"gatk_docker": "vahid.azurecr.io/gatk-sv/gatk:2024-01-15-4.5.0.0-3-gb68fadc5f-NIGHTLY-SNAPSHOT",
"gatk_docker": "vahid.azurecr.io/gatk-sv/gatk:2024-02-16-4.5.0.0-8-gcfd4d87ec-NIGHTLY-SNAPSHOT",
"gatk_docker_pesr_override": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
"genomes_in_the_cloud_docker": "vahid.azurecr.io/genomes-in-the-cloud:2.3.2-1510681135",
"linux_docker": "vahid.azurecr.io/google/ubuntu1804",
"manta_docker": "vahid.azurecr.io/gatk-sv/manta:2023-09-14-v0.28.3-beta-3f22f94d",
"melt_docker": "vahid.azurecr.io/melt:3159ce1",
"melt_docker": "vahid.azurecr.io/melt:a85c92f",
"scramble_docker": "vahid.azurecr.io/scramble:mw-scramble-99af4c50",
"samtools_cloud_docker": "vahid.azurecr.io/gatk-sv/samtools-cloud:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_base_docker": "vahid.azurecr.io/gatk-sv/sv-base:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_base_mini_docker": "vahid.azurecr.io/gatk-sv/sv-base-mini:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_updates_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_qc_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_rdtest_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-01-24-v0.28.4-beta-9debd6d7",
"sv_pipeline_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"sv_pipeline_updates_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"sv_pipeline_qc_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"sv_pipeline_rdtest_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-10c8a221",
"wham_docker": "vahid.azurecr.io/gatk-sv/wham:2024-01-24-v0.28.4-beta-9debd6d7",
"igv_docker": "vahid.azurecr.io/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
"duphold_docker": "vahid.azurecr.io/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
Expand Down
6 changes: 3 additions & 3 deletions inputs/values/hgdp.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
"concordance_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.vcf.gz",
"concordance_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.vcf.gz.tbi",

"pacbio_sample_concordance_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/training/hgdp.concordance.subset.vcf.gz",
"pacbio_sample_concordance_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/training/hgdp.concordance.subset.vcf.gz.tbi",
"recalibrate_gq_truth_json": "gs://gatk-sv-hgdp/mw-sv-concordance-update/training/hgdp.gq_training_labels.json",
"pacbio_sample_concordance_vcf": "gs://gatk-sv-hgdp/mw-train-genotype-filtering/hgdp.pacbio_samples.vcf.gz",
"pacbio_sample_concordance_vcf_index": "gs://gatk-sv-hgdp/mw-train-genotype-filtering/hgdp.pacbio_samples.vcf.gz.tbi",
"recalibrate_gq_truth_json": "gs://gatk-sv-hgdp/mw-train-genotype-filtering/hgdp.gq_training_labels.json",

"aou_recalibrated_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.aou_gq_recalibrated.vcf.gz",
"aou_recalibrated_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.aou_gq_recalibrated.vcf.gz.tbi",
Expand Down
Loading

0 comments on commit e1a4dd1

Please sign in to comment.