Skip to content

Commit

Permalink
Apptainer def files, env input yamls for build.
Browse files Browse the repository at this point in the history
README instructions for build and run.
  • Loading branch information
meyerkm committed Feb 3, 2025
1 parent 8d8e0cb commit 5ded441
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 0 deletions.
98 changes: 98 additions & 0 deletions containers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Build Container
apptainer build deeprvat_preprocessing.sif apptainer_deeprvat_preprocessing.def
apptainer build deeprvat.sif apptainer_deeprvat.def

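NOTE: Run the build commands from the `containers/` directory, because the `.def` files copy the environment YAML files using paths relative to the current working directory. The resulting `deeprvat.sif` image is roughly 5 GB and the `deeprvat_preprocessing.sif` image is roughly 1 GB.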

# Verify Working Image
apptainer exec deeprvat_preprocessing.sif pip list | grep "deeprvat"

# Run Preprocessing
cd ./my_exp_dir
.
├── data
│   └── vcf
│       ├── test_vcf_data_c21_b1.vcf.gz
│       └── test_vcf_data_c22_b1.vcf.gz
├── deeprvat_preprocess_config.yaml
├── deeprvat_preprocessing.sif
└── vcf_files_list.txt


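[~/my_exp_dir]$ apptainer run deeprvat_preprocessing.sif snakemake -j 1 --snakefile /opt/deeprvat/pipelines/preprocess_no_qc.snakefile --configfile deeprvat_preprocess_config.yaml --ri -n

NOTE: The trailing `-n` makes this a dry run that only lists the jobs Snakemake would execute. Remove `-n` to actually run the preprocessing pipeline.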


# Run Training + Association Testing
cd ./my_exp_dir
.
├── annotations.parquet
├── baseline_results
│   ├── Apolipoprotein_A
│   │   ├── missense
│   │   │   ├── burden
│   │   │   │   └── eval
│   │   │   │       └── burden_associations.parquet
│   │   │   └── skat
│   │   │       └── eval
│   │   │           └── burden_associations.parquet
│   │   └── plof
│   │       ├── burden
│   │       │   └── eval
│   │       │       └── burden_associations.parquet
│   │       └── skat
│   │           └── eval
│   │               └── burden_associations.parquet
│   ├── Calcium
│   │   ├── missense
│   │   │   ├── burden
│   │   │   │   └── eval
│   │   │   │       └── burden_associations.parquet
│   │   │   └── skat
│   │   │       └── eval
│   │   │           └── burden_associations.parquet
│   │   └── plof
│   │       ├── burden
│   │       │   └── eval
│   │       │       └── burden_associations.parquet
│   │       └── skat
│   │           └── eval
│   │               └── burden_associations.parquet
│   ├── Cholesterol
│   │   ├── missense
│   │   │   ├── burden
│   │   │   │   └── eval
│   │   │   │       └── burden_associations.parquet
│   │   │   └── skat
│   │   │       └── eval
│   │   │           └── burden_associations.parquet
│   │   └── plof
│   │       ├── burden
│   │       │   └── eval
│   │       │       └── burden_associations.parquet
│   │       └── skat
│   │           └── eval
│   │               └── burden_associations.parquet
│   ├── Platelet_count
│   │   ├── missense
│   │   │   ├── burden
│   │   │   │   └── eval
│   │   │   │       └── burden_associations.parquet
│   │   │   └── skat
│   │   │       └── eval
│   │   │           └── burden_associations.parquet
│   │   └── plof
│   │       ├── burden
│   │       │   └── eval
│   │       │       └── burden_associations.parquet
│   │       └── skat
│   │           └── eval
│   │               └── burden_associations.parquet
├── deeprvat_config.yaml
├── deeprvat.sif
├── gencode.v38.basic.annotation.gtf.gz
├── genotypes.h5
├── phenotypes.parquet
├── protein_coding_genes.parquet
└── variants.parquet

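[~/my_exp_dir]$ apptainer run deeprvat.sif snakemake -j 1 --snakefile /opt/deeprvat/pipelines/training_association_testing.snakefile --configfile deeprvat_config.yaml --ri -n

NOTE: The trailing `-n` makes this a dry run that only lists the jobs Snakemake would execute. Remove `-n` to actually run training and association testing.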
29 changes: 29 additions & 0 deletions containers/apptainer_deeprvat.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Bootstrap: docker
From: mambaorg/micromamba:2.0-ubuntu22.04

%files
    deeprvat_env.yaml /opt/deeprvat_env.yaml

%environment
    export MAMBA_ROOT_PREFIX=/opt/conda
    export PATH="$MAMBA_ROOT_PREFIX/bin:$PATH"

%post
    # Keep apt from stopping the unattended build on interactive prompts.
    export DEBIAN_FRONTEND=noninteractive

    # System toolchain (Rust + C++).
    # NOTE(review): presumably needed to compile pip-installed dependencies
    # from source -- confirm which package requires it.
    apt-get -y update && apt-get -y install cargo g++
    # Drop apt caches so they are not baked into the (already large) image.
    apt-get clean
    rm -rf /var/lib/apt/lists/*

    # Install every dependency from the environment file into the base env,
    # then remove micromamba's package cache to reduce image size.
    micromamba install -q -y -n base -f /opt/deeprvat_env.yaml
    micromamba clean --all --yes

    # git is provided by the conda environment, hence `micromamba run`.
    # NOTE(review): this clones the default branch at build time, so rebuilding
    # later may produce a different image; consider pinning a tag or commit.
    micromamba run -n base git clone https://github.com/PMBio/deeprvat /opt/deeprvat
    # Editable install: the package is imported straight from /opt/deeprvat.
    micromamba run -n base pip install -e /opt/deeprvat

%runscript
    exec micromamba run -n base "$@"

%labels
    OWNER https://github.com/PMBio/deeprvat
    EnvironmentFile /opt/deeprvat_env.yaml

%help
    This container contains the deepRVAT repository and the relevant
    Python environment created from an environment file.
    To run Python from the environment:
        apptainer run deeprvat.sif python
    To see the environment file used to create the environment:
        apptainer run deeprvat.sif cat /opt/deeprvat_env.yaml
    To see what packages are in the environment:
        apptainer run deeprvat.sif micromamba list
28 changes: 28 additions & 0 deletions containers/apptainer_deeprvat_preprocessing.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Bootstrap: docker
From: mambaorg/micromamba:2.0-ubuntu22.04

%files
    deeprvat_preprocessing_env.yml /opt/deeprvat_preprocessing_env.yml

%environment
    export MAMBA_ROOT_PREFIX=/opt/conda
    export PATH="$MAMBA_ROOT_PREFIX/bin:$PATH"

%post
    # Install every dependency from the environment file into the base env,
    # then remove micromamba's package cache to reduce image size.
    micromamba install -q -y -n base -f /opt/deeprvat_preprocessing_env.yml
    micromamba clean --all --yes

    # git is provided by the conda environment, hence `micromamba run`.
    # NOTE(review): this clones the default branch at build time, so rebuilding
    # later may produce a different image; consider pinning a tag or commit.
    micromamba run -n base git clone https://github.com/PMBio/deeprvat /opt/deeprvat
    # Editable install: the package is imported straight from /opt/deeprvat.
    micromamba run -n base pip install -e /opt/deeprvat

%runscript
    exec micromamba run -n base "$@"

%labels
    OWNER https://github.com/PMBio/deeprvat
    EnvironmentFile /opt/deeprvat_preprocessing_env.yml

%help
    This container contains the deepRVAT repository and the relevant
    Python environment created from an environment file.
    To run Python from the environment:
        apptainer run deeprvat_preprocessing.sif python
    To see the environment file used to create the environment:
        apptainer run deeprvat_preprocessing.sif cat /opt/deeprvat_preprocessing_env.yml
    To see what packages are in the environment:
        apptainer run deeprvat_preprocessing.sif micromamba list
41 changes: 41 additions & 0 deletions containers/deeprvat_env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Conda environment for DeepRVAT training and association testing.
# Channels are listed in priority order; do not reorder them casually.
name: deeprvat
channels:
  - pytorch
  - nvidia
  - conda-forge
  - bioconda
dependencies:
  - click=8.1
  - cudatoolkit=11.8
  - dask=2023.5
  - fastparquet=0.5
  - git
  - h5py=3.1
  - mkl==2022.1.0
  - numcodecs=0.11
  - numpy=1.21
  - optuna=2.10
  - pandas=1.5
  - pyarrow=11.0
  - pyranges=0.0.129
  - python=3.8
  - pytorch=1.13
  - pytorch-cuda=11
  - pytorch-lightning=1.5
  - pyyaml=5.4
  - regenie=3.4.1
  - scikit-learn=1.1
  - scipy=1.10
  - setuptools=59.5
  - snakemake=7.17
  - sqlalchemy=1.4
  - statsmodels=0.13
  - tqdm=4.59
  - zarr=2.13
  - Cython=0.29
  - parallel=20230922
  - pip=23.0.1
  - plotnine=0.10.1
  # Packages installed with pip after the conda packages. They must be nested
  # under the `pip:` key; listed at the top level they would be interpreted as
  # conda package specs.
  - pip:
      - git+https://github.com/HealthML/seak@v0.4.3
      - bgen==1.6.3
19 changes: 19 additions & 0 deletions containers/deeprvat_preprocessing_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Conda environment for the DeepRVAT preprocessing pipeline.
# Channels are listed in priority order.
name: deeprvat_preprocess
channels:
  - conda-forge
  - bioconda
dependencies:
  # Interpreter
  - python=3.8

  # Python libraries
  - click=8.1.3
  - h5py=3.8.0
  - numpy=1.21.2
  - pandas=1.5.1
  - pyarrow=11.0.0
  - tqdm=4.59.0

  # Workflow engine and testing
  - snakemake=7.17.1
  - pytest=8.2.0

  # Command-line tools for VCF/BCF handling
  - bcftools=1.17
  - htslib=1.19.1
  - samtools=1.17

  # Tooling
  - mamba=1.4.2
  - git

0 comments on commit 5ded441

Please sign in to comment.