Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fixes for REGENIE pipelines #92

Merged
merged 5 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions deeprvat/deeprvat/associate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import statsmodels.api as sm
import yaml
from bgen import BgenWriter
-from numcodecs import Blosc
+from numcodecs import Blosc, JSON
from seak import scoretest
from statsmodels.tools.tools import add_constant
from torch.utils.data import DataLoader, Dataset, Subset
Expand Down Expand Up @@ -295,7 +295,7 @@ def compute_burdens_(
chunk_burden = np.zeros(shape=(n_samples,) + this_burdens.shape[1:])
chunk_y = np.zeros(shape=(n_samples,) + this_y.shape[1:])
chunk_x = np.zeros(shape=(n_samples,) + this_x.shape[1:])
-chunk_sampleid = np.zeros(shape=(n_samples))
+chunk_sampleid = [""] * n_samples

logger.info(f"Batch size: {batch['rare_variant_annotations'].shape}")

Expand Down Expand Up @@ -333,8 +333,8 @@ def compute_burdens_(
mode="a",
shape=(n_total_samples),
chunks=(None),
-dtype=np.float32,
-compressor=Blosc(clevel=compression_level),
+dtype=object,
+object_codec=JSON(),
)
start_idx = i * batch_size
end_idx = min(start_idx + batch_size, chunk_end) # read from chunk shape
Expand Down Expand Up @@ -513,7 +513,7 @@ def make_regenie_input_(
with BgenWriter(
bgen,
n_samples,
-samples=list(sample_ids),
+samples=list(sample_ids.astype(str)),
metadata="Pseudovariants containing DeepRVAT gene impairment scores. One pseudovariant per gene.",
) as f:
for i in trange(n_genes):
Expand Down
15 changes: 15 additions & 0 deletions example/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,21 @@ n_repeats: 2

do_scoretest: True

gtf_file: gencode.v38.basic.annotation.gtf.gz

regenie:
    step_1:
        bgen: imputation.bgen
        snplist: imputation.snplist
        bsize: 1000
        options:
            - "--sample imputation.sample"
            - "--qt"
    step_2:
        bsize: 400
        options:
            - "--qt"

training:
    min_variant_count: 1
    n_bags: 1
Expand Down
Binary file added example/gencode.v38.basic.annotation.gtf.gz
Binary file not shown.
Binary file added example/imputation.bgen
Binary file not shown.
Binary file added example/imputation.bgen.bgi
Binary file not shown.
Loading