Merge pull request #111 from ArnovanHilten/epistasis-fix
Fix installation issues
ArnovanHilten authored Feb 3, 2025
2 parents d961d20 + c07c126 commit 7d8c18b
Showing 14 changed files with 323 additions and 99 deletions.
40 changes: 40 additions & 0 deletions .dockerignore
@@ -0,0 +1,40 @@
# Ignore Python cache and compiled files
__pycache__/
*.py[cod]
*.pyo
# Ignore environment folders
venv/
env/
.venv/
conda_env/
# Ignore any distribution or build artifacts
build/
dist/
*.egg-info
# Ignore version control system folders
.git/
.gitignore
# Ignore OS-specific hidden files
.DS_Store
Thumbs.db
# If you have a big data directory or logs, ignore those too
data/
logs/

# jupyter notebooks
.ipynb_checkpoints
__pycache__/
# processed data
processed_data/*
!processed_data/.gitkeep
# results
results/*
!results/.gitkeep
# idea
.idea/
.idea/*
.swp
*.swp
.log
*.log
examples/A_to_Z/processed_data/*
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.7"]
python-version: ["3.10.12"]

steps:
- uses: actions/checkout@v3
30 changes: 30 additions & 0 deletions Dockerfile
@@ -0,0 +1,30 @@
# Step 1: Use the official TensorFlow 2.11 image as the base
FROM tensorflow/tensorflow:2.11.0

# Step 2: Install system dependencies
RUN apt-get update && apt-get install -y \
git \
wget \
&& rm -rf /var/lib/apt/lists/*

# Step 3: Upgrade pip to the latest version
RUN python3 -m pip install --upgrade pip

# Step 4: Copy your requirements file into the container
COPY requirements_GenNet.txt /tmp/requirements_GenNet.txt

# Step 5: Install Python packages
RUN pip install --no-cache-dir -r /tmp/requirements_GenNet.txt

# Step 6: Set the working directory
WORKDIR /app

# Step 7: Copy your project files into the container
COPY . /app

# Step 8: Set environment variables (optional)
ENV RESULT_PATH="/app/results"
ENV DATA_PATH="/app/examples"

# Step 9: Define the entrypoint to simplify CLI usage
ENTRYPOINT ["python", "GenNet.py"]
8 changes: 7 additions & 1 deletion GenNet.py
@@ -203,7 +203,7 @@ def make_parser_train(self, parser_train):
"-mixed_precision",
action='store_true',
default=False,
        help='Flag for mixed precision to save memory (can reduce performance)')
parser_train.add_argument(
"-suffix",
metavar="extra_info",
@@ -252,6 +252,12 @@ def make_parser_train(self, parser_train):
action='store_true',
default=False,
help='initialize the one-hot encoding for the neural network with a linear assumption')
parser_train.add_argument(
"-improved_norm",
action='store_true',
default=False,
        help='Use the PerVariantNormalization layer instead of batchnorm, for normalization better suited to interpretation')

return parser_train

def make_parser_plot(self, parser_plot):
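
For reference, a minimal sketch of how the store_true flags added above behave with argparse; the flag names match the diff, everything else is illustrative.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-mixed_precision", action="store_true", default=False)
parser.add_argument("-improved_norm", action="store_true", default=False)

# Passing a flag sets it to True; omitting it leaves the default False.
args = parser.parse_args(["-improved_norm"])
assert args.improved_norm is True and args.mixed_precision is False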
64 changes: 52 additions & 12 deletions GenNet_utils/Create_network.py
@@ -12,6 +12,9 @@
import scipy
import tables
tf.keras.backend.set_epsilon(0.0000001)

from GenNet_utils.Normalization import PerVariantNormalization

tf_version = tf.__version__ # ToDo use packaging.version

if tf_version <= '1.13.1':
@@ -23,7 +26,7 @@
from GenNet_utils.LocallyDirected1D import LocallyDirected1D


def example_network():
def example_network(inputsize, num_covariates=0):
mask = scipy.sparse.load_npz('./folder/snps_gene.npz')
masks = [mask]

@@ -52,7 +55,7 @@ def regression_properties(datapath):
return mean_ytrain, negative_values_ytrain


def layer_block(model, mask, i, regression, L1_act =0.01):
def layer_block(model, mask, i, regression, L1_act=0.01, batchnorm=True):
if regression:
activation_type="relu"
else:
@@ -61,7 +64,12 @@ def layer_block(model, mask, i, regression, L1_act =0.01):
model = LocallyDirected1D(mask=mask, filters=1, input_shape=(mask.shape[0], 1),
name="LocallyDirected_" + str(i), activity_regularizer=K.regularizers.l1(L1_act))(model)
model = K.layers.Activation(activation_type)(model)
model = K.layers.BatchNormalization(center=False, scale=False)(model)

if batchnorm:
model = K.layers.BatchNormalization(center=False, scale=False)(model)
else:
model = PerVariantNormalization()(model)

return model
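
PerVariantNormalization itself is defined in GenNet_utils/Normalization.py, which is not among the hunks shown here. As a rough sketch of the idea the help text suggests (normalizing each variant with tracked per-feature statistics rather than batch statistics), one could imagine a layer like the one below; this is an illustrative assumption, not the actual implementation.

import tensorflow as tf

class PerVariantNormalizationSketch(tf.keras.layers.Layer):
    """Illustrative only: tracks a moving mean and variance per input
    feature (variant) and normalizes with those statistics."""

    def __init__(self, epsilon=1e-7, momentum=0.99, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon
        self.momentum = momentum

    def build(self, input_shape):
        shape = input_shape[1:]
        self.moving_mean = self.add_weight(name="moving_mean", shape=shape,
                                           initializer="zeros", trainable=False)
        self.moving_var = self.add_weight(name="moving_var", shape=shape,
                                          initializer="ones", trainable=False)

    def call(self, inputs, training=None):
        if training:
            # Update the tracked per-variant statistics from the current batch.
            mean = tf.reduce_mean(inputs, axis=0)
            var = tf.math.reduce_variance(inputs, axis=0)
            self.moving_mean.assign(self.momentum * self.moving_mean + (1.0 - self.momentum) * mean)
            self.moving_var.assign(self.momentum * self.moving_var + (1.0 - self.momentum) * var)
        return (inputs - self.moving_mean) / tf.sqrt(self.moving_var + self.epsilon)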


@@ -87,11 +95,18 @@ def one_hot_input(input_layer):
return model


def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain, l1_value, L1_act):
def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain,
mean_ytrain, l1_value, L1_act, batchnorm=True):
if num_covariates > 0:
model = activation_layer(model, regression, negative_values_ytrain)
model = K.layers.concatenate([model, input_cov], axis=1, name="concatenate_cov")
model = K.layers.BatchNormalization(center=False, scale=False, name="batchnorm_cov")(model)


if batchnorm:
model = K.layers.BatchNormalization(center=False, scale=False, name="batchnorm_cov")(model)
else:
model = PerVariantNormalization(name="pervariantnorm_cov")(model)

model = K.layers.Dense(units=1, name="output_layer_cov",
kernel_regularizer=tf.keras.regularizers.l1(l=l1_value),
activity_regularizer=K.regularizers.l1(L1_act),
@@ -106,7 +121,8 @@ def create_network_from_npz(datapath,
regression=False,
one_hot = False,
num_covariates=0,
mask_order = []):
mask_order = [],
batchnorm = True):
print("Creating networks from npz masks")
print("regression", regression)
print("one_hot", one_hot)
@@ -171,7 +187,7 @@

for i in range(len(masks)):
mask = masks[i]
model = layer_block(model, mask, i, regression, L1_act=L1_act)
        model = layer_block(model, mask, i, regression, L1_act=L1_act, batchnorm=batchnorm)

model = K.layers.Flatten()(model)

@@ -184,7 +200,8 @@
activity_regularizer=K.regularizers.l1(L1_act),
bias_initializer= tf.keras.initializers.Constant(mean_ytrain))(model)

model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain, l1_value, L1_act)
model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain,
mean_ytrain, l1_value, L1_act, batchnorm=batchnorm)

output_layer = activation_layer(model, regression, negative_values_ytrain)
model = K.Model(inputs=[input_layer, input_cov], outputs=output_layer)
@@ -202,7 +219,8 @@ def create_network_from_csv(datapath,
L1_act =0.01,
regression=False,
one_hot=False,
num_covariates=0):
num_covariates=0,
batchnorm=True):

print("Creating networks from npz masks")
print("regression", regression)
@@ -240,7 +258,7 @@
matrixshape = (network_csv[columns[i]].max() + 1, network_csv[columns[i + 1]].max() + 1)
mask = scipy.sparse.coo_matrix(((matrix_ones), matrix_coord), shape = matrixshape)
masks.append(mask)
model = layer_block(model, mask, i, regression, L1_act=L1_act)
model = layer_block(model, mask, i, regression, L1_act=L1_act, batchnorm=batchnorm)

model = K.layers.Flatten()(model)

@@ -249,7 +267,8 @@
activity_regularizer=K.regularizers.l1(L1_act),
bias_initializer= tf.keras.initializers.Constant(mean_ytrain))(model)

model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain, l1_value, L1_act)
model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain,
mean_ytrain, l1_value, L1_act, batchnorm=batchnorm)

output_layer = activation_layer(model, regression, negative_values_ytrain)

@@ -261,8 +280,17 @@ def create_network_from_csv(datapath,
return model, masks


def lasso(inputsize, l1_value, num_covariates=0, regression=False):
def lasso(datapath, inputsize, l1_value, num_covariates=0, regression=False, L1_act =0.01):
masks=[]

if regression:
mean_ytrain, negative_values_ytrain = regression_properties(datapath)
print('mean_ytrain',mean_ytrain)
print('negative_values_ytrain',negative_values_ytrain)
else:
mean_ytrain = 0
negative_values_ytrain = False

inputs = K.Input((inputsize,), name='inputs')
input_cov = K.Input((num_covariates,), name='inputs_cov')
model = K.layers.BatchNormalization(center=False, scale=False, name="inter_out")(inputs)
@@ -491,6 +519,18 @@ def remove_batchnorm_model(model, masks, keep_cov = False):
x = inputs

mask_num = 0

    # Check if there is a batchnorm layer to remove
    batchnorm_present = False
    for layer in original_model.layers:
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            batchnorm_present = True
            break

    if not batchnorm_present:
        print("No batchnorm layer present to remove")
        return model


for layer in original_model.layers[1:]:
# Skip BatchNormalization layers
if not isinstance(layer, tf.keras.layers.BatchNormalization):
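
A hypothetical call showing the effect of the new batchnorm flag on create_network_from_csv; the path and hyperparameter values are placeholders, and any argument not visible in the hunks above is an assumption.

from GenNet_utils.Create_network import create_network_from_csv

# batchnorm=False switches every layer_block (and the covariate branch)
# from BatchNormalization to PerVariantNormalization.
model, masks = create_network_from_csv(datapath="./processed_data/",  # placeholder path
                                       l1_value=0.01,                 # assumed argument
                                       L1_act=0.01,
                                       regression=False,
                                       one_hot=False,
                                       num_covariates=2,
                                       batchnorm=False)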
5 changes: 4 additions & 1 deletion GenNet_utils/Dataloader.py
@@ -275,7 +275,7 @@ def multi_genotype_matrix(self, idx):
def get_data(self, sample_pat=0):

genotype_hdf = tables.open_file(self.genotype_path + "/genotype.h5", "r")
ybatch = self.eval_subjects["labels"]


if sample_pat > 0:
self.eval_subjects = self.eval_subjects.sample(n=sample_pat, random_state=1)
@@ -286,7 +286,10 @@ def get_data(self, sample_pat=0):
xcov = xcov.values
xbatch = genotype_hdf.root.data[xbatchid,...]
xbatch = self.if_one_hot(xbatch)

ybatch = self.eval_subjects["labels"]
ybatch = np.reshape(np.array(ybatch), (-1, 1))

genotype_hdf.close()
return [xbatch, xcov], ybatch

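
The Dataloader fix matters because get_data may subsample eval_subjects before the labels are read; taking the labels first (the old order) would return labels for patients that were subsequently dropped. A compact illustration of the ordering bug with pandas (toy data):

import pandas as pd

df = pd.DataFrame({"labels": [0, 1, 0, 1]})
labels_before = df["labels"]            # old order: grabbed before subsampling
df = df.sample(n=2, random_state=1)     # subsample patients, as with sample_pat > 0
labels_after = df["labels"]             # fixed order: matches the subsample
assert len(labels_before) == 4 and len(labels_after) == 2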
52 changes: 44 additions & 8 deletions GenNet_utils/Interpret.py
@@ -11,6 +11,7 @@

import shap
import interpretation.DFIM as DFIM
from tensorflow.keras.optimizers.legacy import Adam

from interpretation.weight_importance import make_importance_values_input
from interpretation.NID import Get_weight_tsang, GenNet_pairwise_interactions_topn
@@ -62,18 +63,30 @@ def get_DeepExplainer_scores(args):
inputsize=-1, evalset="test").get_data(sample_pat= args.num_sample_pat)


if np.unique(np.array(ytest)).shape[0] > 2:
args.regression = True
else:
args.regression = False



print("Loaded the data")

model = remove_batchnorm_model(model, masks, keep_cov=False)

print("compile")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
model.compile(optimizer=Adam(learning_rate=1e-3),
loss=tf.keras.losses.BinaryCrossentropy())

xval = xval[0]
xtest = xtest[0]

explainer = shap.DeepExplainer((model.input, model.output), xval)
yval = yval.flatten()
ytest = ytest.flatten()

xval = xval if args.regression else xval[yval==0,:]
xtest = xtest if args.regression else xtest[ytest==1,:]

    explainer = shap.DeepExplainer((model.input, model.output), xval)
print("Created explainer")

if os.path.exists( args.resultpath+ "/DeepExplain_test.npy"):
@@ -91,8 +104,6 @@ def get_NID_scores(args):
print("Interpreting with NID:")
model, masks = load_trained_network(args)

mask = masks[0]

if args.layer == None:
if args.onehot == 1:
interp_layer = 3
@@ -112,7 +123,7 @@ def get_NID_scores(args):

pairwise_interactions_dfs = []
for filter in range(w_in.shape[1]): # for all the filters
pairwise_interactions = GenNet_pairwise_interactions_topn(w_in[:,filter] ,w_out[:,filter], mask, n="auto")
pairwise_interactions = GenNet_pairwise_interactions_topn(w_in[:,filter] ,w_out[:,filter], masks, n="auto")
pairwise_interactions_dfs.append(pairwise_interactions)

interaction_ranking = pd.concat(pairwise_interactions_dfs)
@@ -153,12 +164,23 @@ def get_DFIM_scores(args):
model = remove_batchnorm_model(model, masks, keep_cov=False)

print("compile")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
model.compile(optimizer=Adam(learning_rate=1e-3),
loss=tf.keras.losses.BinaryCrossentropy())

xval = xval[0]
xtest = xtest[0]
yval = yval.flatten()
ytest = ytest.flatten()

if np.unique(np.array(ytest)).shape[0] > 2:
args.regression = True
else:
args.regression = False


xval = xval if args.regression else xval[yval==0,:]
xtest = xtest if args.regression else xtest[ytest==1,:]

explainer = shap.DeepExplainer((model.input, model.output), xval)
print("Created explainer")

@@ -216,7 +238,21 @@ def get_pathexplain_scores(args):

xval = xval[0]
xtest = xtest[0]
print("Shapes",xval.shape, xtest.shape)

yval = yval.flatten()
ytest = ytest.flatten()


if np.unique(np.array(ytest)).shape[0] > 2:
args.regression = True
else:
args.regression = False



xval = xval if args.regression else xval[yval==0,:]
xtest = xtest if args.regression else xtest[ytest==1,:]


explainer = PathExplainerTF(model)
n_top_values = min(num_snps_to_eval, xtest.shape[1])
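
The filtering added across the interpretation functions uses controls (y == 0) as the explainer background and cases (y == 1) as the samples to explain when the task is classification; for regression (more than two unique label values) the data is left unfiltered. A compact illustration with toy arrays (shapes are made up):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(6, 4))
y = np.array([0, 1, 0, 1, 1, 0])

background = x[y == 0, :]   # controls: reference distribution for the explainer
foreground = x[y == 1, :]   # cases: samples whose predictions are explained
print(background.shape, foreground.shape)   # (3, 4) (3, 4)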
