From 125ae100bff2c015ef348fdac49fba6a51b30e10 Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Tue, 5 Dec 2023 08:03:00 +0100 Subject: [PATCH 1/4] add embeddings path --- tomotwin/modules/tools/umap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tomotwin/modules/tools/umap.py b/tomotwin/modules/tools/umap.py index 5ee9e89..9061105 100644 --- a/tomotwin/modules/tools/umap.py +++ b/tomotwin/modules/tools/umap.py @@ -147,6 +147,7 @@ def run(self, args): print("Write embeedings to disk") df_embeddings.columns = [f"umap_{i}" for i in range(umap_embeddings.shape[1])] + df_embeddings.attrs['embeddings_path'] = os.path.realpath(args.input) df_embeddings.to_pickle(os.path.join(out_pth,fname+".tumap")) print("Write umap model to disk") From 6d73f93c23efcfeccc33ed2ec76034a1cf44b721 Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Tue, 5 Dec 2023 15:24:32 +0100 Subject: [PATCH 2/4] positions are now added --- tomotwin/modules/tools/umap.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tomotwin/modules/tools/umap.py b/tomotwin/modules/tools/umap.py index 9061105..9da9e39 100644 --- a/tomotwin/modules/tools/umap.py +++ b/tomotwin/modules/tools/umap.py @@ -144,10 +144,15 @@ def run(self, args): os.makedirs(out_pth,exist_ok=True) fname = os.path.splitext(os.path.basename(args.input))[0] df_embeddings = pd.DataFrame(umap_embeddings) - + df_embeddings.reset_index(drop=True, inplace=True) + embeddings.reset_index(drop=True, inplace=True) + print("DF", df_embeddings.shape) + print("EMBEDDINGS", embeddings.shape, embeddings.dtypes) print("Write embeedings to disk") df_embeddings.columns = [f"umap_{i}" for i in range(umap_embeddings.shape[1])] + df_embeddings = pd.concat([embeddings[['X', 'Y', 'Z']], df_embeddings], axis=1) df_embeddings.attrs['embeddings_path'] = os.path.realpath(args.input) + df_embeddings.to_pickle(os.path.join(out_pth,fname+".tumap")) print("Write umap model to disk") From 7f25b9a181f531b25e7bdf39abfd8fdf09b0badf Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Mon, 11 Dec 2023 10:52:09 +0100 Subject: [PATCH 3/4] add embedding attrs to umap attrs --- tomotwin/modules/tools/umap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tomotwin/modules/tools/umap.py b/tomotwin/modules/tools/umap.py index 9da9e39..36735cb 100644 --- a/tomotwin/modules/tools/umap.py +++ b/tomotwin/modules/tools/umap.py @@ -146,11 +146,11 @@ def run(self, args): df_embeddings = pd.DataFrame(umap_embeddings) df_embeddings.reset_index(drop=True, inplace=True) embeddings.reset_index(drop=True, inplace=True) - print("DF", df_embeddings.shape) - print("EMBEDDINGS", embeddings.shape, embeddings.dtypes) + print("Write embeedings to disk") df_embeddings.columns = [f"umap_{i}" for i in range(umap_embeddings.shape[1])] df_embeddings = pd.concat([embeddings[['X', 'Y', 'Z']], df_embeddings], axis=1) + df_embeddings.attrs = embeddings.attrs df_embeddings.attrs['embeddings_path'] = os.path.realpath(args.input) df_embeddings.to_pickle(os.path.join(out_pth,fname+".tumap")) From 3311ff27fe03a2a265f7717a6c11397ae527858b Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Wed, 13 Dec 2023 09:30:39 +0100 Subject: [PATCH 4/4] remove calculate embedding mask from umap tool --- tomotwin/modules/tools/umap.py | 36 +--------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/tomotwin/modules/tools/umap.py b/tomotwin/modules/tools/umap.py index 36735cb..0e83252 100644 --- a/tomotwin/modules/tools/umap.py +++ b/tomotwin/modules/tools/umap.py @@ -9,7 +9,6 @@ except ImportError: print("cuml can't be loaded") -import mrcfile import numpy as np import pandas as pd from numpy.typing import ArrayLike @@ -98,28 +97,6 @@ def calcuate_umap( return embedding, reducer - def create_embedding_mask(self, embeddings: pd.DataFrame): - """ - Creates mask where each individual subvolume of the running windows gets an individual ID - """ - print("Create embedding mask") - Z = embeddings.attrs["tomogram_input_shape"][0] - Y = embeddings.attrs["tomogram_input_shape"][1] - X = embeddings.attrs["tomogram_input_shape"][2] - stride = embeddings.attrs["stride"][0] - segmentation_array = np.zeros(shape=(Z, Y, X), dtype=np.float32) - z = np.array(embeddings["Z"], dtype=int) - y = np.array(embeddings["Y"], dtype=int) - x = np.array(embeddings["X"], dtype=int) - - values = np.array(range(1, len(x) + 1)) - for stride_x in tqdm(list(range(stride))): - for stride_y in range(stride): - for stride_z in range(stride): - index = (z + stride_z, y + stride_y, x + stride_x) - segmentation_array[index] = values - - return segmentation_array def run(self, args): print("Read data") @@ -150,7 +127,7 @@ def run(self, args): print("Write embeedings to disk") df_embeddings.columns = [f"umap_{i}" for i in range(umap_embeddings.shape[1])] df_embeddings = pd.concat([embeddings[['X', 'Y', 'Z']], df_embeddings], axis=1) - df_embeddings.attrs = embeddings.attrs + df_embeddings.attrs['embeddings_attrs'] = embeddings.attrs df_embeddings.attrs['embeddings_path'] = os.path.realpath(args.input) df_embeddings.to_pickle(os.path.join(out_pth,fname+".tumap")) @@ -158,15 +135,4 @@ def run(self, args): print("Write umap model to disk") pickle.dump(fitted_umap, open(os.path.join(out_pth, fname + "_umap_model.pkl"), "wb")) - print("Calculate label mask and write it to disk") - embedding_mask = self.create_embedding_mask(embeddings) - with mrcfile.new( - os.path.join( - args.output, - fname + "_label_mask.mrci", - ), - overwrite=True, - ) as mrc: - mrc.set_data(embedding_mask) - print("Done")