Skip to content

Commit

Permalink
Bug fix in GUI
Browse files Browse the repository at this point in the history
Problem of overhead memory in BERTrend serialization
  • Loading branch information
picaultj committed Feb 17, 2025
1 parent 84e98ff commit 50259d6
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 6 deletions.
8 changes: 5 additions & 3 deletions bertrend/BERTrend.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,11 +649,13 @@ def save_models(self, models_path: Path = MODELS_DIR):
topic_model.topic_info_df.to_pickle(model_dir / TOPIC_INFO_DF_FILE)

# Serialize BERTrend (excluding topic models for separate reuse if needed)
topic_models_bak = copy.deepcopy(self.topic_models)
self.topic_models = None
# topic_models_bak = copy.deepcopy(self.topic_models)
# FIXME: the commented-out code introduced too much memory overhead, to be improved; the idea is to serialize
# the topic models separately from the rest of the BERTrend object
# self.topic_models = None
with open(models_path / BERTREND_FILE, "wb") as f:
dill.dump(self, f)
self.topic_models = topic_models_bak
# self.topic_models = topic_models_bak

logger.info(f"Models saved to: {models_path}")

Expand Down
14 changes: 12 additions & 2 deletions bertrend_apps/prospective_demo/dashboard_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ def get_df_topics(model_interpretation_path=None) -> dict[str, pd.DataFrame]:

def update_key(key: str, new_value: Any):
    """Store ``new_value`` under ``key`` in ``st.session_state``.

    Args:
        key: Name of the session-state entry to write.
        new_value: Value assigned to that entry.
    """
    st.session_state[key] = new_value
    # NOTE: "reference_ts" is left untouched here; update_key_and_ts is the variant that also clears it.


def update_key_and_ts(key: str, new_value: Any):
    """Write ``new_value`` under ``key`` and forget any stored ``reference_ts``.

    Args:
        key: Name of the session-state entry to write.
        new_value: Value assigned to that entry.
    """
    update_key(key, new_value)
    # A previously selected ts may not exist for the new key value, so drop it
    # (no-op when absent) to avoid errors on the next lookup.
    st.session_state.pop("reference_ts", None)


def choose_id_and_ts():
Expand All @@ -43,7 +51,9 @@ def choose_id_and_ts():
options=options,
index=options.index(st.session_state.model_id),
key=model_id_key, # to avoid pb of unicity if displayed on several places
on_change=lambda: update_key("model_id", st.session_state[model_id_key]),
on_change=lambda: update_key_and_ts(
"model_id", st.session_state[model_id_key]
),
)
with col2:
list_models = get_models_info(model_id)
Expand All @@ -58,7 +68,7 @@ def choose_id_and_ts():
if "reference_ts" not in st.session_state:
st.session_state.reference_ts = list_models[-1]
ts_key = uuid.uuid4()
reference_ts = st.select_slider(
st.select_slider(
"Date d'analyse",
options=list_models,
value=st.session_state.reference_ts,
Expand Down
2 changes: 1 addition & 1 deletion bertrend_apps/prospective_demo/models_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def handle_regenerate_models(row_dict: dict):
with col1:
if yes_btn := st.button("Oui", type="primary"):
# Delete previously stored model
# delete_cached_models(model_id)
delete_cached_models(model_id)
logger.info(f"Modèles en cache supprimés pour la veille {model_id} !")

# Regenerate new models
Expand Down

0 comments on commit 50259d6

Please sign in to comment.