Separation of signals dataframes and detailed analysis in separate tabs

rte-france · Feb 11, 2025 · 2c989bc · 2c989bc
1 parent 750480a
commit 2c989bc
Show file tree

Hide file tree

Showing 7 changed files with 168 additions and 79 deletions.
diff --git a/bertrend_apps/prospective_demo/__init__.py b/bertrend_apps/prospective_demo/__init__.py
@@ -4,6 +4,9 @@
 #  This file is part of BERTrend.
 from pathlib import Path
 
+import pandas as pd
+import streamlit
+
 from bertrend import MODELS_DIR, FEED_BASE_PATH, CONFIG_PATH
 
 # Config path for users
@@ -38,18 +41,32 @@
 }
 
 
+@streamlit.cache_data
 def get_user_feed_path(user_name: str, feed_id: str) -> Path:
+    """Return the path ``CONFIG_FEEDS_BASE_PATH/<user_name>/<feed_id>_feed.toml``."""
     feed_path = CONFIG_FEEDS_BASE_PATH / user_name / f"{feed_id}_feed.toml"
     return feed_path
 
 
+@streamlit.cache_data
 def get_user_models_path(user_name: str, model_id: str) -> Path:
     # Path to previously saved models for those data and this user
     models_path = BASE_MODELS_DIR / user_name / model_id
     models_path.mkdir(parents=True, exist_ok=True)
     return models_path
 
 
+@streamlit.cache_data
 def get_model_cfg_path(user_name: str, model_id: str) -> Path:
+    """Return the path ``CONFIG_FEEDS_BASE_PATH/<user_name>/<model_id>_analysis.toml``."""
     model_cfg_path = CONFIG_FEEDS_BASE_PATH / user_name / f"{model_id}_analysis.toml"
     return model_cfg_path
+
+
+def get_model_interpretation_path(
+    user_name: str, model_id: str, reference_ts: pd.Timestamp
+) -> Path:
+    """Return the interpretation directory of a model for a given reference date.
+
+    The result is ``<user models path>/INTERPRETATION_PATH/<YYYY-MM-DD>`` where
+    the date component is ``reference_ts`` formatted with ``%Y-%m-%d``.
+
+    Not wrapped in ``streamlit.cache_data``: this function delegates to
+    ``get_user_models_path``, which creates the models directory, and caching
+    this wrapper would skip that call on cache hits.
+    """
+    models_path = get_user_models_path(user_name=user_name, model_id=model_id)
+    return models_path / INTERPRETATION_PATH / reference_ts.strftime("%Y-%m-%d")
diff --git a/bertrend_apps/prospective_demo/app.py b/bertrend_apps/prospective_demo/app.py
@@ -27,7 +27,7 @@
 from bertrend_apps.prospective_demo.feeds_data import display_data_status
 from bertrend_apps.prospective_demo.models_info import models_monitoring
 from bertrend_apps.prospective_demo.report_generation import reporting
-from bertrend_apps.prospective_demo.signal_analysis import signal_analysis
+from bertrend_apps.prospective_demo.dashboard_signals import signal_analysis
 
 # UI Settings
 # PAGE_TITLE = "BERTrend - Prospective Analysis demo"

diff --git a/bertrend_apps/prospective_demo/dashboard_analysis.py b/bertrend_apps/prospective_demo/dashboard_analysis.py
@@ -18,82 +18,34 @@
     STRONG_SIGNALS,
     NOISE,
     LLM_TOPIC_DESCRIPTION_COLUMN,
+    get_model_interpretation_path,
+)
+from bertrend_apps.prospective_demo.dashboard_common import (
+    choose_id_and_ts,
+    get_df_topics,
 )
-from bertrend_apps.prospective_demo.models_info import get_models_info
-
-COLS_RATIO = [2 / 7, 5 / 7]
 
 
 @st.fragment()
 def dashboard_analysis():
     """Dashboard to analyze information monitoring results"""
     st.session_state.signal_interpretations = {}
-
-    col1, col2 = st.columns(COLS_RATIO)
-    with col1:
-        model_id = st.selectbox(
-            "Sélection de la veille", options=sorted(st.session_state.user_feeds.keys())
-        )
-    with col2:
-        list_models = get_models_info(model_id)
-        if not list_models:
-            st.warning(f"{WARNING_ICON} Pas de modèle disponible")
-            st.stop()
-        elif len(list_models) < 2:
-            st.warning(
-                f"{WARNING_ICON} 2 modèles minimum pour analyser les tendances !"
-            )
-            st.stop()
-        reference_ts = st.select_slider(
-            "Date d'analyse",
-            options=list_models,
-            value=list_models[-1],
-            format_func=lambda ts: ts.strftime("%d/%m/%Y"),
-            help="Sélection de la date d'analyse parmi celles disponibles",
-        )
+    choose_id_and_ts()
 
     # LLM-based interpretation
-    model_interpretation_path = (
-        get_user_models_path(user_name=st.session_state.username, model_id=model_id)
-        / INTERPRETATION_PATH
-        / reference_ts.strftime("%Y-%m-%d")
-    )
-
-    dfs_topics = {}
-    for df_id in [NOISE, WEAK_SIGNALS, STRONG_SIGNALS]:
-        df_path = model_interpretation_path / f"{df_id}.parquet"
-        dfs_topics[df_id] = (
-            pd.read_parquet(df_path) if df_path.exists() else pd.DataFrame()
-        )
+    model_id = st.session_state.model_id
+    reference_ts = st.session_state.reference_ts
 
-    cols = st.columns(COLS_RATIO)
-    with cols[0]:
-        # Display data frames
-        columns = [
-            "Topic",
-            LLM_TOPIC_DESCRIPTION_COLUMN,
-            "Representation",
-            "Latest_Popularity",
-            "Docs_Count",
-            "Paragraphs_Count",
-            "Latest_Timestamp",
-            "Documents",
-            "Sources",
-            "Source_Diversity",
-        ]
-
-        display_signal_categories_df(
-            dfs_topics[NOISE],
-            dfs_topics[WEAK_SIGNALS],
-            dfs_topics[STRONG_SIGNALS],
-            reference_ts,
-            columns=columns,
-        )
+    model_interpretation_path = get_model_interpretation_path(
+        user_name=st.session_state.username,
+        model_id=model_id,
+        reference_ts=reference_ts,
+    )
 
-    with cols[1]:
-        # Detailed analysis
-        st.subheader("Analyse détaillée par sujet")
-        display_detailed_analysis(model_id, model_interpretation_path, dfs_topics)
+    # Detailed analysis
+    st.subheader("Analyse détaillée par sujet")
+    dfs_topics = get_df_topics(model_interpretation_path)
+    display_detailed_analysis(model_id, model_interpretation_path, dfs_topics)
 
 
 @st.fragment()

diff --git a/bertrend_apps/prospective_demo/dashboard_common.py b/bertrend_apps/prospective_demo/dashboard_common.py
@@ -0,0 +1,69 @@
+#  Copyright (c) 2024, RTE (https://www.rte-france.com)
+#  See AUTHORS.txt
+#  SPDX-License-Identifier: MPL-2.0
+#  This file is part of BERTrend.
+
+import uuid
+from typing import Any
+
+import pandas as pd
+import streamlit as st
+
+from bertrend.demos.demos_utils.icons import WARNING_ICON
+from bertrend_apps.prospective_demo import NOISE, WEAK_SIGNALS, STRONG_SIGNALS
+from bertrend_apps.prospective_demo.models_info import get_models_info
+
+# Relative widths of the two columns created by choose_id_and_ts():
+# the feed selector (first column) and the analysis-date slider (second column).
+COLS_RATIO_ID_TS = [2 / 7, 5 / 7]
+
+
+@st.cache_data
+def get_df_topics(model_interpretation_path=None) -> dict[str, pd.DataFrame]:
+    """Load the per-category topic dataframes stored in an interpretation directory.
+
+    For each of NOISE, WEAK_SIGNALS and STRONG_SIGNALS, reads
+    ``<model_interpretation_path>/<category>.parquet`` when the file exists and
+    uses an empty dataframe otherwise.
+
+    Args:
+        model_interpretation_path: path-like object supporting the ``/``
+            operator (e.g. ``pathlib.Path``). When ``None`` (the default),
+            every category maps to an empty dataframe.
+
+    Returns:
+        A dict mapping each category identifier to its dataframe.
+    """
+    # NOTE(review): st.cache_data is used without a ttl, so parquet files
+    # (re)written after the first call for a given path are not picked up
+    # until the cache is cleared -- confirm this is acceptable.
+    signal_ids = [NOISE, WEAK_SIGNALS, STRONG_SIGNALS]
+    if model_interpretation_path is None:
+        # The default argument used to crash on ``None / str``; treat it like
+        # a directory in which no file exists.
+        return {df_id: pd.DataFrame() for df_id in signal_ids}
+
+    dfs_topics = {}
+    for df_id in signal_ids:
+        df_path = model_interpretation_path / f"{df_id}.parquet"
+        dfs_topics[df_id] = (
+            pd.read_parquet(df_path) if df_path.exists() else pd.DataFrame()
+        )
+    return dfs_topics
+
+
+def update_key(key: str, new_value: Any) -> None:
+    """Store ``new_value`` under ``key`` in the Streamlit session state."""
+    st.session_state[key] = new_value
+
+
+def choose_id_and_ts():
+    """Render the feed selector and the analysis-date slider.
+
+    The current choices are kept in ``st.session_state.model_id`` and
+    ``st.session_state.reference_ts``; callers read them from there after
+    this function returns.
+
+    The script is stopped (``st.stop()``) after a warning when the user has
+    no feed, when the selected feed has no model, or when it has fewer than
+    two models.
+    """
+    col1, col2 = st.columns(COLS_RATIO_ID_TS)
+    with col1:
+        options = sorted(st.session_state.user_feeds.keys())
+        if not options:
+            # Nothing to select: avoid an IndexError on options[0]
+            st.warning(f"{WARNING_ICON} Pas de veille disponible")
+            st.stop()
+        # Fall back to the first feed when nothing is stored yet or when the
+        # stored feed is no longer among the user's feeds (options.index
+        # would raise ValueError otherwise).
+        if (
+            "model_id" not in st.session_state
+            or st.session_state.model_id not in options
+        ):
+            st.session_state.model_id = options[0]
+        model_id_key = uuid.uuid4()
+        model_id = st.selectbox(
+            "Sélection de la veille",
+            options=options,
+            index=options.index(st.session_state.model_id),
+            key=model_id_key,  # to avoid pb of unicity if displayed on several places
+            on_change=lambda: update_key("model_id", st.session_state[model_id_key]),
+        )
+    with col2:
+        list_models = get_models_info(model_id)
+        if not list_models:
+            st.warning(f"{WARNING_ICON} Pas de modèle disponible")
+            st.stop()
+        elif len(list_models) < 2:
+            st.warning(
+                f"{WARNING_ICON} 2 modèles minimum pour analyser les tendances !"
+            )
+            st.stop()
+        # The stored date may belong to a previously selected feed; the slider
+        # rejects a value that is not one of its options, so reset it to the
+        # most recent model in that case.
+        if (
+            "reference_ts" not in st.session_state
+            or st.session_state.reference_ts not in list_models
+        ):
+            st.session_state.reference_ts = list_models[-1]
+        ts_key = uuid.uuid4()
+        st.select_slider(
+            "Date d'analyse",
+            options=list_models,
+            value=st.session_state.reference_ts,
+            format_func=lambda ts: ts.strftime("%d/%m/%Y"),
+            help="Sélection de la date d'analyse parmi celles disponibles",
+            key=ts_key,  # to avoid pb of unicity if displayed on several places
+            on_change=lambda: update_key("reference_ts", st.session_state[ts_key]),
+        )
diff --git a/bertrend_apps/prospective_demo/dashboard_signals.py b/bertrend_apps/prospective_demo/dashboard_signals.py
@@ -0,0 +1,61 @@
+#  Copyright (c) 2024, RTE (https://www.rte-france.com)
+#  See AUTHORS.txt
+#  SPDX-License-Identifier: MPL-2.0
+#  This file is part of BERTrend.
+
+import streamlit as st
+
+from bertrend.demos.weak_signals.visualizations_utils import (
+    display_signal_categories_df,
+)
+from bertrend_apps.prospective_demo import (
+    LLM_TOPIC_DESCRIPTION_COLUMN,
+    NOISE,
+    WEAK_SIGNALS,
+    STRONG_SIGNALS,
+    get_model_interpretation_path,
+)
+from bertrend_apps.prospective_demo.dashboard_common import (
+    choose_id_and_ts,
+    get_df_topics,
+)
+
+
+def signal_analysis():
+    """Load the topic dataframes of the selected feed/date and display them.
+
+    The feed and the reference date are chosen through ``choose_id_and_ts()``
+    and read back from the session state; the noise, weak-signal and
+    strong-signal dataframes are then handed to ``display_signal_categories_df``.
+    """
+    st.write(
+        "Ici mettre seulement les tableaux weak / strong + les liens vers les articles"
+    )
+    # Feed and analysis-date selection (results land in the session state)
+    choose_id_and_ts()
+    state = st.session_state
+    feed_id, selected_ts = state.model_id, state.reference_ts
+
+    interpretation_dir = get_model_interpretation_path(
+        user_name=state.username,
+        model_id=feed_id,
+        reference_ts=selected_ts,
+    )
+
+    # Columns requested from the display helper
+    displayed_columns = ["Topic", LLM_TOPIC_DESCRIPTION_COLUMN, "Representation"]
+    displayed_columns += [
+        "Latest_Popularity",
+        "Docs_Count",
+        "Paragraphs_Count",
+        "Latest_Timestamp",
+    ]
+    displayed_columns += ["Documents", "Sources", "Source_Diversity"]
+
+    topics_by_category = get_df_topics(interpretation_dir)
+    noise_df = topics_by_category[NOISE]
+    weak_df = topics_by_category[WEAK_SIGNALS]
+    strong_df = topics_by_category[STRONG_SIGNALS]
+    display_signal_categories_df(
+        noise_df,
+        weak_df,
+        strong_df,
+        selected_ts,
+        columns=displayed_columns,
+    )
diff --git a/bertrend_apps/prospective_demo/process_new_data.py b/bertrend_apps/prospective_demo/process_new_data.py
@@ -136,7 +136,9 @@ def train_new_model(
                     ),
                     axis=1,
                 )
-                df.to_parquet(f"{interpretation_path}/{df_name}.parquet")
+                output_path = interpretation_path / f"{df_name}.parquet"
+                df.to_parquet(output_path)
+                logger.success(f"{df_name} saved to: {output_path}")
 
                 # Obtain detailed LLM-based interpretion for signals
                 generate_llm_interpretation(

diff --git a/bertrend_apps/prospective_demo/signal_analysis.py b/bertrend_apps/prospective_demo/signal_analysis.py