Skip to content

Commit

Permalink
Separation of signals dataframes and detailed analysis in separate tabs
Browse files Browse the repository at this point in the history
  • Loading branch information
picaultj committed Feb 11, 2025
1 parent 750480a commit 2c989bc
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 79 deletions.
17 changes: 17 additions & 0 deletions bertrend_apps/prospective_demo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# This file is part of BERTrend.
from pathlib import Path

import pandas as pd
import streamlit

from bertrend import MODELS_DIR, FEED_BASE_PATH, CONFIG_PATH

# Config path for users
Expand Down Expand Up @@ -38,18 +41,32 @@
}


@streamlit.cache_data
def get_user_feed_path(user_name: str, feed_id: str) -> Path:
    """Return the path of the feed configuration file of a user.

    The file is named ``<feed_id>_feed.toml`` and sits in the user's folder
    under ``CONFIG_FEEDS_BASE_PATH``. Nothing is created on disk.
    """
    return CONFIG_FEEDS_BASE_PATH / user_name / f"{feed_id}_feed.toml"


@streamlit.cache_data
def get_user_models_path(user_name: str, model_id: str) -> Path:
    """Return the folder holding the saved models of ``model_id`` for a user.

    The folder (``BASE_MODELS_DIR/<user_name>/<model_id>``) is created,
    together with any missing parent, when the function body runs.
    """
    # NOTE(review): the function is wrapped in streamlit.cache_data, so the
    # mkdir below presumably only runs on a cache miss - confirm this is the
    # intended behaviour if the folder can be removed while the app is running.
    user_models_dir = BASE_MODELS_DIR / user_name / model_id
    user_models_dir.mkdir(parents=True, exist_ok=True)
    return user_models_dir


@streamlit.cache_data
def get_model_cfg_path(user_name: str, model_id: str) -> Path:
    """Return the path of the analysis configuration file of a model.

    The file is named ``<model_id>_analysis.toml`` and sits in the user's
    folder under ``CONFIG_FEEDS_BASE_PATH``. Nothing is created on disk.
    """
    return CONFIG_FEEDS_BASE_PATH / user_name / f"{model_id}_analysis.toml"


@streamlit.cache_data
def get_model_interpretation_path(
    user_name: str, model_id: str, reference_ts: pd.Timestamp
) -> Path:
    """Return the interpretation folder of a model for a given analysis date.

    The result is ``<user models path>/INTERPRETATION_PATH/<YYYY-MM-DD>``,
    where the last component is ``reference_ts`` formatted as an ISO date.
    The user models path is obtained through ``get_user_models_path``.
    """
    user_models_dir = get_user_models_path(user_name=user_name, model_id=model_id)
    interpretation_root = user_models_dir / INTERPRETATION_PATH
    day_folder = reference_ts.strftime("%Y-%m-%d")
    return interpretation_root / day_folder
2 changes: 1 addition & 1 deletion bertrend_apps/prospective_demo/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from bertrend_apps.prospective_demo.feeds_data import display_data_status
from bertrend_apps.prospective_demo.models_info import models_monitoring
from bertrend_apps.prospective_demo.report_generation import reporting
from bertrend_apps.prospective_demo.signal_analysis import signal_analysis
from bertrend_apps.prospective_demo.dashboard_signals import signal_analysis

# UI Settings
# PAGE_TITLE = "BERTrend - Prospective Analysis demo"
Expand Down
82 changes: 17 additions & 65 deletions bertrend_apps/prospective_demo/dashboard_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,82 +18,34 @@
STRONG_SIGNALS,
NOISE,
LLM_TOPIC_DESCRIPTION_COLUMN,
get_model_interpretation_path,
)
from bertrend_apps.prospective_demo.dashboard_common import (
choose_id_and_ts,
get_df_topics,
)
from bertrend_apps.prospective_demo.models_info import get_models_info

COLS_RATIO = [2 / 7, 5 / 7]


@st.fragment()
def dashboard_analysis():
"""Dashboard to analyze information monitoring results"""
st.session_state.signal_interpretations = {}

col1, col2 = st.columns(COLS_RATIO)
with col1:
model_id = st.selectbox(
"Sélection de la veille", options=sorted(st.session_state.user_feeds.keys())
)
with col2:
list_models = get_models_info(model_id)
if not list_models:
st.warning(f"{WARNING_ICON} Pas de modèle disponible")
st.stop()
elif len(list_models) < 2:
st.warning(
f"{WARNING_ICON} 2 modèles minimum pour analyser les tendances !"
)
st.stop()
reference_ts = st.select_slider(
"Date d'analyse",
options=list_models,
value=list_models[-1],
format_func=lambda ts: ts.strftime("%d/%m/%Y"),
help="Sélection de la date d'analyse parmi celles disponibles",
)
choose_id_and_ts()

# LLM-based interpretation
model_interpretation_path = (
get_user_models_path(user_name=st.session_state.username, model_id=model_id)
/ INTERPRETATION_PATH
/ reference_ts.strftime("%Y-%m-%d")
)

dfs_topics = {}
for df_id in [NOISE, WEAK_SIGNALS, STRONG_SIGNALS]:
df_path = model_interpretation_path / f"{df_id}.parquet"
dfs_topics[df_id] = (
pd.read_parquet(df_path) if df_path.exists() else pd.DataFrame()
)
model_id = st.session_state.model_id
reference_ts = st.session_state.reference_ts

cols = st.columns(COLS_RATIO)
with cols[0]:
# Display data frames
columns = [
"Topic",
LLM_TOPIC_DESCRIPTION_COLUMN,
"Representation",
"Latest_Popularity",
"Docs_Count",
"Paragraphs_Count",
"Latest_Timestamp",
"Documents",
"Sources",
"Source_Diversity",
]

display_signal_categories_df(
dfs_topics[NOISE],
dfs_topics[WEAK_SIGNALS],
dfs_topics[STRONG_SIGNALS],
reference_ts,
columns=columns,
)
model_interpretation_path = get_model_interpretation_path(
user_name=st.session_state.username,
model_id=model_id,
reference_ts=reference_ts,
)

with cols[1]:
# Detailed analysis
st.subheader("Analyse détaillée par sujet")
display_detailed_analysis(model_id, model_interpretation_path, dfs_topics)
# Detailed analysis
st.subheader("Analyse détaillée par sujet")
dfs_topics = get_df_topics(model_interpretation_path)
display_detailed_analysis(model_id, model_interpretation_path, dfs_topics)


@st.fragment()
Expand Down
69 changes: 69 additions & 0 deletions bertrend_apps/prospective_demo/dashboard_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright (c) 2024, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# SPDX-License-Identifier: MPL-2.0
# This file is part of BERTrend.

import uuid
from typing import Any

import pandas as pd
import streamlit as st

from bertrend.demos.demos_utils.icons import WARNING_ICON
from bertrend_apps.prospective_demo import NOISE, WEAK_SIGNALS, STRONG_SIGNALS
from bertrend_apps.prospective_demo.models_info import get_models_info

COLS_RATIO_ID_TS = [2 / 7, 5 / 7]


@st.cache_data
def get_df_topics(model_interpretation_path=None) -> dict[str, pd.DataFrame]:
    """Load the topic dataframes of every signal category.

    Args:
        model_interpretation_path: Folder (a path object supporting ``/``)
            expected to contain one ``<category>.parquet`` file per signal
            category (``NOISE``, ``WEAK_SIGNALS``, ``STRONG_SIGNALS``).
            When ``None``, no file is read.

    Returns:
        A dict mapping each category identifier to its dataframe. A category
        whose parquet file does not exist (or every category when no path is
        given) is mapped to an empty dataframe.
    """
    # NOTE(review): results are wrapped in st.cache_data; presumably a parquet
    # file rewritten on disk is not reloaded until the cache entry is cleared -
    # confirm this is acceptable for the dashboards.
    categories = (NOISE, WEAK_SIGNALS, STRONG_SIGNALS)

    # The parameter defaults to None: without this guard the path join below
    # would raise a TypeError instead of behaving like "no data available".
    if model_interpretation_path is None:
        return {df_id: pd.DataFrame() for df_id in categories}

    dfs_topics = {}
    for df_id in categories:
        df_path = model_interpretation_path / f"{df_id}.parquet"
        dfs_topics[df_id] = (
            pd.read_parquet(df_path) if df_path.exists() else pd.DataFrame()
        )
    return dfs_topics


def update_key(key: str, new_value: Any):
    """Store ``new_value`` under ``key`` in the Streamlit session state."""
    session = st.session_state
    session[key] = new_value


def choose_id_and_ts() -> None:
    """Render the feed selector and the analysis-date slider.

    The current choices are kept in ``st.session_state.model_id`` and
    ``st.session_state.reference_ts`` so that they are shared by every place
    where these two widgets are displayed. Callers read the selection from
    the session state; the function returns nothing.

    The script run is stopped (``st.stop()``) with a warning when the user has
    no feed, when the selected feed has no model, or when it has fewer than
    two models.
    """
    col1, col2 = st.columns(COLS_RATIO_ID_TS)
    with col1:
        options = sorted(st.session_state.user_feeds.keys())
        if not options:
            # Without any feed there is nothing to select (options[0] would
            # raise an IndexError).
            st.warning(f"{WARNING_ICON} Pas de veille disponible")
            st.stop()
        # Initialise the selection, and also reset it when the stored feed no
        # longer exists (options.index() would raise a ValueError).
        if (
            "model_id" not in st.session_state
            or st.session_state.model_id not in options
        ):
            st.session_state.model_id = options[0]
        # Fresh key so the same widget can be displayed in several places
        # without a duplicate-key conflict.
        model_id_key = uuid.uuid4()
        model_id = st.selectbox(
            "Sélection de la veille",
            options=options,
            index=options.index(st.session_state.model_id),
            key=model_id_key,
            on_change=lambda: update_key("model_id", st.session_state[model_id_key]),
        )
    with col2:
        list_models = get_models_info(model_id)
        if not list_models:
            st.warning(f"{WARNING_ICON} Pas de modèle disponible")
            st.stop()
        elif len(list_models) < 2:
            st.warning(
                f"{WARNING_ICON} 2 modèles minimum pour analyser les tendances !"
            )
            st.stop()
        # Initialise the date, and also reset it when the stored date is not
        # available for the currently selected feed (e.g. after switching
        # feed): select_slider rejects a value that is not among its options.
        if (
            "reference_ts" not in st.session_state
            or st.session_state.reference_ts not in list_models
        ):
            st.session_state.reference_ts = list_models[-1]
        # Fresh key, for the same reason as the selectbox above.
        ts_key = uuid.uuid4()
        st.select_slider(
            "Date d'analyse",
            options=list_models,
            value=st.session_state.reference_ts,
            format_func=lambda ts: ts.strftime("%d/%m/%Y"),
            help="Sélection de la date d'analyse parmi celles disponibles",
            key=ts_key,
            on_change=lambda: update_key("reference_ts", st.session_state[ts_key]),
        )
61 changes: 61 additions & 0 deletions bertrend_apps/prospective_demo/dashboard_signals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2024, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# SPDX-License-Identifier: MPL-2.0
# This file is part of BERTrend.

import streamlit as st

from bertrend.demos.weak_signals.visualizations_utils import (
display_signal_categories_df,
)
from bertrend_apps.prospective_demo import (
LLM_TOPIC_DESCRIPTION_COLUMN,
NOISE,
WEAK_SIGNALS,
STRONG_SIGNALS,
get_model_interpretation_path,
)
from bertrend_apps.prospective_demo.dashboard_common import (
choose_id_and_ts,
get_df_topics,
)


def signal_analysis():
    """Signals tab: show the noise / weak / strong signal dataframes.

    The feed and analysis date are chosen through ``choose_id_and_ts`` and
    read back from the session state; the matching dataframes are then loaded
    with ``get_df_topics`` and handed to ``display_signal_categories_df``.
    """
    st.write(
        "Ici mettre seulement les tableaux weak / strong + les liens vers les articles"
    )

    # Feed identifier and analysis date selection
    choose_id_and_ts()
    state = st.session_state
    model_id = state.model_id
    reference_ts = state.reference_ts

    # Folder holding the interpretation results for that feed and date
    model_interpretation_path = get_model_interpretation_path(
        user_name=state.username,
        model_id=model_id,
        reference_ts=reference_ts,
    )
    dfs_topics = get_df_topics(model_interpretation_path)

    # Columns passed to the display helper
    displayed_columns = [
        "Topic",
        LLM_TOPIC_DESCRIPTION_COLUMN,
        "Representation",
        "Latest_Popularity",
        "Docs_Count",
        "Paragraphs_Count",
        "Latest_Timestamp",
        "Documents",
        "Sources",
        "Source_Diversity",
    ]

    # One dataframe per signal category, in the order noise / weak / strong
    noise_df, weak_df, strong_df = (
        dfs_topics[category] for category in (NOISE, WEAK_SIGNALS, STRONG_SIGNALS)
    )
    display_signal_categories_df(
        noise_df,
        weak_df,
        strong_df,
        reference_ts,
        columns=displayed_columns,
    )
4 changes: 3 additions & 1 deletion bertrend_apps/prospective_demo/process_new_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ def train_new_model(
),
axis=1,
)
df.to_parquet(f"{interpretation_path}/{df_name}.parquet")
output_path = interpretation_path / f"{df_name}.parquet"
df.to_parquet(output_path)
logger.success(f"{df_name} saved to: {output_path}")

# Obtain detailed LLM-based interpretation for signals
generate_llm_interpretation(
Expand Down
12 changes: 0 additions & 12 deletions bertrend_apps/prospective_demo/signal_analysis.py

This file was deleted.

0 comments on commit 2c989bc

Please sign in to comment.