From 7009d59c2850f46a99d61d1f09eea54bbaecc451 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Picault?=
Date: Mon, 27 Jan 2025 09:13:53 +0100
Subject: [PATCH] Updated train topics function for incremental topic learning

Updated notebook
---
 bertrend/BERTrend.py                      |    7 +-
 bertrend/trend_analysis/weak_signals.py   |   11 +-
 getting_started/bertrend_quickstart.ipynb | 2051 +++++++++++++++++++++
 pyproject.toml                            |    3 +-
 4 files changed, 2063 insertions(+), 9 deletions(-)
 create mode 100644 getting_started/bertrend_quickstart.ipynb

diff --git a/bertrend/BERTrend.py b/bertrend/BERTrend.py
index 8c9d376..c1e93da 100644
--- a/bertrend/BERTrend.py
+++ b/bertrend/BERTrend.py
@@ -259,12 +259,13 @@ def train_topic_models(
 
         self._is_fitted = True
 
+        # Merge the newly obtained topic models with the existing ones
         # Update topic_models: Dictionary of trained BERTopic models for each timestamp.
-        self.topic_models = topic_models
+        self.topic_models.update(topic_models)
         # Update doc_groups: Dictionary of document groups for each timestamp.
-        self.doc_groups = doc_groups
+        self.doc_groups.update(doc_groups)
         # Update emb_groups: Dictionary of document embeddings for each timestamp.
-        self.emb_groups = emb_groups
+        self.emb_groups.update(emb_groups)
         logger.success("Finished training all topic models")
 
     def merge_all_models(
diff --git a/bertrend/trend_analysis/weak_signals.py b/bertrend/trend_analysis/weak_signals.py
index ddb7255..0a278c4 100644
--- a/bertrend/trend_analysis/weak_signals.py
+++ b/bertrend/trend_analysis/weak_signals.py
@@ -8,6 +8,7 @@
 import scipy
 from bertopic import BERTopic
 from loguru import logger
+from pandas import Timestamp
 
 from bertrend.llm_utils.openai_client import OpenAI_Client
 from bertrend import LLM_CONFIG
@@ -15,24 +16,24 @@
 
 
 def detect_weak_signals_zeroshot(
-    topic_models: dict[pd.Timestamp, BERTopic],
+    topic_models: dict[Timestamp, BERTopic],
     zeroshot_topic_list: list[str],
     granularity: int,
     decay_factor: float = 0.01,
     decay_power: float = 2,
-) -> dict[str, dict[pd.Timestamp, dict[str, any]]]:
+) -> dict[str, dict[Timestamp, dict[str, any]]]:
     """
     Detect weak signals based on the zero-shot list of topics to monitor.
 
     Args:
-        topic_models (Dict[pd.Timestamp, BERTopic]): Dictionary of BERTopic models for each timestamp.
+        topic_models (Dict[Timestamp, BERTopic]): Dictionary of BERTopic models for each timestamp.
         zeroshot_topic_list (List[str]): List of topics to monitor for weak signals.
         granularity (int): The granularity of the timestamps in days.
         decay_factor (float): The decay factor for exponential decay.
         decay_power (float): The decay power for exponential decay.
 
     Returns:
-        Dict[str, Dict[pd.Timestamp, Dict[str, any]]]: Dictionary of weak signal trends for each monitored topic.
+        Dict[str, Dict[Timestamp, Dict[str, any]]]: Dictionary of weak signal trends for each monitored topic.
""" weak_signal_trends = {} @@ -329,7 +330,7 @@ def _apply_decay_to_inactive_topics( topic_last_popularity[topic] = decayed_popularity -def analyze_signal(bertrend, topic_number: int, current_date): +def analyze_signal(bertrend, topic_number: int, current_date: Timestamp): topic_merge_rows = bertrend.all_merge_histories_df[ bertrend.all_merge_histories_df["Topic1"] == topic_number ].sort_values("Timestamp") diff --git a/getting_started/bertrend_quickstart.ipynb b/getting_started/bertrend_quickstart.ipynb new file mode 100644 index 0000000..f552fac --- /dev/null +++ b/getting_started/bertrend_quickstart.ipynb @@ -0,0 +1,2051 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "\n", + "\n" + ], + "id": "73870050e69c50e6" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# BERTrend quickstart\n", + "The purpose of this notebook is to complement the existing demos available in the directory `bertrend/demos` with some code examples that explain how to integrate BERTrend with your application code." + ], + "id": "10a9d82c667c7fbe" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:00:24.370757Z", + "start_time": "2025-01-20T15:00:24.349873Z" + } + }, + "cell_type": "code", + "source": [ + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ], + "id": "849734b0d71f2495", + "outputs": [], + "execution_count": 1 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## BERTrend installation", + "id": "a795490c2d3e539e" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-26T21:07:28.378082Z", + "start_time": "2025-01-26T21:07:28.370941Z" + } + }, + "cell_type": "code", + "source": [ + "import json\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "from pandas import Timestamp\n", + "from IPython.display import display\n", + "from loguru import logger\n", + "\n", + "from bertrend import DATA_PATH\n", + "from bertrend.BERTrend import BERTrend\n", + "from bertrend import MODELS_DIR\n", + "from bertrend.utils.data_loading import load_data, split_data, TEXT_COLUMN\n", + "from bertrend.services.embedding_service import EmbeddingService\n", + "from bertrend.BERTopicModel import BERTopicModel\n", + "from bertrend.topic_analysis.topic_description import generate_topic_description\n", + "from bertrend.trend_analysis.weak_signals import analyze_signal\n" + ], + "id": "ba4a7eacde91b892", + "outputs": [], + "execution_count": 54 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-26T21:07:30.328141Z", + "start_time": "2025-01-26T21:07:30.324568Z" + } + }, + "cell_type": "code", + "source": "#!pip install bertrend", + "id": "74702a2391f80f72", + "outputs": [], + "execution_count": 55 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Configuration of topic models", + "id": "ca03bdd5398b56b3" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:00:35.343828Z", + "start_time": "2025-01-20T15:00:35.298417Z" + } + }, + "cell_type": "code", + "source": [ + "# Topic model with default parameters - each parameter of BERTopic can be modified from the constructor or can be read from a configuration file\n", + "# overrides the default config to use English\n", + "config = '''\n", + "# Default configuration file to be used for topic model\n", + "\n", + "# Global parameters\n", + "[global]\n", + "language = \"English\"\n", + "\n", + "# BERTopic parameters: 
https://maartengr.github.io/BERTopic/api/bertopic.html#bertopic._bertopic.BERTopic.__init__\n",
+    "[bertopic_model]\n",
+    "top_n_words = 10\n",
+    "verbose = true\n",
+    "representation_model = [\"MaximalMarginalRelevance\"] # KeyBERTInspired, OpenAI\n",
+    "zeroshot_topic_list = []\n",
+    "zeroshot_min_similarity = 0\n",
+    "\n",
+    "# UMAP parameters: https://umap-learn.readthedocs.io/en/latest/api.html\n",
+    "[umap_model]\n",
+    "n_neighbors = 5\n",
+    "n_components = 5\n",
+    "min_dist = 0.0\n",
+    "metric = \"cosine\"\n",
+    "random_state = 42\n",
+    "\n",
+    "# HDBSCAN parameters: https://hdbscan.readthedocs.io/en/latest/api.html\n",
+    "[hdbscan_model]\n",
+    "min_cluster_size = 5\n",
+    "min_samples = 5\n",
+    "metric = \"euclidean\"\n",
+    "cluster_selection_method = \"eom\"\n",
+    "prediction_data = true\n",
+    "\n",
+    "# CountVectorizer: https://scikit-learn.org/1.5/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html\n",
+    "[vectorizer_model]\n",
+    "ngram_range = [1, 1]\n",
+    "stop_words = true # If true, will check `language` parameter and load associated stopwords file\n",
+    "min_df = 2\n",
+    "\n",
+    "# ClassTfidfTransformer: https://maartengr.github.io/BERTopic/api/ctfidf.html\n",
+    "[ctfidf_model]\n",
+    "bm25_weighting = false\n",
+    "reduce_frequent_words = true\n",
+    "\n",
+    "# MaximalMarginalRelevance: https://maartengr.github.io/BERTopic/api/representation/mmr.html\n",
+    "[mmr_model]\n",
+    "diversity = 0.3\n",
+    "\n",
+    "# Reduce outliers: https://maartengr.github.io/BERTopic/api/bertopic.html#bertopic._bertopic.BERTopic.reduce_outliers\n",
+    "[reduce_outliers]\n",
+    "strategy = \"c-tf-idf\"\n",
+    "'''\n",
+    "\n",
+    "topic_model = BERTopicModel(config)"
+   ],
+   "id": "b97d93ac81a4d420",
+   "outputs": [],
+   "execution_count": 6
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-20T15:00:35.547370Z",
+     "start_time": "2025-01-20T15:00:35.486087Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "# The BERTopicModel class is mainly a wrapper around BERTopic. It can be used as-is, for example for a first analysis of the data that does not take evolving trends into account (a usage sketch follows)",
+   "id": "fa92f4b55e7b7b72",
+   "outputs": [],
+   "execution_count": 8
+  },
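+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "For illustration only, the sketch below shows how the wrapper could be fitted directly on a whole dataset, without any time slicing. It is left commented out because `df` and `embeddings` are only defined later in this notebook, and because the parameter names of `BERTopicModel.fit` are an assumption to be checked against `bertrend/BERTopicModel.py`."
+   ],
+   "id": "1a2b3c4d5e6f7a81"
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Minimal sketch of a direct (non-temporal) topic analysis with the wrapper.\n",
+    "# Assumption: fit() accepts the documents and their precomputed embeddings;\n",
+    "# check the actual signature in bertrend/BERTopicModel.py before running.\n",
+    "# output = topic_model.fit(docs=df[TEXT_COLUMN].tolist(), embeddings=embeddings)\n",
+    "# output.topic_model.get_topic_info()  # standard BERTopic inspection on the fitted model (assumed attribute)"
+   ],
+   "id": "1a2b3c4d5e6f7a82",
+   "outputs": [],
+   "execution_count": null
+  },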
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## Using BERTrend for retrospective analysis",
+   "id": "7cfd832467877a23"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "### Instantiation of BERTrend\n",
+   "id": "6a07ec11284b82cb"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "In the case of a **retrospective trend analysis** task, the goal is to identify and evaluate patterns or changes over time within a dataset, allowing for insights into historical performance, behaviors, or events that can inform future decision-making and strategy development.\n",
+    "\n",
+    "In this context, the general principle consists of splitting the past data into different time slices. Each slice is then used to train a separate topic model. The topic model corresponding to an older data slice is merged with the next one, and decay factors are applied. This provides a view of how topics evolve over time."
+   ],
+   "id": "c5118dce73f8cfce"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-20T15:00:35.784959Z",
+     "start_time": "2025-01-20T15:00:35.745153Z"
+    }
+   },
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": 9,
+   "source": [
+    "# Basic creation of the object and parametrization\n",
+    "# BERTrend uses several topic models; therefore, it is necessary to pass a topic_model object as a reference\n",
+    "bertrend = BERTrend(topic_model=topic_model)"
+   ],
+   "id": "52bc66eed5bb040"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "### 1. Gather historical data to be analyzed\n",
+   "id": "bf7cd6699bf77299"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-20T15:00:35.978219Z",
+     "start_time": "2025-01-20T15:00:35.813108Z"
+    }
+   },
+   "cell_type": "code",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "                 ID         timestamp  \\\n",
+       "0  @realDonaldTrump  2017-01-20 06:31   \n",
+       "1  @realDonaldTrump  2017-01-20 11:51   \n",
+       "2  @realDonaldTrump  2017-01-20 11:51   \n",
+       "3  @realDonaldTrump  2017-01-20 11:52   \n",
+       "4  @realDonaldTrump  2017-01-20 11:53   \n",
+       "\n",
+       "                                                 url  \\\n",
+       "0  https://twitter.com/realDonaldTrump/status/822...   \n",
+       "1  https://twitter.com/realDonaldTrump/status/822...   \n",
+       "2  https://twitter.com/realDonaldTrump/status/822...   \n",
+       "3  https://twitter.com/realDonaldTrump/status/822...   \n",
+       "4  https://twitter.com/realDonaldTrump/status/822...   \n",
+       "\n",
+       "                                                text            source  \\\n",
+       "0  It all begins today! I will see you at 11:00 A...  @realDonaldTrump   \n",
+       "1  Today we are not merely transferring power fro...  @realDonaldTrump   \n",
+       "2  power from Washington, D.C. and giving it back...  @realDonaldTrump   \n",
+       "3  What truly matters is not which party controls...  @realDonaldTrump   \n",
+       "4  January 20th 2017, will be remembered as the d...  @realDonaldTrump   \n",
+       "\n",
+       "   document_id  \n",
+       "0            0  \n",
+       "1            1  \n",
+       "2            2  \n",
+       "3            3  \n",
+       "4            4  "
+      ]
+     },
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDtimestampurltextsourcedocument_id
0@realDonaldTrump2017-01-20 06:31https://twitter.com/realDonaldTrump/status/822...It all begins today! I will see you at 11:00 A...@realDonaldTrump0
1@realDonaldTrump2017-01-20 11:51https://twitter.com/realDonaldTrump/status/822...Today we are not merely transferring power fro...@realDonaldTrump1
2@realDonaldTrump2017-01-20 11:51https://twitter.com/realDonaldTrump/status/822...power from Washington, D.C. and giving it back...@realDonaldTrump2
3@realDonaldTrump2017-01-20 11:52https://twitter.com/realDonaldTrump/status/822...What truly matters is not which party controls...@realDonaldTrump3
4@realDonaldTrump2017-01-20 11:53https://twitter.com/realDonaldTrump/status/822...January 20th 2017, will be remembered as the d...@realDonaldTrump4
\n", + "
" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10, + "source": [ + "# Here some Trump tweets from: https://github.com/MarkHershey/CompleteTrumpTweetsArchive/blob/master/data/realDonaldTrump_in_office.csv\n", + "#!wget \"https://raw.githubusercontent.com/MarkHershey/CompleteTrumpTweetsArchive/refs/heads/master/data/realDonaldTrump_in_office.csv\"\n", + "df = pd.read_csv(\"realDonaldTrump_in_office.csv\", sep=',',quotechar='\"', skipinitialspace=True)\n", + "# BERTrend expects specific data format\n", + "df = df.rename(columns={'Time': 'timestamp', 'Tweet URL': 'url', \"Tweet Text\": \"text\"})\n", + "df[\"source\"]=df[\"ID\"]\n", + "df[\"document_id\"] = df.index\n", + "df.reset_index(inplace=True, drop=True)\n", + "df.head(5)" + ], + "id": "154fb553f7004986" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:00:36.089939Z", + "start_time": "2025-01-20T15:00:36.031108Z" + } + }, + "cell_type": "code", + "source": "df.index", + "id": "d2e8b96b46718241", + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=23075, step=1)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### 2. Embed data", + "id": "9d26753d9496a25" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Selection of a subset of data\n", + "df = df.head(1000)\n", + "\n", + "embedding_service_cfg = {\"local\": False, \"host\":\"10.132.5.44\", \"port\": 6464}\n", + "\n", + "embedding_service = EmbeddingService(**embedding_service_cfg)\n", + "embeddings, token_strings, token_embeddings = embedding_service.embed(\n", + " texts=df[\"text\"],\n", + " )" + ], + "id": "1ca3e17198fdbb6a", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "d32f4a70dfe634ba" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "25ef45b3d3e34f4" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "a732b1c0303ce39e" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:01:16.783505Z", + "start_time": "2025-01-20T15:00:36.289891Z" + } + }, + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:00:36.345\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.services.embedding_service\u001B[0m:\u001B[36m_remote_embed_documents\u001B[0m:\u001B[36m203\u001B[0m - \u001B[34m\u001B[1mComputing embeddings...\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:16.205\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.services.embedding_service\u001B[0m:\u001B[36m_remote_embed_documents\u001B[0m:\u001B[36m210\u001B[0m - \u001B[34m\u001B[1mComputing embeddings done for batch\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:16.779\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.services.embedding_service\u001B[0m:\u001B[36m_get_remote_model_name\u001B[0m:\u001B[36m226\u001B[0m - \u001B[34m\u001B[1mModel name: OrdalieTech/Solon-embeddings-large-0.1\u001B[0m\n" + ] + } + ], + "execution_count": 13, + "source": "", + "id": "7e02db73cd68797a" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:01:16.856529Z", + "start_time": 
"2025-01-20T15:01:16.812294Z" + } + }, + "cell_type": "code", + "source": "embedding_model_name = embedding_service.embedding_model_name\n", + "id": "72df96f5c7d8d52b", + "outputs": [], + "execution_count": 14 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### 3. Split the data into time slices\n", + "\n", + "This can be done manually for some reason or can be done automatically based on a specified time granularity" + ], + "id": "2e94b24d1ef107a2" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:01:16.964906Z", + "start_time": "2025-01-20T15:01:16.921763Z" + } + }, + "cell_type": "code", + "source": [ + "from bertrend.utils.data_loading import group_by_days, load_data\n", + "\n", + "day_granularity = 30\n", + "grouped_data = group_by_days(df=df, day_granularity=day_granularity)" + ], + "id": "9ea313bff64c8cce", + "outputs": [], + "execution_count": 16 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:01:17.040491Z", + "start_time": "2025-01-20T15:01:16.997388Z" + } + }, + "cell_type": "code", + "source": [ + "# Number of sliced data\n", + "len(grouped_data)" + ], + "id": "a89b3c810c4575bc", + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 17 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### 4. Train topic models", + "id": "9d7ffa03a6ed9330" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:11.584568Z", + "start_time": "2025-01-20T15:01:17.180822Z" + } + }, + "cell_type": "code", + "source": "bertrend.train_topic_models(grouped_data=grouped_data, embedding_model=embedding_model_name, embeddings=embeddings)", + "id": "8e11789ecb115639", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:01:17.216\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 1/6...\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:17.217\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-01-20 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:17.218\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 184\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:17.218\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:17.219\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:17.221\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:01:17.221\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting BERTopic 
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "### 4. Train topic models",
+   "id": "9d7ffa03a6ed9330"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-20T15:07:11.584568Z",
+     "start_time": "2025-01-20T15:01:17.180822Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "bertrend.train_topic_models(grouped_data=grouped_data, embedding_model=embedding_model_name, embeddings=embeddings)",
+   "id": "8e11789ecb115639",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001B[32m2025-01-20 16:01:17.216\u001B[0m | \u001B[1mINFO    \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 1/6...\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:17.217\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-01-20 00:00:00\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:17.218\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 184\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:17.218\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:17.219\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:17.221\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:17.221\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting BERTopic model\u001B[0m\n",
+      "2025-01-20 16:01:19,876 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n",
+      "2025-01-20 16:01:25,485 - BERTopic - Dimensionality - Completed ✓\n",
+      "2025-01-20 16:01:25,486 - BERTopic - Cluster - Start clustering the reduced embeddings\n",
+      "2025-01-20 16:01:25,494 - BERTopic - Cluster - Completed ✓\n",
+      "2025-01-20 16:01:25,497 - BERTopic - Representation - Extracting topics from clusters using representation models.\n",
+      "2025-01-20 16:01:44,786 - BERTopic - Representation - Completed ✓\n",
+      "\u001B[32m2025-01-20 16:01:44.829\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m231\u001B[0m - \u001B[34m\u001B[1m\tReducing outliers\u001B[0m\n",
+      "2025-01-20 16:01:44,832 - BERTopic - WARNING: Using a custom list of topic assignments may lead to errors if topic reduction techniques are used afterwards. Make sure that manually assigning topics is the last step in the pipeline.Note that topic embeddings will also be created through weighted c-TF-IDF embeddings instead of centroid embeddings.\n",
+      "\u001B[32m2025-01-20 16:01:58.869\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m259\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model fitted successfully\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.871\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m158\u001B[0m - \u001B[34m\u001B[1mTopic model created successfully\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.911\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m244\u001B[0m - \u001B[34m\u001B[1mSuccessfully processed period: 2017-01-20 00:00:00\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.912\u001B[0m | \u001B[1mINFO    \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 2/6...\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.914\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-02-19 00:00:00\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.915\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 123\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.921\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.922\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.924\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n",
+      "\u001B[32m2025-01-20 16:01:58.925\u001B[0m | \u001B[34m\u001B[1mDEBUG   \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting 
BERTopic model\u001B[0m\n", + "2025-01-20 16:02:00,684 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n", + "2025-01-20 16:02:00,878 - BERTopic - Dimensionality - Completed ✓\n", + "2025-01-20 16:02:00,879 - BERTopic - Cluster - Start clustering the reduced embeddings\n", + "2025-01-20 16:02:00,886 - BERTopic - Cluster - Completed ✓\n", + "2025-01-20 16:02:00,889 - BERTopic - Representation - Extracting topics from clusters using representation models.\n", + "2025-01-20 16:02:25,983 - BERTopic - Representation - Completed ✓\n", + "\u001B[32m2025-01-20 16:02:26.023\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m231\u001B[0m - \u001B[34m\u001B[1m\tReducing outliers\u001B[0m\n", + "2025-01-20 16:02:26,029 - BERTopic - WARNING: Using a custom list of topic assignments may lead to errors if topic reduction techniques are used afterwards. Make sure that manually assigning topics is the last step in the pipeline.Note that topic embeddings will also be created through weightedc-TF-IDF embeddings instead of centroid embeddings.\n", + "\u001B[32m2025-01-20 16:02:43.040\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m259\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model fitted successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.043\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m158\u001B[0m - \u001B[34m\u001B[1mTopic model created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.101\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m244\u001B[0m - \u001B[34m\u001B[1mSuccessfully processed period: 2017-02-19 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.102\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 3/6...\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.103\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-03-21 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.104\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 132\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.107\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.111\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.115\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:02:43.119\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - 
\u001B[34m\u001B[1m\tFitting BERTopic model\u001B[0m\n", + "2025-01-20 16:02:45,433 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n", + "2025-01-20 16:02:45,619 - BERTopic - Dimensionality - Completed ✓\n", + "2025-01-20 16:02:45,620 - BERTopic - Cluster - Start clustering the reduced embeddings\n", + "2025-01-20 16:02:45,628 - BERTopic - Cluster - Completed ✓\n", + "2025-01-20 16:02:45,631 - BERTopic - Representation - Extracting topics from clusters using representation models.\n", + "2025-01-20 16:03:14,636 - BERTopic - Representation - Completed ✓\n", + "\u001B[32m2025-01-20 16:03:14.732\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m231\u001B[0m - \u001B[34m\u001B[1m\tReducing outliers\u001B[0m\n", + "2025-01-20 16:03:14,738 - BERTopic - WARNING: Using a custom list of topic assignments may lead to errors if topic reduction techniques are used afterwards. Make sure that manually assigning topics is the last step in the pipeline.Note that topic embeddings will also be created through weightedc-TF-IDF embeddings instead of centroid embeddings.\n", + "\u001B[32m2025-01-20 16:03:36.833\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m259\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model fitted successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.835\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m158\u001B[0m - \u001B[34m\u001B[1mTopic model created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.871\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m244\u001B[0m - \u001B[34m\u001B[1mSuccessfully processed period: 2017-03-21 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.874\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 4/6...\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.879\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-04-20 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.882\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 168\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.884\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.885\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.887\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:03:36.910\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | 
\u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting BERTopic model\u001B[0m\n", + "2025-01-20 16:03:39,750 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n", + "2025-01-20 16:03:40,226 - BERTopic - Dimensionality - Completed ✓\n", + "2025-01-20 16:03:40,227 - BERTopic - Cluster - Start clustering the reduced embeddings\n", + "2025-01-20 16:03:40,253 - BERTopic - Cluster - Completed ✓\n", + "2025-01-20 16:03:40,261 - BERTopic - Representation - Extracting topics from clusters using representation models.\n", + "2025-01-20 16:04:04,727 - BERTopic - Representation - Completed ✓\n", + "\u001B[32m2025-01-20 16:04:04.818\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m231\u001B[0m - \u001B[34m\u001B[1m\tReducing outliers\u001B[0m\n", + "2025-01-20 16:04:04,823 - BERTopic - WARNING: Using a custom list of topic assignments may lead to errors if topic reduction techniques are used afterwards. Make sure that manually assigning topics is the last step in the pipeline.Note that topic embeddings will also be created through weightedc-TF-IDF embeddings instead of centroid embeddings.\n", + "\u001B[32m2025-01-20 16:04:18.895\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m259\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model fitted successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.898\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m158\u001B[0m - \u001B[34m\u001B[1mTopic model created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.951\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m244\u001B[0m - \u001B[34m\u001B[1mSuccessfully processed period: 2017-04-20 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.954\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 5/6...\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.956\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-05-20 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.959\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 161\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.960\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.961\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.965\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:04:18.969\u001B[0m | \u001B[34m\u001B[1mDEBUG 
\u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting BERTopic model\u001B[0m\n", + "2025-01-20 16:04:21,148 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n", + "2025-01-20 16:04:21,368 - BERTopic - Dimensionality - Completed ✓\n", + "2025-01-20 16:04:21,368 - BERTopic - Cluster - Start clustering the reduced embeddings\n", + "2025-01-20 16:04:21,377 - BERTopic - Cluster - Completed ✓\n", + "2025-01-20 16:04:21,381 - BERTopic - Representation - Extracting topics from clusters using representation models.\n", + "2025-01-20 16:04:55,543 - BERTopic - Representation - Completed ✓\n", + "\u001B[32m2025-01-20 16:04:55.638\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m231\u001B[0m - \u001B[34m\u001B[1m\tReducing outliers\u001B[0m\n", + "2025-01-20 16:04:55,642 - BERTopic - WARNING: Using a custom list of topic assignments may lead to errors if topic reduction techniques are used afterwards. Make sure that manually assigning topics is the last step in the pipeline.Note that topic embeddings will also be created through weightedc-TF-IDF embeddings instead of centroid embeddings.\n", + "\u001B[32m2025-01-20 16:05:25.159\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m259\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model fitted successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.161\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m158\u001B[0m - \u001B[34m\u001B[1mTopic model created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.192\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m244\u001B[0m - \u001B[34m\u001B[1mSuccessfully processed period: 2017-05-20 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.194\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 6/6...\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.194\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2017-06-19 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.195\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 232\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.196\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.196\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.197\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:05:25.198\u001B[0m | 
\u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting BERTopic model\u001B[0m\n", + "2025-01-20 16:05:26,934 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n", + "2025-01-20 16:05:27,265 - BERTopic - Dimensionality - Completed ✓\n", + "2025-01-20 16:05:27,265 - BERTopic - Cluster - Start clustering the reduced embeddings\n", + "2025-01-20 16:05:27,276 - BERTopic - Cluster - Completed ✓\n", + "2025-01-20 16:05:27,278 - BERTopic - Representation - Extracting topics from clusters using representation models.\n", + "2025-01-20 16:06:25,302 - BERTopic - Representation - Completed ✓\n", + "\u001B[32m2025-01-20 16:06:25.384\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m231\u001B[0m - \u001B[34m\u001B[1m\tReducing outliers\u001B[0m\n", + "2025-01-20 16:06:25,388 - BERTopic - WARNING: Using a custom list of topic assignments may lead to errors if topic reduction techniques are used afterwards. Make sure that manually assigning topics is the last step in the pipeline.Note that topic embeddings will also be created through weightedc-TF-IDF embeddings instead of centroid embeddings.\n", + "\u001B[32m2025-01-20 16:07:11.512\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m259\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model fitted successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:07:11.516\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m158\u001B[0m - \u001B[34m\u001B[1mTopic model created successfully\u001B[0m\n", + "\u001B[32m2025-01-20 16:07:11.563\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m244\u001B[0m - \u001B[34m\u001B[1mSuccessfully processed period: 2017-06-19 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-20 16:07:11.566\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m268\u001B[0m - \u001B[32m\u001B[1mFinished training all topic models\u001B[0m\n" + ] + } + ], + "execution_count": 19 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### 5. (Optional) Save trained_models", + "id": "855c151c8cd9f93d" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:12.523789Z", + "start_time": "2025-01-20T15:07:12.377692Z" + } + }, + "cell_type": "code", + "source": "bertrend.save_models()", + "id": "2a54146c6b5f591b", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:07:12.514\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36msave_models\u001B[0m:\u001B[36m652\u001B[0m - \u001B[1mModels saved to: /home/jerome/dev/cache/bertrend/models\u001B[0m\n" + ] + } + ], + "execution_count": 21 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### 6. 
Merge models", + "id": "6d76285c9be44e92" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:13.179985Z", + "start_time": "2025-01-20T15:07:12.853779Z" + } + }, + "cell_type": "code", + "source": "bertrend.merge_all_models()", + "id": "a95fd062728118e9", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:07:13.172\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mmerge_all_models\u001B[0m:\u001B[36m351\u001B[0m - \u001B[32m\u001B[1mAll models merged successfully\u001B[0m\n" + ] + } + ], + "execution_count": 23 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### 7. Calculate signal popularity", + "id": "d5cbf21f65102cd5" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:13.819430Z", + "start_time": "2025-01-20T15:07:13.579473Z" + } + }, + "cell_type": "code", + "source": "bertrend.calculate_signal_popularity()", + "id": "94859eb8b9944224", + "outputs": [], + "execution_count": 25 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:13.939621Z", + "start_time": "2025-01-20T15:07:13.854683Z" + } + }, + "cell_type": "code", + "source": [ + "# List of topic models\n", + "bertrend.topic_models" + ], + "id": "7a989f7d97083e70", + "outputs": [ + { + "data": { + "text/plain": [ + "{Timestamp('2017-01-20 00:00:00'): ,\n", + " Timestamp('2017-02-19 00:00:00'): ,\n", + " Timestamp('2017-03-21 00:00:00'): ,\n", + " Timestamp('2017-04-20 00:00:00'): ,\n", + " Timestamp('2017-05-20 00:00:00'): ,\n", + " Timestamp('2017-06-19 00:00:00'): }" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 26 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:14.331855Z", + "start_time": "2025-01-20T15:07:14.116053Z" + } + }, + "cell_type": "code", + "source": [ + "window_size = 30\n", + "\n", + "# List of strong and weak signals over time\n", + "for ts in bertrend.topic_models.keys():\n", + " print(ts)\n", + " noise_topics_df, weak_signal_topics_df, strong_signal_topics_df = bertrend.classify_signals(window_size, ts)\n", + " if not weak_signal_topics_df.empty:\n", + " print(\"Weak signals\")\n", + " display(weak_signal_topics_df[[\"Topic\",\"Representation\"]].head(5))\n", + " if not strong_signal_topics_df.empty:\n", + " print(\"Strong signals\")\n", + " display(strong_signal_topics_df[[\"Topic\",\"Representation\"]].head(5))\n", + " print()\n" + ], + "id": "dcba20eeaef6b472", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-01-20 00:00:00\n", + "Strong signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 0 healthcare_getting_together_disaster_new_despi...\n", + "1 1 https_great_at_meeting_amp_american_trump_we_f..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
00healthcare_getting_together_disaster_new_despi...
11https_great_at_meeting_amp_american_trump_we_f...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2017-02-19 00:00:00\n", + "Weak signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 0 win_republicans_immigration_illegal_dems_until...\n", + "1 1 https_our_today_jobs_american_great_at_preside..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
00win_republicans_immigration_illegal_dems_until...
11https_our_today_jobs_american_great_at_preside...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2017-03-21 00:00:00\n", + "Weak signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 4 night_interviewed_saturday_foxnews_next_tax_me...\n", + "1 10 healthcare_obamacare_plan_dead_lie_great_compa..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
04night_interviewed_saturday_foxnews_next_tax_me...
110healthcare_obamacare_plan_dead_lie_great_compa...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Strong signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 0 fake_news_said_possible_amp_yates_while_china_...\n", + "1 1 https_today_order_at_presidential_foxandfriend...\n", + "2 2 democrats_our_wall_insurance_companies_governm..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
00fake_news_said_possible_amp_yates_while_china_...
11https_today_order_at_presidential_foxandfriend...
22democrats_our_wall_insurance_companies_governm...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2017-04-20 00:00:00\n", + "Weak signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 2 help_country_our_justice_must_before_peace_his...\n", + "1 4 g7_jobs_terrorism_italy_trip_melania_security_...\n", + "2 8 nato_hard_east_saudi_trying_countries_2016_sho...\n", + "3 9 healthcare_cuts_obamacare_montana_republican_w..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
02help_country_our_justice_must_before_peace_his...
14g7_jobs_terrorism_italy_trip_melania_security_...
28nato_hard_east_saudi_trying_countries_2016_sho...
39healthcare_cuts_obamacare_montana_republican_w...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Strong signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 0 news_media_dems_they_now_no_london_phony_faken...\n", + "1 1 deal_workers_trump_again_promise_realdonaldtru..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
00news_media_dems_they_now_no_london_phony_faken...
11deal_workers_trump_again_promise_realdonaldtru...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2017-05-20 00:00:00\n", + "Weak signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 12 gop_georgia_foxnews_steel_congressional_foxand..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
012gop_georgia_foxnews_steel_congressional_foxand...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Strong signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 0 fbi_cia_asked_disgraceful_hoax_refused_seat_ta...\n", + "1 1 realdonaldtrump_potus_rt_weekly_friends_trump_...\n", + "2 3 obama_meddling_election_nothing_2016_russian_w...\n", + "3 7 south_deals_uswomensopen_women_meetings_moon_m...\n", + "4 9 democrats_healthcare_would_dems_senate_failed_..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
00fbi_cia_asked_disgraceful_hoax_refused_seat_ta...
11realdonaldtrump_potus_rt_weekly_friends_trump_...
23obama_meddling_election_nothing_2016_russian_w...
37south_deals_uswomensopen_women_meetings_moon_m...
49democrats_healthcare_would_dems_senate_failed_...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2017-06-19 00:00:00\n", + "Weak signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 3 obama_meddling_election_nothing_2016_russian_w...\n", + "1 7 south_deals_uswomensopen_women_meetings_moon_m...\n", + "2 9 democrats_healthcare_would_dems_senate_failed_...\n", + "3 12 gop_georgia_foxnews_steel_congressional_foxand...\n", + "4 13 market_jobs_another_deal_syria_like_border_ste..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
03obama_meddling_election_nothing_2016_russian_w...
17south_deals_uswomensopen_women_meetings_moon_m...
29democrats_healthcare_would_dems_senate_failed_...
312gop_georgia_foxnews_steel_congressional_foxand...
413market_jobs_another_deal_syria_like_border_ste...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Strong signals\n" + ] + }, + { + "data": { + "text/plain": [ + " Topic Representation\n", + "0 0 fbi_cia_asked_disgraceful_hoax_refused_seat_ta...\n", + "1 1 realdonaldtrump_potus_rt_weekly_friends_trump_..." + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TopicRepresentation
00fbi_cia_asked_disgraceful_hoax_refused_seat_ta...
11realdonaldtrump_potus_rt_weekly_friends_trump_...
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "execution_count": 27 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-26T19:58:55.911033Z", + "start_time": "2025-01-26T19:58:55.907556Z" + } + }, + "cell_type": "code", + "source": [ + "# selection of one particular timestamp to look at\n", + "selected_timestamp = Timestamp('2017-04-20 00:00:00')\n", + "selected_topic_model = bertrend.topic_models.get(selected_timestamp)\n" + ], + "id": "4582c0cb6c1f6186", + "outputs": [], + "execution_count": 1 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Get topic description\n", + "id": "e31285ee5eb9d9f6" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:09:22.901513Z", + "start_time": "2025-01-20T15:09:22.731495Z" + } + }, + "cell_type": "code", + "source": "desc = generate_topic_description(topic_model=selected_topic_model, topic_number=5, filtered_docs=df, language_code=\"en\")\n", + "id": "c945b625df18d881", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:09:22.895\u001B[0m | \u001B[31m\u001B[1mERROR \u001B[0m | \u001B[36mbertrend.topic_analysis.topic_description\u001B[0m:\u001B[36mgenerate_topic_description\u001B[0m:\u001B[36m51\u001B[0m - \u001B[31m\u001B[1mError calling OpenAI API: ' \"title\"'\u001B[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Error generating description: \\' \"title\"\\''" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 38 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:23:13.250764Z", + "start_time": "2025-01-20T15:23:11.647929Z" + } + }, + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:23:13.247\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.llm_utils.openai_client\u001B[0m:\u001B[36mgenerate_from_history\u001B[0m:\u001B[36m128\u001B[0m - \u001B[34m\u001B[1mAPI returned: ChatCompletion(id='chatcmpl-ArnuKCesKptMpkREbYsHA1tBs6qI2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\\n \"title\": \"Inauguration Night: Power Shift and Future Meetings\",\\n \"description\": \"Ce thème explore la dynamique de la nuit d\\'inauguration, marquée par des interviews et des discussions sur les prochaines étapes du gouvernement. Les événements de samedi, notamment sur Fox News, mettent en lumière les enjeux fiscaux et les réunions à venir avec des représentants étrangers. L\\'accent est mis sur le retour du pouvoir aux citoyens américains, soulignant l\\'importance des visites à domicile et des interactions directes. 
Ce moment symbolique représente un tournant dans la politique américaine, où les attentes et les promesses de changement sont au cœur des préoccupations.\"\\n}', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1737386592, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_5154047bf2', usage=CompletionUsage(completion_tokens=136, prompt_tokens=240, total_tokens=376, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)), prompt_filter_results=[{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}])\u001B[0m\n" + ] + } + ], + "execution_count": 68, + "source": "desc[\"title\"]", + "id": "e61b903379a0fbd1" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "desc[\"description\"]", + "id": "c4dbdd4998e0956a" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Get topic analysis", + "id": "e27e46b0adc6e88b" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:17.430211Z", + "start_time": "2025-01-20T15:07:16.745674Z" + } + }, + "cell_type": "code", + "source": "summary, analysis, formatted_html = analyze_signal(bertrend, 7, selected_timestamp)", + "id": "cdc44ef6f558aac0", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2025-01-20 16:07:16.814\u001B[0m | \u001B[31m\u001B[1mERROR \u001B[0m | \u001B[36mbertrend.trend_analysis.weak_signals\u001B[0m:\u001B[36manalyze_signal\u001B[0m:\u001B[36m416\u001B[0m - \u001B[31m\u001B[1mNo data available for topic 7 within the specified date range. Please enter a valid topic number.\u001B[0m\n" + ] + }, + { + "ename": "Exception", + "evalue": "No data available for topic 7 within the specified date range. 
Please enter a valid topic number.", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mException\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[33], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m summary, analysis, formatted_html \u001B[38;5;241m=\u001B[39m \u001B[43manalyze_signal\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 2\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;241;43m7\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 3\u001B[0m \u001B[43m \u001B[49m\u001B[43mselected_timestamp\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 4\u001B[0m \u001B[43m \u001B[49m\u001B[43mbertrend\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mall_merge_histories_df\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 5\u001B[0m \u001B[43m \u001B[49m\u001B[43mbertrend\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconfig\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mgranularity\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 6\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mEnglish\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[1;32m 7\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/dev/BERTrend/bertrend/trend_analysis/weak_signals.py:417\u001B[0m, in \u001B[0;36manalyze_signal\u001B[0;34m(topic_number, current_date, all_merge_histories_df, granularity, language)\u001B[0m\n\u001B[1;32m 415\u001B[0m error_msg \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNo data available for topic \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mtopic_number\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m within the specified date range. Please enter a valid topic number.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 416\u001B[0m logger\u001B[38;5;241m.\u001B[39merror(error_msg)\n\u001B[0;32m--> 417\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m(error_msg)\n", + "\u001B[0;31mException\u001B[0m: No data available for topic 7 within the specified date range. Please enter a valid topic number." + ] + } + ], + "execution_count": 33 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-20T15:07:17.433647087Z", + "start_time": "2025-01-19T14:38:52.904786Z" + } + }, + "cell_type": "code", + "source": [ + "from IPython.display import display, HTML\n", + "display(HTML(formatted_html))" + ], + "id": "531558c5b600cb30", + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "```html\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " Tensions and Diplomatic Engagements with North Korea Analysis Dashboard\n", + " \n", + "\n", + "\n", + "
<div>\n",
+       "<h2>Topic Evolution</h2>\n",
+       "<h3>Date: May 20, 2017: Tensions and Diplomatic Engagements with North Korea</h3>\n",
+       "<ul>\n",
+       "<li>John McCain and Lindsey Graham criticized for being weak on immigration in a joint statement.</li>\n",
+       "<li>President Trump hosted Japanese Prime Minister Shinzo Abe at Mar-a-Lago, emphasizing strong bilateral relations.</li>\n",
+       "<li>A joint statement regarding North Korea was released following productive talks with Prime Minister Abe.</li>\n",
+       "</ul>\n",
+       "<p><strong>What's New:</strong> This period introduces a more aggressive U.S. approach to North Korea, contrasting with the previous focus on diplomatic engagement. Additionally, the emphasis on trade negotiations signals a growing concern over economic relationships and their impact on national security.</p>\n",
+       "<h3>Date: May 22, 2017: Strategic Shifts in U.S. Foreign Policy and Trade</h3>\n",
+       "<ul>\n",
+       "<li>President Trump declared the end of strategic patience with North Korea, signaling a shift in U.S. policy.</li>\n",
+       "<li>A productive meeting with South Korean President Moon Jae-in focused on North Korea and trade deals.</li>\n",
+       "<li>The G20 Summit discussions included trade negotiations and international relations, particularly with China.</li>\n",
+       "</ul>\n",
+       "<p><strong>What's New:</strong> The transition from strategic patience to a more assertive stance on North Korea marks a significant shift in U.S. foreign policy, potentially leading to increased tensions in the region.</p>\n",
+       "<h2>Evolution Scenarios</h2>\n",
+       "<h3>Optimistic Scenario</h3>\n",
+       "<p>The U.S. could leverage its strengthened alliances to engage North Korea in meaningful negotiations, leading to denuclearization talks and a reduction in military tensions.</p>\n",
+       "<ul>\n",
+       "<li>Successful diplomacy could lead to a more stable Asia-Pacific region.</li>\n",
+       "<li>Economic cooperation may enhance regional stability.</li>\n",
+       "<li>Strengthened alliances could deter North Korean provocations.</li>\n",
+       "<li>Improved economic conditions for all parties involved.</li>\n",
+       "</ul>\n",
+       "<h3>Pessimistic Scenario</h3>\n",
+       "<p>The aggressive stance could lead to miscalculations and an unintended military confrontation, resulting in significant loss of life and destabilization of the region.</p>\n",
+       "<ul>\n",
+       "<li>Escalation to conflict could destabilize the Asia-Pacific region.</li>\n",
+       "<li>Increased isolationism may diminish U.S. global influence.</li>\n",
+       "<li>Failure to manage relations could lead to a vacuum filled by adversarial powers.</li>\n",
+       "<li>Humanitarian crises may arise from prolonged sanctions.</li>\n",
+       "</ul>\n",
+       "<h2>Topic Analysis</h2>\n",
+       "<h3>Short-term Implications</h3>\n",
+       "<ul>\n",
+       "<li>Geopolitical tensions could lead to immediate escalations in military posturing.</li>\n",
+       "<li>Economic repercussions may disrupt international trade, particularly with China.</li>\n",
+       "<li>Domestic political landscape may polarize further around immigration and foreign policy.</li>\n",
+       "<li>Increased military exercises in the region could provoke North Korea.</li>\n",
+       "</ul>\n",
+       "<h3>Long-term Implications</h3>\n",
+       "<ul>\n",
+       "<li>Strengthening ties with Japan and South Korea could lead to a cohesive security framework.</li>\n",
+       "<li>Normalization of military engagement may prioritize military readiness over diplomacy.</li>\n",
+       "<li>Economic isolation of North Korea could lead to long-term instability.</li>\n",
+       "<li>Potential regime change or humanitarian crises in North Korea.</li>\n",
+       "</ul>\n",
+       "<h3>Ripple Effects</h3>\n",
+       "<ul>\n",
+       "<li>Increased tensions may prompt neighboring countries to enhance military capabilities.</li>\n",
+       "<li>Humanitarian concerns could exacerbate issues in North Korea, leading to refugee flows.</li>\n",
+       "<li>Global trade dynamics may shift due to U.S. trade policy changes.</li>\n",
+       "<li>Potential realignments in global trade partnerships affecting economies worldwide.</li>\n",
+       "</ul>\n",
+       "<h3>Interconnections</h3>\n",
+       "<ul>\n",
+       "<li>Emerging technologies may drive investments in defense technologies.</li>\n",
+       "<li>Global supply chain resilience may lead to innovations in logistics.</li>\n",
+       "<li>Human rights advocacy could galvanize international organizations.</li>\n",
+       "<li>Increased awareness of humanitarian issues may influence U.S. policy changes.</li>\n",
+       "</ul>\n",
+       "<h3>Drivers</h3>\n",
+       "<ul>\n",
+       "<li>A strong political mandate could accelerate aggressive policies.</li>\n",
+       "<li>Public sentiment may bolster support for military readiness.</li>\n",
+       "<li>International support from allies could amplify U.S. efforts.</li>\n",
+       "<li>Growing concerns over national security could drive policy changes.</li>\n",
+       "</ul>\n",
+       "<h3>Inhibitors</h3>\n",
+       "<ul>\n",
+       "<li>Diplomatic resistance from countries like China and Russia may complicate efforts.</li>\n",
+       "<li>Economic consequences from trade disruptions could lead to political pressure.</li>\n",
+       "<li>Humanitarian concerns may lead to calls for more diplomatic engagement.</li>\n",
+       "<li>Backlash from U.S. businesses over trade policies could hinder aggressive actions.</li>\n",
+       "</ul>\n",
+       "</div>\n",
+       "\n",
+       "```"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "execution_count": 39
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "",
+   "id": "d4c54df2e25f24c9"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## Using BERTrend for prospective analysis",
+   "id": "c922549ec07859a9"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "In a **prospective trend analysis task**, the goal is to **forecast future** developments or outcomes based on current data and trends, enabling organizations to make informed decisions, allocate resources effectively, and strategize for upcoming challenges or opportunities.\n",
+   "id": "cd3a1210eb53e1e2"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "In this example, we simulate a prospective task:\n",
+    "- new data arrives in successive batches\n",
+    "- for each batch, we train a new topic model, merge it with the previous ones, and detect strong and weak signals at each iteration (see the commented `process_new_data` function below)\n"
+   ],
+   "id": "100f841b083ce637"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-26T20:42:31.675644Z",
+     "start_time": "2025-01-26T20:42:31.671870Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "MY_DATA_DIR = DATA_PATH / \"feeds/feed_sobriete\"\n",
+    "\n",
+    "input_data = [\n",
+    "    MY_DATA_DIR / \"2024-12-30_feed_sobriete.jsonl\",\n",
+    "    MY_DATA_DIR / \"2025-01-06_feed_sobriete.jsonl\",\n",
+    "    MY_DATA_DIR / \"2025-01-20_feed_sobriete.jsonl\",\n",
+    "]\n",
+    "\n",
+    "window_size = 7"
+   ],
+   "id": "4d88b099fc25b600",
+   "outputs": [],
+   "execution_count": 18
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-26T21:04:04.405304Z",
+     "start_time": "2025-01-26T21:04:04.401150Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "embedding_service_cfg = {\"local\": False, \"host\": \"10.132.5.44\", \"port\": 6464}\n",
+    "\n",
+    "embedding_service = EmbeddingService(**embedding_service_cfg)\n",
+    "embedding_model_name = embedding_service.embedding_model_name"
+   ],
+   "id": "a4619e8b7e9fbf91",
+   "outputs": [],
+   "execution_count": 48
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-26T20:41:23.289362Z",
+     "start_time": "2025-01-26T20:41:23.284555Z"
+    }
+   },
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": 13,
+   "source": "BERTREND_MODELS_PATH = MODELS_DIR / \"sobriete_models\"",
+   "id": "29f00b403ea81df1"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-26T21:08:01.471923Z",
+     "start_time": "2025-01-26T21:08:01.464866Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def process_new_data(data_slice_path: Path, timestamp: pd.Timestamp):\n",
+    "    logger.debug(f\"Processing new data: {data_slice_path}\")\n",
+    "\n",
+    "    # Restore the models saved by previous iterations, otherwise start from scratch\n",
+    "    try:\n",
+    "        bertrend = BERTrend.restore_models(BERTREND_MODELS_PATH)\n",
+    "    except Exception:\n",
+    "        logger.warning(\"Cannot restore previous models, creating new one\")\n",
+    "        bertrend = BERTrend(topic_model=BERTopicModel())\n",
+    "\n",
+    "    # Read data\n",
+    "    df = load_data(data_slice_path, language=\"French\")\n",
+    "    df = split_data(df)\n",
+    "    text = df[TEXT_COLUMN]\n",
+    "\n",
+    "    # Embed new data\n",
+    "    embeddings, token_strings, token_embeddings = embedding_service.embed(\n",
+    "        texts=text,\n",
+    "    )\n",
+    "\n",
+    "    # Create topic model for new data\n",
+    "    bertrend.train_topic_models({timestamp: df}, embeddings=embeddings, embedding_model=embedding_model_name)\n",
+    "\n",
+    "    # Merge models\n",
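+    "    # Note (descriptive comment, inferred from the workflow above): merging\n",
+    "    # matches the topics of the newly trained model with those from previous\n",
+    "    # batches, so the same signal can be followed across successive updates\n",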
+    "    bertrend.merge_all_models()\n",
+    "\n",
+    "    # Compute popularities\n",
+    "    bertrend.calculate_signal_popularity()\n",
+    "\n",
+    "    # Classify the signals of the latest window into noise / weak / strong\n",
+    "    noise_topics_df, weak_signal_topics_df, strong_signal_topics_df = bertrend.classify_signals(window_size, timestamp)\n",
+    "    # TODO: save dfs\n",
+    "    wt = weak_signal_topics_df['Topic']\n",
+    "    logger.info(f\"Weak topics: {wt}\")\n",
+    "    for topic in wt:\n",
+    "        desc = generate_topic_description(topic_model=bertrend.topic_models[timestamp], topic_number=topic, filtered_docs=df, language_code=\"fr\")\n",
+    "        logger.info(f\"Topic: {topic}\\t\\t{desc['title']}\\n{desc['description']}\")\n",
+    "\n",
+    "\n",
+    "    # Save models\n",
+    "    bertrend.save_models(models_path=BERTREND_MODELS_PATH)\n",
+    "\n"
+   ],
+   "id": "63e3d13a7d8c0cb",
+   "outputs": [],
+   "execution_count": 56
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-26T21:05:36.124752Z",
+     "start_time": "2025-01-26T21:05:36.122652Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "",
+   "id": "1b36e0e226103b8c",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-01-26T21:12:53.800721Z",
+     "start_time": "2025-01-26T21:08:10.434372Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "input_data = [\n",
+    "    MY_DATA_DIR / \"2024-12-30_feed_sobriete.jsonl\",\n",
+    "]\n",
+    "for data_file in input_data:\n",
+    "    timestamp = pd.Timestamp(data_file.name.split('_')[0])\n",
+    "    process_new_data(data_file, timestamp)"
+   ],
+   "id": "a2695805f56be632",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001B[32m2025-01-26 22:08:10.435\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mprocess_new_data\u001B[0m:\u001B[36m2\u001B[0m - \u001B[34m\u001B[1mProcessing new data: /home/jerome/dev/data/bertrend/feeds/feed_sobriete/2024-12-30_feed_sobriete.jsonl\u001B[0m\n",
+      "\u001B[32m2025-01-26 22:08:10.439\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mprocess_new_data\u001B[0m:\u001B[36m8\u001B[0m - \u001B[33m\u001B[1mCannot restore previous models, creating new one\u001B[0m\n",
+      "\u001B[32m2025-01-26 22:08:10.650\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.services.embedding_service\u001B[0m:\u001B[36m_remote_embed_documents\u001B[0m:\u001B[36m203\u001B[0m - \u001B[34m\u001B[1mComputing embeddings...\u001B[0m\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['title', 'summary', 'link', 'url', 'text', 'timestamp', 'document_id',\n",
+      "       'source'],\n",
+      "      dtype='object')\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001B[32m2025-01-26 22:08:40.748\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.services.embedding_service\u001B[0m:\u001B[36m_remote_embed_documents\u001B[0m:\u001B[36m210\u001B[0m - \u001B[34m\u001B[1mComputing embeddings done for batch\u001B[0m\n",
+      "\u001B[32m2025-01-26 22:08:40.800\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.services.embedding_service\u001B[0m:\u001B[36m_get_remote_model_name\u001B[0m:\u001B[36m226\u001B[0m - \u001B[34m\u001B[1mModel name: OrdalieTech/Solon-embeddings-large-0.1\u001B[0m\n",
+      "\u001B[32m2025-01-26 22:08:40.804\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36mtrain_topic_models\u001B[0m:\u001B[36m240\u001B[0m - \u001B[1mTraining topic model 1/1...\u001B[0m\n",
+      "\u001B[32m2025-01-26 22:08:40.810\u001B[0m | 
\u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m148\u001B[0m - \u001B[34m\u001B[1mProcessing period: 2024-12-30 00:00:00\u001B[0m\n", + "\u001B[32m2025-01-26 22:08:40.811\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m149\u001B[0m - \u001B[34m\u001B[1mNumber of documents: 932\u001B[0m\n", + "\u001B[32m2025-01-26 22:08:40.812\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTrend\u001B[0m:\u001B[36m_train_by_period\u001B[0m:\u001B[36m151\u001B[0m - \u001B[34m\u001B[1mCreating topic model...\u001B[0m\n", + "\u001B[32m2025-01-26 22:08:40.812\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m212\u001B[0m - \u001B[34m\u001B[1m\tInitializing BERTopic model\u001B[0m\n", + "\u001B[32m2025-01-26 22:08:40.814\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m222\u001B[0m - \u001B[32m\u001B[1m\tBERTopic model instance created successfully\u001B[0m\n", + "\u001B[32m2025-01-26 22:08:40.814\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mbertrend.BERTopicModel\u001B[0m:\u001B[36mfit\u001B[0m:\u001B[36m224\u001B[0m - \u001B[34m\u001B[1m\tFitting BERTopic model\u001B[0m\n", + "2025-01-26 22:08:43,626 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm\n", + "2025-01-26 22:08:51,061 - BERTopic - Dimensionality - Completed ✓\n", + "2025-01-26 22:08:51,062 - BERTopic - Cluster - Start clustering the reduced embeddings\n", + "2025-01-26 22:08:51,118 - BERTopic - Cluster - Completed ✓\n", + "2025-01-26 22:08:51,125 - BERTopic - Representation - Extracting topics from clusters using representation models.\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[57], line 6\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m data_file \u001B[38;5;129;01min\u001B[39;00m input_data:\n\u001B[1;32m 5\u001B[0m timestamp \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mTimestamp(data_file\u001B[38;5;241m.\u001B[39mname\u001B[38;5;241m.\u001B[39msplit(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_\u001B[39m\u001B[38;5;124m'\u001B[39m)[\u001B[38;5;241m0\u001B[39m])\n\u001B[0;32m----> 6\u001B[0m \u001B[43mprocess_new_data\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdata_file\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtimestamp\u001B[49m\u001B[43m)\u001B[49m\n", + "Cell \u001B[0;32mIn[56], line 23\u001B[0m, in \u001B[0;36mprocess_new_data\u001B[0;34m(data_slice_path, timestamp)\u001B[0m\n\u001B[1;32m 18\u001B[0m embeddings, token_strings, token_embeddings \u001B[38;5;241m=\u001B[39m embedding_service\u001B[38;5;241m.\u001B[39membed(\n\u001B[1;32m 19\u001B[0m texts\u001B[38;5;241m=\u001B[39mtext,\n\u001B[1;32m 20\u001B[0m )\n\u001B[1;32m 22\u001B[0m \u001B[38;5;66;03m# Create topic model for new data\u001B[39;00m\n\u001B[0;32m---> 23\u001B[0m \u001B[43mbertrend\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtrain_topic_models\u001B[49m\u001B[43m(\u001B[49m\u001B[43m{\u001B[49m\u001B[43mtimestamp\u001B[49m\u001B[43m:\u001B[49m\u001B[43m 
\u001B[49m\u001B[43mdf\u001B[49m\u001B[43m}\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43membeddings\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43membeddings\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43membedding_model\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43membedding_model_name\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 25\u001B[0m \u001B[38;5;66;03m# Save models\u001B[39;00m\n\u001B[1;32m 26\u001B[0m bertrend\u001B[38;5;241m.\u001B[39msave_models()\n", + "File \u001B[0;32m~/dev/BERTrend/bertrend/BERTrend.py:242\u001B[0m, in \u001B[0;36mBERTrend.train_topic_models\u001B[0;34m(self, grouped_data, embedding_model, embeddings)\u001B[0m\n\u001B[1;32m 239\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 240\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mTraining topic model \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mi\u001B[38;5;241m+\u001B[39m\u001B[38;5;241m1\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m/\u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlen\u001B[39m(non_empty_groups)\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m...\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 241\u001B[0m topic_models[period], doc_groups[period], emb_groups[period] \u001B[38;5;241m=\u001B[39m (\n\u001B[0;32m--> 242\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_train_by_period\u001B[49m\u001B[43m(\u001B[49m\u001B[43mperiod\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mgroup\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43membedding_model\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43membeddings\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 243\u001B[0m ) \u001B[38;5;66;03m# TODO: parallelize?\u001B[39;00m\n\u001B[1;32m 244\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mSuccessfully processed period: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mperiod\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 246\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m e:\n", + "File \u001B[0;32m~/dev/BERTrend/bertrend/BERTrend.py:152\u001B[0m, in \u001B[0;36mBERTrend._train_by_period\u001B[0;34m(self, period, group, embedding_model, embeddings)\u001B[0m\n\u001B[1;32m 149\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNumber of documents: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mlen\u001B[39m(docs)\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 151\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCreating topic model...\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m--> 152\u001B[0m topic_model \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtopic_model\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 153\u001B[0m \u001B[43m \u001B[49m\u001B[43mdocs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdocs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 154\u001B[0m \u001B[43m \u001B[49m\u001B[43membedding_model\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43membedding_model\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 155\u001B[0m \u001B[43m 
\u001B[49m\u001B[43membeddings\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43membeddings_subset\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 156\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241m.\u001B[39mtopic_model\n\u001B[1;32m 158\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mTopic model created successfully\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 160\u001B[0m doc_info_df \u001B[38;5;241m=\u001B[39m topic_model\u001B[38;5;241m.\u001B[39mget_document_info(docs\u001B[38;5;241m=\u001B[39mdocs)\n", + "File \u001B[0;32m~/dev/BERTrend/bertrend/BERTopicModel.py:225\u001B[0m, in \u001B[0;36mBERTopicModel.fit\u001B[0;34m(self, docs, embedding_model, embeddings, zeroshot_topic_list, zeroshot_min_similarity)\u001B[0m\n\u001B[1;32m 222\u001B[0m logger\u001B[38;5;241m.\u001B[39msuccess(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;130;01m\\t\u001B[39;00m\u001B[38;5;124mBERTopic model instance created successfully\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 224\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;130;01m\\t\u001B[39;00m\u001B[38;5;124mFitting BERTopic model\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m--> 225\u001B[0m topics, probs \u001B[38;5;241m=\u001B[39m \u001B[43mtopic_model\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit_transform\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdocs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43membeddings\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 227\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m topic_model\u001B[38;5;241m.\u001B[39m_outliers:\n\u001B[1;32m 228\u001B[0m logger\u001B[38;5;241m.\u001B[39mwarning(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;130;01m\\t\u001B[39;00m\u001B[38;5;124mNo outliers to reduce.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/_bertopic.py:433\u001B[0m, in \u001B[0;36mBERTopic.fit_transform\u001B[0;34m(self, documents, embeddings, images, y)\u001B[0m\n\u001B[1;32m 430\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_save_representative_docs(custom_documents)\n\u001B[1;32m 431\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 432\u001B[0m \u001B[38;5;66;03m# Extract topics by calculating c-TF-IDF\u001B[39;00m\n\u001B[0;32m--> 433\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_extract_topics\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdocuments\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43membeddings\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43membeddings\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mverbose\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 435\u001B[0m \u001B[38;5;66;03m# Reduce topics\u001B[39;00m\n\u001B[1;32m 436\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnr_topics:\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/_bertopic.py:3787\u001B[0m, in \u001B[0;36mBERTopic._extract_topics\u001B[0;34m(self, documents, embeddings, mappings, verbose)\u001B[0m\n\u001B[1;32m 3785\u001B[0m documents_per_topic \u001B[38;5;241m=\u001B[39m 
documents\u001B[38;5;241m.\u001B[39mgroupby([\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mTopic\u001B[39m\u001B[38;5;124m'\u001B[39m], as_index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\u001B[38;5;241m.\u001B[39magg({\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mDocument\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m.\u001B[39mjoin})\n\u001B[1;32m 3786\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mc_tf_idf_, words \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_c_tf_idf(documents_per_topic)\n\u001B[0;32m-> 3787\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtopic_representations_ \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_extract_words_per_topic\u001B[49m\u001B[43m(\u001B[49m\u001B[43mwords\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdocuments\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3788\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_create_topic_vectors(documents\u001B[38;5;241m=\u001B[39mdocuments, embeddings\u001B[38;5;241m=\u001B[39membeddings, mappings\u001B[38;5;241m=\u001B[39mmappings)\n\u001B[1;32m 3789\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtopic_labels_ \u001B[38;5;241m=\u001B[39m {key: \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mkey\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m_\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mjoin([word[\u001B[38;5;241m0\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m word \u001B[38;5;129;01min\u001B[39;00m values[:\u001B[38;5;241m4\u001B[39m]])\n\u001B[1;32m 3790\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m key, values \u001B[38;5;129;01min\u001B[39;00m\n\u001B[1;32m 3791\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtopic_representations_\u001B[38;5;241m.\u001B[39mitems()}\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/_bertopic.py:4071\u001B[0m, in \u001B[0;36mBERTopic._extract_words_per_topic\u001B[0;34m(self, words, documents, c_tf_idf, calculate_aspects)\u001B[0m\n\u001B[1;32m 4069\u001B[0m topics \u001B[38;5;241m=\u001B[39m tuner\u001B[38;5;241m.\u001B[39mextract_topics(\u001B[38;5;28mself\u001B[39m, documents, c_tf_idf, topics)\n\u001B[1;32m 4070\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mrepresentation_model, BaseRepresentation):\n\u001B[0;32m-> 4071\u001B[0m topics \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrepresentation_model\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mextract_topics\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdocuments\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mc_tf_idf\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtopics\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 4072\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mrepresentation_model, \u001B[38;5;28mdict\u001B[39m):\n\u001B[1;32m 4073\u001B[0m 
\u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mrepresentation_model\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mMain\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/representation/_mmr.py:68\u001B[0m, in \u001B[0;36mMaximalMarginalRelevance.extract_topics\u001B[0;34m(self, topic_model, documents, c_tf_idf, topics)\u001B[0m\n\u001B[1;32m 66\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m topic, topic_words \u001B[38;5;129;01min\u001B[39;00m topics\u001B[38;5;241m.\u001B[39mitems():\n\u001B[1;32m 67\u001B[0m words \u001B[38;5;241m=\u001B[39m [word[\u001B[38;5;241m0\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m word \u001B[38;5;129;01min\u001B[39;00m topic_words]\n\u001B[0;32m---> 68\u001B[0m word_embeddings \u001B[38;5;241m=\u001B[39m \u001B[43mtopic_model\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_extract_embeddings\u001B[49m\u001B[43m(\u001B[49m\u001B[43mwords\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmethod\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mword\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\u001B[43m)\u001B[49m\n\u001B[1;32m 69\u001B[0m topic_embedding \u001B[38;5;241m=\u001B[39m topic_model\u001B[38;5;241m.\u001B[39m_extract_embeddings(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m \u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mjoin(words), method\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mword\u001B[39m\u001B[38;5;124m\"\u001B[39m, verbose\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\u001B[38;5;241m.\u001B[39mreshape(\u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m)\n\u001B[1;32m 70\u001B[0m topic_words \u001B[38;5;241m=\u001B[39m mmr(topic_embedding, word_embeddings, words, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdiversity, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtop_n_words)\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/_bertopic.py:3408\u001B[0m, in \u001B[0;36mBERTopic._extract_embeddings\u001B[0;34m(self, documents, images, method, verbose)\u001B[0m\n\u001B[1;32m 3406\u001B[0m embeddings \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39membedding_model\u001B[38;5;241m.\u001B[39membed(documents\u001B[38;5;241m=\u001B[39mdocuments, images\u001B[38;5;241m=\u001B[39mimages, verbose\u001B[38;5;241m=\u001B[39mverbose)\n\u001B[1;32m 3407\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m method \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mword\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m-> 3408\u001B[0m embeddings \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43membedding_model\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43membed_words\u001B[49m\u001B[43m(\u001B[49m\u001B[43mwords\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdocuments\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mverbose\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3409\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m method \u001B[38;5;241m==\u001B[39m 
\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdocument\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[1;32m 3410\u001B[0m embeddings \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39membedding_model\u001B[38;5;241m.\u001B[39membed_documents(documents, verbose\u001B[38;5;241m=\u001B[39mverbose)\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/backend/_base.py:53\u001B[0m, in \u001B[0;36mBaseEmbedder.embed_words\u001B[0;34m(self, words, verbose)\u001B[0m\n\u001B[1;32m 38\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21membed_words\u001B[39m(\u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 39\u001B[0m words: List[\u001B[38;5;28mstr\u001B[39m],\n\u001B[1;32m 40\u001B[0m verbose: \u001B[38;5;28mbool\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m np\u001B[38;5;241m.\u001B[39mndarray:\n\u001B[1;32m 41\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\" Embed a list of n words into an n-dimensional\u001B[39;00m\n\u001B[1;32m 42\u001B[0m \u001B[38;5;124;03m matrix of embeddings\u001B[39;00m\n\u001B[1;32m 43\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 51\u001B[0m \n\u001B[1;32m 52\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m---> 53\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43membed\u001B[49m\u001B[43m(\u001B[49m\u001B[43mwords\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/bertopic/backend/_sentencetransformers.py:65\u001B[0m, in \u001B[0;36mSentenceTransformerBackend.embed\u001B[0;34m(self, documents, verbose)\u001B[0m\n\u001B[1;32m 51\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21membed\u001B[39m(\u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 52\u001B[0m documents: List[\u001B[38;5;28mstr\u001B[39m],\n\u001B[1;32m 53\u001B[0m verbose: \u001B[38;5;28mbool\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m np\u001B[38;5;241m.\u001B[39mndarray:\n\u001B[1;32m 54\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\" Embed a list of n documents/words into an n-dimensional\u001B[39;00m\n\u001B[1;32m 55\u001B[0m \u001B[38;5;124;03m matrix of embeddings\u001B[39;00m\n\u001B[1;32m 56\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 63\u001B[0m \u001B[38;5;124;03m that each have an embeddings size of `m`\u001B[39;00m\n\u001B[1;32m 64\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[0;32m---> 65\u001B[0m embeddings \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43membedding_model\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mencode\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdocuments\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mshow_progress_bar\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mverbose\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 66\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m embeddings\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/sentence_transformers/SentenceTransformer.py:517\u001B[0m, in \u001B[0;36mSentenceTransformer.encode\u001B[0;34m(self, sentences, prompt_name, prompt, batch_size, show_progress_bar, output_value, precision, convert_to_numpy, 
convert_to_tensor, device, normalize_embeddings)\u001B[0m\n\u001B[1;32m 514\u001B[0m features\u001B[38;5;241m.\u001B[39mupdate(extra_features)\n\u001B[1;32m 516\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m torch\u001B[38;5;241m.\u001B[39mno_grad():\n\u001B[0;32m--> 517\u001B[0m out_features \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mforward\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfeatures\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 518\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdevice\u001B[38;5;241m.\u001B[39mtype \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mhpu\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[1;32m 519\u001B[0m out_features \u001B[38;5;241m=\u001B[39m copy\u001B[38;5;241m.\u001B[39mdeepcopy(out_features)\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/container.py:250\u001B[0m, in \u001B[0;36mSequential.forward\u001B[0;34m(self, input)\u001B[0m\n\u001B[1;32m 248\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mforward\u001B[39m(\u001B[38;5;28mself\u001B[39m, \u001B[38;5;28minput\u001B[39m):\n\u001B[1;32m 249\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m module \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m:\n\u001B[0;32m--> 250\u001B[0m \u001B[38;5;28minput\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[43mmodule\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43minput\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m 251\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28minput\u001B[39m\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/module.py:1736\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1734\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[1;32m 1735\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1736\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_impl\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/module.py:1747\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1742\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[1;32m 1743\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[1;32m 1744\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[1;32m 1745\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[1;32m 1746\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[0;32m-> 1747\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mforward_call\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1749\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m 1750\u001B[0m called_always_called_hooks \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mset\u001B[39m()\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/sentence_transformers/models/Transformer.py:118\u001B[0m, in \u001B[0;36mTransformer.forward\u001B[0;34m(self, features)\u001B[0m\n\u001B[1;32m 115\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtoken_type_ids\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m features:\n\u001B[1;32m 116\u001B[0m trans_features[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtoken_type_ids\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m features[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtoken_type_ids\u001B[39m\u001B[38;5;124m\"\u001B[39m]\n\u001B[0;32m--> 118\u001B[0m output_states \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mauto_model\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mtrans_features\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mreturn_dict\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\u001B[43m)\u001B[49m\n\u001B[1;32m 119\u001B[0m output_tokens \u001B[38;5;241m=\u001B[39m output_states[\u001B[38;5;241m0\u001B[39m]\n\u001B[1;32m 121\u001B[0m features\u001B[38;5;241m.\u001B[39mupdate({\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtoken_embeddings\u001B[39m\u001B[38;5;124m\"\u001B[39m: output_tokens, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mattention_mask\u001B[39m\u001B[38;5;124m\"\u001B[39m: features[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mattention_mask\u001B[39m\u001B[38;5;124m\"\u001B[39m]})\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/module.py:1736\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1734\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[1;32m 1735\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1736\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_impl\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File 
\u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/module.py:1747\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1742\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[1;32m 1743\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[1;32m 1744\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[1;32m 1745\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[1;32m 1746\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[0;32m-> 1747\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mforward_call\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1749\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m 1750\u001B[0m called_always_called_hooks \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mset\u001B[39m()\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/transformers/models/xlm_roberta/modeling_xlm_roberta.py:834\u001B[0m, in \u001B[0;36mXLMRobertaModel.forward\u001B[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001B[0m\n\u001B[1;32m 825\u001B[0m head_mask \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mget_head_mask(head_mask, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39mnum_hidden_layers)\n\u001B[1;32m 827\u001B[0m embedding_output \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39membeddings(\n\u001B[1;32m 828\u001B[0m input_ids\u001B[38;5;241m=\u001B[39minput_ids,\n\u001B[1;32m 829\u001B[0m position_ids\u001B[38;5;241m=\u001B[39mposition_ids,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 832\u001B[0m past_key_values_length\u001B[38;5;241m=\u001B[39mpast_key_values_length,\n\u001B[1;32m 833\u001B[0m )\n\u001B[0;32m--> 834\u001B[0m encoder_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mencoder\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 835\u001B[0m \u001B[43m \u001B[49m\u001B[43membedding_output\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 836\u001B[0m \u001B[43m \u001B[49m\u001B[43mattention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mextended_attention_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 837\u001B[0m \u001B[43m \u001B[49m\u001B[43mhead_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mhead_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 
838\u001B[0m \u001B[43m \u001B[49m\u001B[43mencoder_hidden_states\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mencoder_hidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 839\u001B[0m \u001B[43m \u001B[49m\u001B[43mencoder_attention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mencoder_extended_attention_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 840\u001B[0m \u001B[43m \u001B[49m\u001B[43mpast_key_values\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mpast_key_values\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 841\u001B[0m \u001B[43m \u001B[49m\u001B[43muse_cache\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43muse_cache\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 842\u001B[0m \u001B[43m \u001B[49m\u001B[43moutput_attentions\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_attentions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 843\u001B[0m \u001B[43m \u001B[49m\u001B[43moutput_hidden_states\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_hidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 844\u001B[0m \u001B[43m \u001B[49m\u001B[43mreturn_dict\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mreturn_dict\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 845\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 846\u001B[0m sequence_output \u001B[38;5;241m=\u001B[39m encoder_outputs[\u001B[38;5;241m0\u001B[39m]\n\u001B[1;32m 847\u001B[0m pooled_output \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpooler(sequence_output) \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpooler \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/module.py:1736\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1734\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[1;32m 1735\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m-> 1736\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_call_impl\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/miniconda3/envs/bertrend/lib/python3.11/site-packages/torch/nn/modules/module.py:1747\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 1742\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[1;32m 1743\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[1;32m 1744\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks ...\n",
     "    [... repeated torch / transformers forward-call stack frames elided; the run was interrupted manually ...]\n",
     "\u001B[0;31mKeyboardInterrupt\u001B[0m: "
    ]
   }
  ],
  "execution_count": 57
 }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
diff --git a/pyproject.toml b/pyproject.toml
index 4147310..9b5aae9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ bertopic = "0.16.2"
 black = "^24.10.0"
 datamapplot = "0.3.0"
 dateparser = "^1.2.0"
-dask = "2024.9.1" # issues with >=2025.x (https://github.com/dask/dask/issues/11678)
+dask = "2024.12.0" # issues with >=2025.x (https://github.com/dask/dask/issues/11678)
 gensim = "4.3.2"
 hdbscan = "^0.8.40"
 joblib = "^1.4.2"
@@ -38,6 +38,7 @@ markdown = "^3.7"
 nltk = "^3.9.1"
numpy = "<2" openai = "^1.58.1" +opentelemetry-exporter-otlp-proto-grpc = "1.25.0" # to avoid error chroma with protobuf pandas = "^2.2.2" plotly = "^5.24.1" plotly-resampler = "^0.10.0"