clean up unnecessary overrides
Some overrides were unnecessary; they were introduced while working out the best place to save the BERTopic results.

This commit removes those unneeded overrides.
varun646 committed Feb 25, 2025
1 parent 320d510 commit 8152e29
Showing 1 changed file with 8 additions and 266 deletions.
274 changes: 8 additions & 266 deletions src/PatientX/models/
Expand Up @@ -146,7 +146,13 @@ def _extract_words_per_topic(
c_tf_idf: csr_matrix = None,
calculate_aspects: bool = True,
) -> Mapping[str, List[Tuple[str, float]]]:
"""Based on tf_idf scores per topic, extract the top n words per topic.
NOTE: this function overrides bertopic._extract_words_per_topic().
The only difference is that we explicitly store the representative words in self.bertopic_representative_words
so that the intermediate BERTopic results can be saved later.
Based on tf_idf scores per topic, extract the top n words per topic.
If the top words per topic need to be extracted, then only the `words` parameter
needs to be passed. If the top words per topic in a specific timestamp, then it
Expand Down Expand Up @@ -185,6 +191,7 @@ def _extract_words_per_topic(
for index, label in enumerate(labels)

# NOTE: this is the only change from bertopic._extract_words_per_topic()
self.bertopic_representative_words = {label: values[: self.top_n_words] for label, values in base_topics.items()}

# Fine-tune the topic representations
Expand Down Expand Up @@ -234,271 +241,6 @@ def _extract_words_per_topic(

return topics

