Skip to content

Commit de06e56

Browse files
committed
adding gpu hdbscan
1 parent 42a764b commit de06e56

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

top2vec/Top2Vec.py

+15 -2
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,13 @@
2626
except ImportError:
2727
_HAVE_CUMAP = False
2828

29+
try:
30+
from cuml.cluster import HDBSCAN as cuHDBSCAN
31+
32+
_HAVE_CUHDBSCAN = True
33+
except ImportError:
34+
_HAVE_CUHDBSCAN = False
35+
2936
try:
3037
import hnswlib
3138

@@ -1369,13 +1376,19 @@ def compute_topics(self,
13691376
'metric': 'euclidean',
13701377
'cluster_selection_method': 'eom'}
13711378

1372-
cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_embedding)
1379+
if gpu_hdbscan and _HAVE_CUHDBSCAN:
1380+
cluster = cuHDBSCAN(**hdbscan_args)
1381+
labels = cluster.fit_predict(umap_embedding)
1382+
1383+
else:
1384+
cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_embedding)
1385+
labels = cluster.labels_
13731386

13741387
# calculate topic vectors from dense areas of documents
13751388
logger.info('Finding topics')
13761389

13771390
# create topic vectors
1378-
self._create_topic_vectors(cluster.labels_)
1391+
self._create_topic_vectors(labels)
13791392

13801393
# deduplicate topics
13811394
self._deduplicate_topics(topic_merge_delta)

0 commit comments

Comments
 (0)