From d83076c5ec0dee1986cf9338d00d261604abb0d0 Mon Sep 17 00:00:00 2001
From: osanseviero <osanseviero@gmail.com>
Date: Wed, 10 Jul 2024 16:25:59 +0200
Subject: [PATCH] Remove sentence-similarity task (validation, pipelines, tests)

---
 api_inference_community/validation.py         |  7 --
 .../adapter_transformers/tests/test_api.py    |  1 -
 docker_images/bertopic/tests/test_api.py      |  2 -
 .../common/app/pipelines/__init__.py          |  1 -
 .../app/pipelines/sentence_similarity.py      | 33 -------
 docker_images/common/tests/test_api.py        |  2 -
 .../tests/test_api_sentence_similarity.py     | 97 -------------------
 docker_images/diffusers/tests/test_api.py     |  2 -
 docker_images/fasttext/tests/test_api.py      |  1 -
 docker_images/nemo/tests/test_api.py          |  2 -
 docker_images/open_clip/tests/test_api.py     |  2 -
 docker_images/paddlenlp/tests/test_api.py     |  2 -
 docker_images/peft/tests/test_api.py          |  2 -
 .../pyannote_audio/tests/test_api.py          |  2 -
 .../sentence_transformers/app/main.py         |  7 +-
 .../app/pipelines/__init__.py                 |  1 -
 .../app/pipelines/sentence_similarity.py      | 33 -------
 .../sentence_transformers/tests/test_api.py   |  5 +-
 .../tests/test_api_sentence_similarity.py     | 94 ------------------
 docker_images/setfit/tests/test_api.py        |  2 -
 docker_images/sklearn/tests/test_api.py       |  1 -
 docker_images/spacy/app/main.py               |  3 -
 docker_images/spacy/app/pipelines/__init__.py |  1 -
 .../app/pipelines/sentence_similarity.py      | 51 ----------
 docker_images/spacy/tests/test_api.py         |  2 -
 .../tests/test_api_sentence_similarity.py     | 96 ------------------
 docker_images/span_marker/tests/test_api.py   |  2 -
 docker_images/stanza/tests/test_api.py        |  1 -
 tests/test_dockers.py                         |  9 +-
 tests/test_nlp.py                             | 20 ----
 30 files changed, 5 insertions(+), 479 deletions(-)
 delete mode 100644 docker_images/common/app/pipelines/sentence_similarity.py
 delete mode 100644 docker_images/common/tests/test_api_sentence_similarity.py
 delete mode 100644 docker_images/sentence_transformers/app/pipelines/sentence_similarity.py
 delete mode 100644 docker_images/sentence_transformers/tests/test_api_sentence_similarity.py
 delete mode 100644 docker_images/spacy/app/pipelines/sentence_similarity.py
 delete mode 100644 docker_images/spacy/tests/test_api_sentence_similarity.py

diff --git a/api_inference_community/validation.py b/api_inference_community/validation.py
index 1f83844e..6217a6f1 100644
--- a/api_inference_community/validation.py
+++ b/api_inference_community/validation.py
@@ -78,11 +78,6 @@ class QuestionInputsCheck(BaseModel):
     context: str
 
 
-class SentenceSimilarityInputsCheck(BaseModel):
-    source_sentence: str
-    sentences: List[str]
-
-
 class TableQuestionAnsweringInputsCheck(BaseModel):
     table: Dict[str, List[str]]
     query: str
@@ -139,7 +134,6 @@ class StringInput(RootModel):
     "conversational": ConversationalInputsCheck,
     "question-answering": QuestionInputsCheck,
     "feature-extraction": StringOrStringBatchInputCheck,
-    "sentence-similarity": SentenceSimilarityInputsCheck,
     "table-question-answering": TableQuestionAnsweringInputsCheck,
     "tabular-classification": TabularDataInputsCheck,
     "tabular-regression": TabularDataInputsCheck,
@@ -203,7 +197,6 @@ def check_inputs(inputs, tag):
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "tabular-classification",
diff --git a/docker_images/adapter_transformers/tests/test_api.py b/docker_images/adapter_transformers/tests/test_api.py
index 3060db10..80e9458d 100644
--- a/docker_images/adapter_transformers/tests/test_api.py
+++ b/docker_images/adapter_transformers/tests/test_api.py
@@ -22,7 +22,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "structured-data-classification",
     "text-generation",
     "text-to-speech",
diff --git a/docker_images/bertopic/tests/test_api.py b/docker_images/bertopic/tests/test_api.py
index a756d8bf..3d5ed62e 100644
--- a/docker_images/bertopic/tests/test_api.py
+++ b/docker_images/bertopic/tests/test_api.py
@@ -20,7 +20,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -29,7 +28,6 @@
     "token-classification",
     "conversational",
     "feature-extraction",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/common/app/pipelines/__init__.py b/docker_images/common/app/pipelines/__init__.py
index b45e41d6..8dbcacca 100644
--- a/docker_images/common/app/pipelines/__init__.py
+++ b/docker_images/common/app/pipelines/__init__.py
@@ -8,7 +8,6 @@
 from app.pipelines.feature_extraction import FeatureExtractionPipeline
 from app.pipelines.image_classification import ImageClassificationPipeline
 from app.pipelines.question_answering import QuestionAnsweringPipeline
-from app.pipelines.sentence_similarity import SentenceSimilarityPipeline
 from app.pipelines.speech_segmentation import SpeechSegmentationPipeline
 from app.pipelines.tabular_classification_pipeline import TabularClassificationPipeline
 from app.pipelines.tabular_regression_pipeline import TabularRegressionPipeline
diff --git a/docker_images/common/app/pipelines/sentence_similarity.py b/docker_images/common/app/pipelines/sentence_similarity.py
deleted file mode 100644
index 95c08a70..00000000
--- a/docker_images/common/app/pipelines/sentence_similarity.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from typing import Dict, List, Union
-
-from app.pipelines import Pipeline
-
-
-class SentenceSimilarityPipeline(Pipeline):
-    def __init__(
-        self,
-        model_id: str,
-    ):
-        # IMPLEMENT_THIS
-        # Preload all the elements you are going to need at inference.
-        # For instance your model, processors, tokenizer that might be needed.
-        # This function is only called once, so do all the heavy processing I/O here
-        raise NotImplementedError(
-            "Please implement SentenceSimilarityPipeline __init__ function"
-        )
-
-    def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]:
-        """
-        Args:
-            inputs (:obj:`dict`):
-                a dictionary containing two keys, 'source_sentence' mapping
-                to the sentence that will be compared against all the others,
-                and 'sentences', mapping to a list of strings to which the
-                source will be compared.
-        Return:
-            A :obj:`list` of floats: Some similarity measure between `source_sentence` and each sentence from `sentences`.
-        """
-        # IMPLEMENT_THIS
-        raise NotImplementedError(
-            "Please implement SentenceSimilarityPipeline __call__ function"
-        )
diff --git a/docker_images/common/tests/test_api.py b/docker_images/common/tests/test_api.py
index a7e085da..a3a9b851 100644
--- a/docker_images/common/tests/test_api.py
+++ b/docker_images/common/tests/test_api.py
@@ -22,7 +22,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -31,7 +30,6 @@
     "token-classification",
     "conversational",
     "feature-extraction",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/common/tests/test_api_sentence_similarity.py b/docker_images/common/tests/test_api_sentence_similarity.py
deleted file mode 100644
index 53c9b904..00000000
--- a/docker_images/common/tests/test_api_sentence_similarity.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import json
-import os
-from unittest import TestCase, skipIf
-
-from app.main import ALLOWED_TASKS
-from starlette.testclient import TestClient
-from tests.test_api import TESTABLE_MODELS
-
-
-@skipIf(
-    "sentence-similarity" not in ALLOWED_TASKS,
-    "sentence-similarity not implemented",
-)
-class SentenceSimilarityTestCase(TestCase):
-    def setUp(self):
-        model_id = TESTABLE_MODELS["sentence-similarity"]
-        self.old_model_id = os.getenv("MODEL_ID")
-        self.old_task = os.getenv("TASK")
-        os.environ["MODEL_ID"] = model_id
-        os.environ["TASK"] = "sentence-similarity"
-        from app.main import app
-
-        self.app = app
-
-    @classmethod
-    def setUpClass(cls):
-        from app.main import get_pipeline
-
-        get_pipeline.cache_clear()
-
-    def tearDown(self):
-        if self.old_model_id is not None:
-            os.environ["MODEL_ID"] = self.old_model_id
-        else:
-            del os.environ["MODEL_ID"]
-        if self.old_task is not None:
-            os.environ["TASK"] = self.old_task
-        else:
-            del os.environ["TASK"]
-
-    def test_simple(self):
-        source_sentence = "I am a very happy man"
-        sentences = [
-            "What is this?",
-            "I am a super happy man",
-            "I am a sad man",
-            "I am a happy dog",
-        ]
-        inputs = {"source_sentence": source_sentence, "sentences": sentences}
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json={"inputs": inputs})
-
-        self.assertEqual(
-            response.status_code,
-            200,
-        )
-
-        content = json.loads(response.content)
-        self.assertEqual(type(content), list)
-        self.assertEqual({type(item) for item in content}, {float})
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json=inputs)
-
-        self.assertEqual(
-            response.status_code,
-            200,
-        )
-        content = json.loads(response.content)
-        self.assertEqual(type(content), list)
-        self.assertEqual({type(item) for item in content}, {float})
-
-    def test_missing_input_sentences(self):
-        source_sentence = "I am a very happy man"
-        inputs = {"source_sentence": source_sentence}
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json={"inputs": inputs})
-
-        self.assertEqual(
-            response.status_code,
-            400,
-        )
-
-    def test_malformed_input(self):
-        with TestClient(self.app) as client:
-            response = client.post("/", data=b"\xc3\x28")
-
-        self.assertEqual(
-            response.status_code,
-            400,
-        )
-        self.assertEqual(
-            response.content,
-            b'{"error":"\'utf-8\' codec can\'t decode byte 0xc3 in position 0: invalid continuation byte"}',
-        )
diff --git a/docker_images/diffusers/tests/test_api.py b/docker_images/diffusers/tests/test_api.py
index 019486ef..067ddab7 100644
--- a/docker_images/diffusers/tests/test_api.py
+++ b/docker_images/diffusers/tests/test_api.py
@@ -24,7 +24,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -35,7 +34,6 @@
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/fasttext/tests/test_api.py b/docker_images/fasttext/tests/test_api.py
index 5b16588f..171d82aa 100644
--- a/docker_images/fasttext/tests/test_api.py
+++ b/docker_images/fasttext/tests/test_api.py
@@ -25,7 +25,6 @@
     "image-classification",
     "language-identification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "structured-data-classification",
     "text-to-speech",
diff --git a/docker_images/nemo/tests/test_api.py b/docker_images/nemo/tests/test_api.py
index 0c3f1f9c..5bcec334 100644
--- a/docker_images/nemo/tests/test_api.py
+++ b/docker_images/nemo/tests/test_api.py
@@ -21,7 +21,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "structured-data-classification",
     "text-classification",
@@ -31,7 +30,6 @@
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/open_clip/tests/test_api.py b/docker_images/open_clip/tests/test_api.py
index 38fa0f1d..7077a4bb 100644
--- a/docker_images/open_clip/tests/test_api.py
+++ b/docker_images/open_clip/tests/test_api.py
@@ -24,7 +24,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -35,7 +34,6 @@
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/paddlenlp/tests/test_api.py b/docker_images/paddlenlp/tests/test_api.py
index e86d5eaa..1ae165e7 100644
--- a/docker_images/paddlenlp/tests/test_api.py
+++ b/docker_images/paddlenlp/tests/test_api.py
@@ -23,7 +23,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -34,7 +33,6 @@
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/peft/tests/test_api.py b/docker_images/peft/tests/test_api.py
index bca74322..960e1192 100644
--- a/docker_images/peft/tests/test_api.py
+++ b/docker_images/peft/tests/test_api.py
@@ -20,7 +20,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -29,7 +28,6 @@
     "token-classification",
     "conversational",
     "feature-extraction",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/pyannote_audio/tests/test_api.py b/docker_images/pyannote_audio/tests/test_api.py
index 7a4a3da2..5c740e7c 100644
--- a/docker_images/pyannote_audio/tests/test_api.py
+++ b/docker_images/pyannote_audio/tests/test_api.py
@@ -20,7 +20,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "structured-data-classification",
     "text-classification",
@@ -30,7 +29,6 @@
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/sentence_transformers/app/main.py b/docker_images/sentence_transformers/app/main.py
index 8c301bfc..780450aa 100644
--- a/docker_images/sentence_transformers/app/main.py
+++ b/docker_images/sentence_transformers/app/main.py
@@ -4,11 +4,7 @@
 from typing import Dict, Type
 
 from api_inference_community.routes import pipeline_route, status_ok
-from app.pipelines import (
-    FeatureExtractionPipeline,
-    Pipeline,
-    SentenceSimilarityPipeline,
-)
+from app.pipelines import FeatureExtractionPipeline, Pipeline
 from starlette.applications import Starlette
 from starlette.middleware import Middleware
 from starlette.middleware.gzip import GZipMiddleware
@@ -38,7 +34,6 @@
 # directories. Implement directly within the directories.
 ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {
     "feature-extraction": FeatureExtractionPipeline,
-    "sentence-similarity": SentenceSimilarityPipeline,
 }
 
 
diff --git a/docker_images/sentence_transformers/app/pipelines/__init__.py b/docker_images/sentence_transformers/app/pipelines/__init__.py
index 2ba1548b..00c3cb22 100644
--- a/docker_images/sentence_transformers/app/pipelines/__init__.py
+++ b/docker_images/sentence_transformers/app/pipelines/__init__.py
@@ -1,4 +1,3 @@
 from app.pipelines.base import Pipeline, PipelineException  # isort:skip
 
 from app.pipelines.feature_extraction import FeatureExtractionPipeline
-from app.pipelines.sentence_similarity import SentenceSimilarityPipeline
diff --git a/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py b/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py
deleted file mode 100644
index 5829ff30..00000000
--- a/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import os
-from typing import Dict, List, Union
-
-from app.pipelines import Pipeline
-from sentence_transformers import SentenceTransformer, util
-
-
-class SentenceSimilarityPipeline(Pipeline):
-    def __init__(
-        self,
-        model_id: str,
-    ):
-        self.model = SentenceTransformer(
-            model_id, use_auth_token=os.getenv("HF_API_TOKEN")
-        )
-
-    def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]:
-        """
-        Args:
-            inputs (:obj:`dict`):
-                a dictionary containing two keys, 'source_sentence' mapping
-                to the sentence that will be compared against all the others,
-                and 'sentences', mapping to a list of strings to which the
-                source will be compared.
-        Return:
-            A :obj:`list` of floats: Cosine similarity between `source_sentence` and each sentence from `sentences`.
-        """
-        embeddings1 = self.model.encode(
-            inputs["source_sentence"], convert_to_tensor=True
-        )
-        embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True)
-        similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0]
-        return similarities
diff --git a/docker_images/sentence_transformers/tests/test_api.py b/docker_images/sentence_transformers/tests/test_api.py
index 4b53e113..ccd72622 100644
--- a/docker_images/sentence_transformers/tests/test_api.py
+++ b/docker_images/sentence_transformers/tests/test_api.py
@@ -9,8 +9,8 @@
 # Tests do not check the actual values of the model output, so small dummy
 # models are recommended for faster tests.
 TESTABLE_MODELS: Dict[str, List[str]] = {
-    "feature-extraction": ["bert-base-uncased"],
-    "sentence-similarity": [
+    "feature-extraction": [
+        "bert-base-uncased",
         "sentence-transformers/paraphrase-distilroberta-base-v1",
         "sentence-transformers/paraphrase-xlm-r-multilingual-v1",
     ],
@@ -23,7 +23,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "text-generation",
     "text-to-speech",
 }
diff --git a/docker_images/sentence_transformers/tests/test_api_sentence_similarity.py b/docker_images/sentence_transformers/tests/test_api_sentence_similarity.py
deleted file mode 100644
index 98abeef8..00000000
--- a/docker_images/sentence_transformers/tests/test_api_sentence_similarity.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import json
-import os
-from unittest import TestCase, skipIf
-
-from app.main import ALLOWED_TASKS
-from parameterized import parameterized_class
-from starlette.testclient import TestClient
-from tests.test_api import TESTABLE_MODELS
-
-
-@skipIf(
-    "feature-extraction" not in ALLOWED_TASKS,
-    "feature-extraction not implemented",
-)
-@parameterized_class(
-    [{"model_id": model_id} for model_id in TESTABLE_MODELS["sentence-similarity"]]
-)
-class SentenceSimilarityTestCase(TestCase):
-    def setUp(self):
-        self.old_model_id = os.getenv("MODEL_ID")
-        self.old_task = os.getenv("TASK")
-        os.environ["MODEL_ID"] = self.model_id
-        os.environ["TASK"] = "sentence-similarity"
-        from app.main import app
-
-        self.app = app
-
-    def tearDown(self):
-        if self.old_model_id is not None:
-            os.environ["MODEL_ID"] = self.old_model_id
-        else:
-            del os.environ["MODEL_ID"]
-        if self.old_task is not None:
-            os.environ["TASK"] = self.old_task
-        else:
-            del os.environ["TASK"]
-
-    def test_simple(self):
-        source_sentence = "I am a very happy man"
-        sentences = [
-            "What is this?",
-            "I am a super happy man",
-            "I am a sad man",
-            "I am a happy dog",
-        ]
-        inputs = {"source_sentence": source_sentence, "sentences": sentences}
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json={"inputs": inputs})
-
-        self.assertEqual(
-            response.status_code,
-            200,
-        )
-
-        content = json.loads(response.content)
-        self.assertEqual(type(content), list)
-        self.assertEqual({type(item) for item in content}, {float})
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json=inputs)
-
-        self.assertEqual(
-            response.status_code,
-            200,
-        )
-        content = json.loads(response.content)
-        self.assertEqual(type(content), list)
-        self.assertEqual({type(item) for item in content}, {float})
-
-    def test_missing_input_sentences(self):
-        source_sentence = "I am a very happy man"
-        inputs = {"source_sentence": source_sentence}
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json={"inputs": inputs})
-
-        self.assertEqual(
-            response.status_code,
-            400,
-        )
-
-    def test_malformed_input(self):
-        with TestClient(self.app) as client:
-            response = client.post("/", data=b"\xc3\x28")
-
-        self.assertEqual(
-            response.status_code,
-            400,
-        )
-        self.assertEqual(
-            response.content,
-            b'{"error":"\'utf-8\' codec can\'t decode byte 0xc3 in position 0: invalid continuation byte"}',
-        )
diff --git a/docker_images/setfit/tests/test_api.py b/docker_images/setfit/tests/test_api.py
index aa2f9082..4dbc672e 100644
--- a/docker_images/setfit/tests/test_api.py
+++ b/docker_images/setfit/tests/test_api.py
@@ -20,7 +20,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -29,7 +28,6 @@
     "token-classification",
     "conversational",
     "feature-extraction",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/sklearn/tests/test_api.py b/docker_images/sklearn/tests/test_api.py
index aae326da..e12cd587 100644
--- a/docker_images/sklearn/tests/test_api.py
+++ b/docker_images/sklearn/tests/test_api.py
@@ -343,7 +343,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "tabular-classification",
     "text-generation",
     "text-to-speech",
diff --git a/docker_images/spacy/app/main.py b/docker_images/spacy/app/main.py
index ddc0e615..757eb97c 100644
--- a/docker_images/spacy/app/main.py
+++ b/docker_images/spacy/app/main.py
@@ -6,7 +6,6 @@
 from api_inference_community.routes import pipeline_route, status_ok
 from app.pipelines import (
     Pipeline,
-    SentenceSimilarityPipeline,
     TextClassificationPipeline,
     TokenClassificationPipeline,
 )
@@ -31,7 +30,6 @@
 # - translation
 # - summarization
 # - automatic-speech-recognition
-# - sentence-similarity
 # - ...
 # For instance
 # from app.pipelines import AutomaticSpeechRecognitionPipeline
@@ -41,7 +39,6 @@
 ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {
     "token-classification": TokenClassificationPipeline,
     "text-classification": TextClassificationPipeline,
-    "sentence-similarity": SentenceSimilarityPipeline,
 }
 
 
diff --git a/docker_images/spacy/app/pipelines/__init__.py b/docker_images/spacy/app/pipelines/__init__.py
index 11c0d8b9..cb40d0e1 100644
--- a/docker_images/spacy/app/pipelines/__init__.py
+++ b/docker_images/spacy/app/pipelines/__init__.py
@@ -1,5 +1,4 @@
 from app.pipelines.base import Pipeline, PipelineException  # isort:skip
 
-from app.pipelines.sentence_similarity import SentenceSimilarityPipeline
 from app.pipelines.text_classification import TextClassificationPipeline
 from app.pipelines.token_classification import TokenClassificationPipeline
diff --git a/docker_images/spacy/app/pipelines/sentence_similarity.py b/docker_images/spacy/app/pipelines/sentence_similarity.py
deleted file mode 100644
index f7cafa62..00000000
--- a/docker_images/spacy/app/pipelines/sentence_similarity.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-import subprocess
-import sys
-from typing import Dict, List, Union
-
-from app.pipelines import Pipeline
-
-
-class SentenceSimilarityPipeline(Pipeline):
-    def __init__(
-        self,
-        model_id: str,
-    ):
-        # At the time, only public models from spaCy are allowed in the inference API.
-        full_model_path = model_id.split("/")
-        if len(full_model_path) != 2:
-            raise ValueError(
-                f"Invalid model_id: {model_id}. It should have a namespace (:namespace:/:model_name:)"
-            )
-        namespace, model_name = full_model_path
-        hf_endpoint = os.getenv("HF_ENDPOINT", "https://huggingface.co")
-        package = f"{hf_endpoint}/{namespace}/{model_name}/resolve/main/{model_name}-any-py3-none-any.whl"
-        cache_dir = os.environ["PIP_CACHE"]
-        subprocess.check_call(
-            [sys.executable, "-m", "pip", "install", "--cache-dir", cache_dir, package]
-        )
-
-        import spacy
-
-        self.model = spacy.load(model_name)
-
-    def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]:
-        """
-        Args:
-            inputs (:obj:`dict`):
-                a dictionary containing two keys, 'source_sentence' mapping
-                to the sentence that will be compared against all the others,
-                and 'sentences', mapping to a list of strings to which the
-                source will be compared.
-        Return:
-            A :obj:`list` of floats: Some similarity measure between `source_sentence` and each sentence from `sentences`.
-        """
-        source_sentence = inputs["source_sentence"]
-        source_doc = self.model(source_sentence)
-
-        similarities = []
-        for sentence in inputs["sentences"]:
-            search_doc = self.model(sentence)
-            similarities.append(source_doc.similarity(search_doc))
-
-        return similarities
diff --git a/docker_images/spacy/tests/test_api.py b/docker_images/spacy/tests/test_api.py
index 601c9d59..ee11877b 100644
--- a/docker_images/spacy/tests/test_api.py
+++ b/docker_images/spacy/tests/test_api.py
@@ -14,7 +14,6 @@
     # "text-generation": "mysample-gpt2",
     "token-classification": "spacy/en_core_web_sm",
     "text-classification": "explosion/en_textcat_goemotions",
-    "sentence-similarity": "spacy/en_core_web_sm",
 }
 
 
@@ -24,7 +23,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "text-generation",
     "text-to-speech",
 }
diff --git a/docker_images/spacy/tests/test_api_sentence_similarity.py b/docker_images/spacy/tests/test_api_sentence_similarity.py
deleted file mode 100644
index 49a373d4..00000000
--- a/docker_images/spacy/tests/test_api_sentence_similarity.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import json
-import os
-from unittest import TestCase, skipIf
-
-from app.main import ALLOWED_TASKS
-from starlette.testclient import TestClient
-from tests.test_api import TESTABLE_MODELS
-
-
-@skipIf(
-    "sentence-similarity" not in ALLOWED_TASKS,
-    "sentence-similarity not implemented",
-)
-class SentenceSimilarityTestCase(TestCase):
-    def setUp(self):
-        model_id = TESTABLE_MODELS["sentence-similarity"]
-        self.old_model_id = os.getenv("MODEL_ID")
-        self.old_task = os.getenv("TASK")
-        os.environ["MODEL_ID"] = model_id
-        os.environ["TASK"] = "sentence-similarity"
-        from app.main import app
-
-        self.app = app
-
-    @classmethod
-    def setUpClass(cls):
-        from app.main import get_pipeline
-
-        get_pipeline.cache_clear()
-
-    def tearDown(self):
-        if self.old_model_id is not None:
-            os.environ["MODEL_ID"] = self.old_model_id
-        else:
-            del os.environ["MODEL_ID"]
-        if self.old_task is not None:
-            os.environ["TASK"] = self.old_task
-        else:
-            del os.environ["TASK"]
-
-    def test_simple(self):
-        source_sentence = "I am a very happy man"
-        sentences = [
-            "What is this?",
-            "I am a super happy man",
-            "I am a sad man",
-            "I am a happy dog",
-        ]
-        inputs = {"source_sentence": source_sentence, "sentences": sentences}
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json={"inputs": inputs})
-        self.assertEqual(
-            response.status_code,
-            200,
-        )
-
-        content = json.loads(response.content)
-        self.assertEqual(type(content), list)
-        self.assertEqual({type(item) for item in content}, {float})
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json=inputs)
-
-        self.assertEqual(
-            response.status_code,
-            200,
-        )
-        content = json.loads(response.content)
-        self.assertEqual(type(content), list)
-        self.assertEqual({type(item) for item in content}, {float})
-
-    def test_missing_input_sentences(self):
-        source_sentence = "I am a very happy man"
-        inputs = {"source_sentence": source_sentence}
-
-        with TestClient(self.app) as client:
-            response = client.post("/", json={"inputs": inputs})
-
-        self.assertEqual(
-            response.status_code,
-            400,
-        )
-
-    def test_malformed_input(self):
-        with TestClient(self.app) as client:
-            response = client.post("/", data=b"\xc3\x28")
-
-        self.assertEqual(
-            response.status_code,
-            400,
-        )
-        self.assertEqual(
-            response.content,
-            b'{"error":"\'utf-8\' codec can\'t decode byte 0xc3 in position 0: invalid continuation byte"}',
-        )
diff --git a/docker_images/span_marker/tests/test_api.py b/docker_images/span_marker/tests/test_api.py
index 64531ae8..5830b861 100644
--- a/docker_images/span_marker/tests/test_api.py
+++ b/docker_images/span_marker/tests/test_api.py
@@ -21,7 +21,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "speech-segmentation",
     "tabular-classification",
     "tabular-regression",
@@ -32,7 +31,6 @@
     "conversational",
     "feature-extraction",
     "question-answering",
-    "sentence-similarity",
     "fill-mask",
     "table-question-answering",
     "summarization",
diff --git a/docker_images/stanza/tests/test_api.py b/docker_images/stanza/tests/test_api.py
index e7da5ef9..e19071a2 100644
--- a/docker_images/stanza/tests/test_api.py
+++ b/docker_images/stanza/tests/test_api.py
@@ -23,7 +23,6 @@
     "feature-extraction",
     "image-classification",
     "question-answering",
-    "sentence-similarity",
     "structured-data-classification",
     "speech-segmentation",
     "text-to-speech",
diff --git a/tests/test_dockers.py b/tests/test_dockers.py
index 49602605..a48c9d47 100644
--- a/tests/test_dockers.py
+++ b/tests/test_dockers.py
@@ -126,12 +126,12 @@ def test_sentence_transformers(self):
         )
         self.framework_docker_test(
             "sentence_transformers",
-            "sentence-similarity",
+            "feature-extraction",
             "ymelka/camembert-cosmetic-similarity-cp1200",
         )
         self.framework_docker_test(
             "sentence_transformers",
-            "sentence-similarity",
+            "feature-extraction",
             "sentence-transformers/paraphrase-distilroberta-base-v1",
         )
         self.framework_invalid_test("sentence_transformers")
@@ -253,11 +253,6 @@ def test_spacy(self):
             "text-classification",
             "cverluise/xx_cat_pateexx_md",
         )
-        self.framework_docker_test(
-            "spacy",
-            "sentence-similarity",
-            "spacy/en_core_web_sm",
-        )
         self.framework_invalid_test("spacy")
 
     def test_span_marker(self):
diff --git a/tests/test_nlp.py b/tests/test_nlp.py
index 906936ee..0ca3b814 100644
--- a/tests/test_nlp.py
+++ b/tests/test_nlp.py
@@ -45,26 +45,6 @@ def test_missing_input(self):
             normalize_payload_nlp(bpayload, "question-answering")
 
 
-class SentenceSimilarityValidationTestCase(TestCase):
-    def test_valid_input(self):
-        source_sentence = "why is the sky blue?"
-        sentences = ["this is", "a list of sentences"]
-        inputs = {"source_sentence": source_sentence, "sentences": sentences}
-        bpayload = json.dumps({"inputs": inputs}).encode("utf-8")
-        normalized_inputs, processed_params = normalize_payload_nlp(
-            bpayload, "sentence-similarity"
-        )
-        self.assertEqual(processed_params, {})
-        self.assertEqual(inputs, normalized_inputs)
-
-    def test_missing_input(self):
-        source_sentence = "why is the sky blue?"
-        inputs = {"source_sentence": source_sentence}
-        bpayload = json.dumps({"inputs": inputs}).encode("utf-8")
-        with self.assertRaises(ValidationError):
-            normalize_payload_nlp(bpayload, "sentence-similarity")
-
-
 class ConversationalValidationTestCase(TestCase):
     def test_valid_inputs(self):
         past_user_inputs = ["Which movie is the best ?"]