From d83076c5ec0dee1986cf9338d00d261604abb0d0 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 10 Jul 2024 16:25:59 +0200 Subject: [PATCH] Remove sentence similarity --- api_inference_community/validation.py | 7 -- .../adapter_transformers/tests/test_api.py | 1 - docker_images/bertopic/tests/test_api.py | 2 - .../common/app/pipelines/__init__.py | 1 - .../app/pipelines/sentence_similarity.py | 33 ------- docker_images/common/tests/test_api.py | 2 - .../tests/test_api_sentence_similarity.py | 97 ------------------- docker_images/diffusers/tests/test_api.py | 2 - docker_images/fasttext/tests/test_api.py | 1 - docker_images/nemo/tests/test_api.py | 2 - docker_images/open_clip/tests/test_api.py | 2 - docker_images/paddlenlp/tests/test_api.py | 2 - docker_images/peft/tests/test_api.py | 2 - .../pyannote_audio/tests/test_api.py | 2 - .../sentence_transformers/app/main.py | 7 +- .../app/pipelines/__init__.py | 1 - .../app/pipelines/sentence_similarity.py | 33 ------- .../sentence_transformers/tests/test_api.py | 5 +- .../tests/test_api_sentence_similarity.py | 94 ------------------ docker_images/setfit/tests/test_api.py | 2 - docker_images/sklearn/tests/test_api.py | 1 - docker_images/spacy/app/main.py | 3 - docker_images/spacy/app/pipelines/__init__.py | 1 - .../app/pipelines/sentence_similarity.py | 51 ---------- docker_images/spacy/tests/test_api.py | 2 - .../tests/test_api_sentence_similarity.py | 96 ------------------ docker_images/span_marker/tests/test_api.py | 2 - docker_images/stanza/tests/test_api.py | 1 - tests/test_dockers.py | 9 +- tests/test_nlp.py | 20 ---- 30 files changed, 5 insertions(+), 479 deletions(-) delete mode 100644 docker_images/common/app/pipelines/sentence_similarity.py delete mode 100644 docker_images/common/tests/test_api_sentence_similarity.py delete mode 100644 docker_images/sentence_transformers/app/pipelines/sentence_similarity.py delete mode 100644 docker_images/sentence_transformers/tests/test_api_sentence_similarity.py delete mode 100644 docker_images/spacy/app/pipelines/sentence_similarity.py delete mode 100644 docker_images/spacy/tests/test_api_sentence_similarity.py diff --git a/api_inference_community/validation.py b/api_inference_community/validation.py index 1f83844e..6217a6f1 100644 --- a/api_inference_community/validation.py +++ b/api_inference_community/validation.py @@ -78,11 +78,6 @@ class QuestionInputsCheck(BaseModel): context: str -class SentenceSimilarityInputsCheck(BaseModel): - source_sentence: str - sentences: List[str] - - class TableQuestionAnsweringInputsCheck(BaseModel): table: Dict[str, List[str]] query: str @@ -139,7 +134,6 @@ class StringInput(RootModel): "conversational": ConversationalInputsCheck, "question-answering": QuestionInputsCheck, "feature-extraction": StringOrStringBatchInputCheck, - "sentence-similarity": SentenceSimilarityInputsCheck, "table-question-answering": TableQuestionAnsweringInputsCheck, "tabular-classification": TabularDataInputsCheck, "tabular-regression": TabularDataInputsCheck, @@ -203,7 +197,6 @@ def check_inputs(inputs, tag): "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "tabular-classification", diff --git a/docker_images/adapter_transformers/tests/test_api.py b/docker_images/adapter_transformers/tests/test_api.py index 3060db10..80e9458d 100644 --- a/docker_images/adapter_transformers/tests/test_api.py +++ b/docker_images/adapter_transformers/tests/test_api.py @@ -22,7 +22,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "structured-data-classification", "text-generation", "text-to-speech", diff --git a/docker_images/bertopic/tests/test_api.py b/docker_images/bertopic/tests/test_api.py index a756d8bf..3d5ed62e 100644 --- a/docker_images/bertopic/tests/test_api.py +++ b/docker_images/bertopic/tests/test_api.py @@ -20,7 +20,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -29,7 +28,6 @@ "token-classification", "conversational", "feature-extraction", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/common/app/pipelines/__init__.py b/docker_images/common/app/pipelines/__init__.py index b45e41d6..8dbcacca 100644 --- a/docker_images/common/app/pipelines/__init__.py +++ b/docker_images/common/app/pipelines/__init__.py @@ -8,7 +8,6 @@ from app.pipelines.feature_extraction import FeatureExtractionPipeline from app.pipelines.image_classification import ImageClassificationPipeline from app.pipelines.question_answering import QuestionAnsweringPipeline -from app.pipelines.sentence_similarity import SentenceSimilarityPipeline from app.pipelines.speech_segmentation import SpeechSegmentationPipeline from app.pipelines.tabular_classification_pipeline import TabularClassificationPipeline from app.pipelines.tabular_regression_pipeline import TabularRegressionPipeline diff --git a/docker_images/common/app/pipelines/sentence_similarity.py b/docker_images/common/app/pipelines/sentence_similarity.py deleted file mode 100644 index 95c08a70..00000000 --- a/docker_images/common/app/pipelines/sentence_similarity.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Dict, List, Union - -from app.pipelines import Pipeline - - -class SentenceSimilarityPipeline(Pipeline): - def __init__( - self, - model_id: str, - ): - # IMPLEMENT_THIS - # Preload all the elements you are going to need at inference. - # For instance your model, processors, tokenizer that might be needed. - # This function is only called once, so do all the heavy processing I/O here - raise NotImplementedError( - "Please implement SentenceSimilarityPipeline __init__ function" - ) - - def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]: - """ - Args: - inputs (:obj:`dict`): - a dictionary containing two keys, 'source_sentence' mapping - to the sentence that will be compared against all the others, - and 'sentences', mapping to a list of strings to which the - source will be compared. - Return: - A :obj:`list` of floats: Some similarity measure between `source_sentence` and each sentence from `sentences`. - """ - # IMPLEMENT_THIS - raise NotImplementedError( - "Please implement SentenceSimilarityPipeline __call__ function" - ) diff --git a/docker_images/common/tests/test_api.py b/docker_images/common/tests/test_api.py index a7e085da..a3a9b851 100644 --- a/docker_images/common/tests/test_api.py +++ b/docker_images/common/tests/test_api.py @@ -22,7 +22,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -31,7 +30,6 @@ "token-classification", "conversational", "feature-extraction", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/common/tests/test_api_sentence_similarity.py b/docker_images/common/tests/test_api_sentence_similarity.py deleted file mode 100644 index 53c9b904..00000000 --- a/docker_images/common/tests/test_api_sentence_similarity.py +++ /dev/null @@ -1,97 +0,0 @@ -import json -import os -from unittest import TestCase, skipIf - -from app.main import ALLOWED_TASKS -from starlette.testclient import TestClient -from tests.test_api import TESTABLE_MODELS - - -@skipIf( - "sentence-similarity" not in ALLOWED_TASKS, - "sentence-similarity not implemented", -) -class SentenceSimilarityTestCase(TestCase): - def setUp(self): - model_id = TESTABLE_MODELS["sentence-similarity"] - self.old_model_id = os.getenv("MODEL_ID") - self.old_task = os.getenv("TASK") - os.environ["MODEL_ID"] = model_id - os.environ["TASK"] = "sentence-similarity" - from app.main import app - - self.app = app - - @classmethod - def setUpClass(cls): - from app.main import get_pipeline - - get_pipeline.cache_clear() - - def tearDown(self): - if self.old_model_id is not None: - os.environ["MODEL_ID"] = self.old_model_id - else: - del os.environ["MODEL_ID"] - if self.old_task is not None: - os.environ["TASK"] = self.old_task - else: - del os.environ["TASK"] - - def test_simple(self): - source_sentence = "I am a very happy man" - sentences = [ - "What is this?", - "I am a super happy man", - "I am a sad man", - "I am a happy dog", - ] - inputs = {"source_sentence": source_sentence, "sentences": sentences} - - with TestClient(self.app) as client: - response = client.post("/", json={"inputs": inputs}) - - self.assertEqual( - response.status_code, - 200, - ) - - content = json.loads(response.content) - self.assertEqual(type(content), list) - self.assertEqual({type(item) for item in content}, {float}) - - with TestClient(self.app) as client: - response = client.post("/", json=inputs) - - self.assertEqual( - response.status_code, - 200, - ) - content = json.loads(response.content) - self.assertEqual(type(content), list) - self.assertEqual({type(item) for item in content}, {float}) - - def test_missing_input_sentences(self): - source_sentence = "I am a very happy man" - inputs = {"source_sentence": source_sentence} - - with TestClient(self.app) as client: - response = client.post("/", json={"inputs": inputs}) - - self.assertEqual( - response.status_code, - 400, - ) - - def test_malformed_input(self): - with TestClient(self.app) as client: - response = client.post("/", data=b"\xc3\x28") - - self.assertEqual( - response.status_code, - 400, - ) - self.assertEqual( - response.content, - b'{"error":"\'utf-8\' codec can\'t decode byte 0xc3 in position 0: invalid continuation byte"}', - ) diff --git a/docker_images/diffusers/tests/test_api.py b/docker_images/diffusers/tests/test_api.py index 019486ef..067ddab7 100644 --- a/docker_images/diffusers/tests/test_api.py +++ b/docker_images/diffusers/tests/test_api.py @@ -24,7 +24,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -35,7 +34,6 @@ "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/fasttext/tests/test_api.py b/docker_images/fasttext/tests/test_api.py index 5b16588f..171d82aa 100644 --- a/docker_images/fasttext/tests/test_api.py +++ b/docker_images/fasttext/tests/test_api.py @@ -25,7 +25,6 @@ "image-classification", "language-identification", "question-answering", - "sentence-similarity", "speech-segmentation", "structured-data-classification", "text-to-speech", diff --git a/docker_images/nemo/tests/test_api.py b/docker_images/nemo/tests/test_api.py index 0c3f1f9c..5bcec334 100644 --- a/docker_images/nemo/tests/test_api.py +++ b/docker_images/nemo/tests/test_api.py @@ -21,7 +21,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "structured-data-classification", "text-classification", @@ -31,7 +30,6 @@ "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/open_clip/tests/test_api.py b/docker_images/open_clip/tests/test_api.py index 38fa0f1d..7077a4bb 100644 --- a/docker_images/open_clip/tests/test_api.py +++ b/docker_images/open_clip/tests/test_api.py @@ -24,7 +24,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -35,7 +34,6 @@ "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/paddlenlp/tests/test_api.py b/docker_images/paddlenlp/tests/test_api.py index e86d5eaa..1ae165e7 100644 --- a/docker_images/paddlenlp/tests/test_api.py +++ b/docker_images/paddlenlp/tests/test_api.py @@ -23,7 +23,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -34,7 +33,6 @@ "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/peft/tests/test_api.py b/docker_images/peft/tests/test_api.py index bca74322..960e1192 100644 --- a/docker_images/peft/tests/test_api.py +++ b/docker_images/peft/tests/test_api.py @@ -20,7 +20,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -29,7 +28,6 @@ "token-classification", "conversational", "feature-extraction", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/pyannote_audio/tests/test_api.py b/docker_images/pyannote_audio/tests/test_api.py index 7a4a3da2..5c740e7c 100644 --- a/docker_images/pyannote_audio/tests/test_api.py +++ b/docker_images/pyannote_audio/tests/test_api.py @@ -20,7 +20,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "structured-data-classification", "text-classification", @@ -30,7 +29,6 @@ "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/sentence_transformers/app/main.py b/docker_images/sentence_transformers/app/main.py index 8c301bfc..780450aa 100644 --- a/docker_images/sentence_transformers/app/main.py +++ b/docker_images/sentence_transformers/app/main.py @@ -4,11 +4,7 @@ from typing import Dict, Type from api_inference_community.routes import pipeline_route, status_ok -from app.pipelines import ( - FeatureExtractionPipeline, - Pipeline, - SentenceSimilarityPipeline, -) +from app.pipelines import FeatureExtractionPipeline, Pipeline from starlette.applications import Starlette from starlette.middleware import Middleware from starlette.middleware.gzip import GZipMiddleware @@ -38,7 +34,6 @@ # directories. Implement directly within the directories. ALLOWED_TASKS: Dict[str, Type[Pipeline]] = { "feature-extraction": FeatureExtractionPipeline, - "sentence-similarity": SentenceSimilarityPipeline, } diff --git a/docker_images/sentence_transformers/app/pipelines/__init__.py b/docker_images/sentence_transformers/app/pipelines/__init__.py index 2ba1548b..00c3cb22 100644 --- a/docker_images/sentence_transformers/app/pipelines/__init__.py +++ b/docker_images/sentence_transformers/app/pipelines/__init__.py @@ -1,4 +1,3 @@ from app.pipelines.base import Pipeline, PipelineException # isort:skip from app.pipelines.feature_extraction import FeatureExtractionPipeline -from app.pipelines.sentence_similarity import SentenceSimilarityPipeline diff --git a/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py b/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py deleted file mode 100644 index 5829ff30..00000000 --- a/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -from typing import Dict, List, Union - -from app.pipelines import Pipeline -from sentence_transformers import SentenceTransformer, util - - -class SentenceSimilarityPipeline(Pipeline): - def __init__( - self, - model_id: str, - ): - self.model = SentenceTransformer( - model_id, use_auth_token=os.getenv("HF_API_TOKEN") - ) - - def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]: - """ - Args: - inputs (:obj:`dict`): - a dictionary containing two keys, 'source_sentence' mapping - to the sentence that will be compared against all the others, - and 'sentences', mapping to a list of strings to which the - source will be compared. - Return: - A :obj:`list` of floats: Cosine similarity between `source_sentence` and each sentence from `sentences`. - """ - embeddings1 = self.model.encode( - inputs["source_sentence"], convert_to_tensor=True - ) - embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True) - similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0] - return similarities diff --git a/docker_images/sentence_transformers/tests/test_api.py b/docker_images/sentence_transformers/tests/test_api.py index 4b53e113..ccd72622 100644 --- a/docker_images/sentence_transformers/tests/test_api.py +++ b/docker_images/sentence_transformers/tests/test_api.py @@ -9,8 +9,8 @@ # Tests do not check the actual values of the model output, so small dummy # models are recommended for faster tests. TESTABLE_MODELS: Dict[str, List[str]] = { - "feature-extraction": ["bert-base-uncased"], - "sentence-similarity": [ + "feature-extraction": [ + "bert-base-uncased", "sentence-transformers/paraphrase-distilroberta-base-v1", "sentence-transformers/paraphrase-xlm-r-multilingual-v1", ], @@ -23,7 +23,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "text-generation", "text-to-speech", } diff --git a/docker_images/sentence_transformers/tests/test_api_sentence_similarity.py b/docker_images/sentence_transformers/tests/test_api_sentence_similarity.py deleted file mode 100644 index 98abeef8..00000000 --- a/docker_images/sentence_transformers/tests/test_api_sentence_similarity.py +++ /dev/null @@ -1,94 +0,0 @@ -import json -import os -from unittest import TestCase, skipIf - -from app.main import ALLOWED_TASKS -from parameterized import parameterized_class -from starlette.testclient import TestClient -from tests.test_api import TESTABLE_MODELS - - -@skipIf( - "feature-extraction" not in ALLOWED_TASKS, - "feature-extraction not implemented", -) -@parameterized_class( - [{"model_id": model_id} for model_id in TESTABLE_MODELS["sentence-similarity"]] -) -class SentenceSimilarityTestCase(TestCase): - def setUp(self): - self.old_model_id = os.getenv("MODEL_ID") - self.old_task = os.getenv("TASK") - os.environ["MODEL_ID"] = self.model_id - os.environ["TASK"] = "sentence-similarity" - from app.main import app - - self.app = app - - def tearDown(self): - if self.old_model_id is not None: - os.environ["MODEL_ID"] = self.old_model_id - else: - del os.environ["MODEL_ID"] - if self.old_task is not None: - os.environ["TASK"] = self.old_task - else: - del os.environ["TASK"] - - def test_simple(self): - source_sentence = "I am a very happy man" - sentences = [ - "What is this?", - "I am a super happy man", - "I am a sad man", - "I am a happy dog", - ] - inputs = {"source_sentence": source_sentence, "sentences": sentences} - - with TestClient(self.app) as client: - response = client.post("/", json={"inputs": inputs}) - - self.assertEqual( - response.status_code, - 200, - ) - - content = json.loads(response.content) - self.assertEqual(type(content), list) - self.assertEqual({type(item) for item in content}, {float}) - - with TestClient(self.app) as client: - response = client.post("/", json=inputs) - - self.assertEqual( - response.status_code, - 200, - ) - content = json.loads(response.content) - self.assertEqual(type(content), list) - self.assertEqual({type(item) for item in content}, {float}) - - def test_missing_input_sentences(self): - source_sentence = "I am a very happy man" - inputs = {"source_sentence": source_sentence} - - with TestClient(self.app) as client: - response = client.post("/", json={"inputs": inputs}) - - self.assertEqual( - response.status_code, - 400, - ) - - def test_malformed_input(self): - with TestClient(self.app) as client: - response = client.post("/", data=b"\xc3\x28") - - self.assertEqual( - response.status_code, - 400, - ) - self.assertEqual( - response.content, - b'{"error":"\'utf-8\' codec can\'t decode byte 0xc3 in position 0: invalid continuation byte"}', - ) diff --git a/docker_images/setfit/tests/test_api.py b/docker_images/setfit/tests/test_api.py index aa2f9082..4dbc672e 100644 --- a/docker_images/setfit/tests/test_api.py +++ b/docker_images/setfit/tests/test_api.py @@ -20,7 +20,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -29,7 +28,6 @@ "token-classification", "conversational", "feature-extraction", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/sklearn/tests/test_api.py b/docker_images/sklearn/tests/test_api.py index aae326da..e12cd587 100644 --- a/docker_images/sklearn/tests/test_api.py +++ b/docker_images/sklearn/tests/test_api.py @@ -343,7 +343,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "tabular-classification", "text-generation", "text-to-speech", diff --git a/docker_images/spacy/app/main.py b/docker_images/spacy/app/main.py index ddc0e615..757eb97c 100644 --- a/docker_images/spacy/app/main.py +++ b/docker_images/spacy/app/main.py @@ -6,7 +6,6 @@ from api_inference_community.routes import pipeline_route, status_ok from app.pipelines import ( Pipeline, - SentenceSimilarityPipeline, TextClassificationPipeline, TokenClassificationPipeline, ) @@ -31,7 +30,6 @@ # - translation # - summarization # - automatic-speech-recognition -# - sentence-similarity # - ... # For instance # from app.pipelines import AutomaticSpeechRecognitionPipeline @@ -41,7 +39,6 @@ ALLOWED_TASKS: Dict[str, Type[Pipeline]] = { "token-classification": TokenClassificationPipeline, "text-classification": TextClassificationPipeline, - "sentence-similarity": SentenceSimilarityPipeline, } diff --git a/docker_images/spacy/app/pipelines/__init__.py b/docker_images/spacy/app/pipelines/__init__.py index 11c0d8b9..cb40d0e1 100644 --- a/docker_images/spacy/app/pipelines/__init__.py +++ b/docker_images/spacy/app/pipelines/__init__.py @@ -1,5 +1,4 @@ from app.pipelines.base import Pipeline, PipelineException # isort:skip -from app.pipelines.sentence_similarity import SentenceSimilarityPipeline from app.pipelines.text_classification import TextClassificationPipeline from app.pipelines.token_classification import TokenClassificationPipeline diff --git a/docker_images/spacy/app/pipelines/sentence_similarity.py b/docker_images/spacy/app/pipelines/sentence_similarity.py deleted file mode 100644 index f7cafa62..00000000 --- a/docker_images/spacy/app/pipelines/sentence_similarity.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import subprocess -import sys -from typing import Dict, List, Union - -from app.pipelines import Pipeline - - -class SentenceSimilarityPipeline(Pipeline): - def __init__( - self, - model_id: str, - ): - # At the time, only public models from spaCy are allowed in the inference API. - full_model_path = model_id.split("/") - if len(full_model_path) != 2: - raise ValueError( - f"Invalid model_id: {model_id}. It should have a namespace (:namespace:/:model_name:)" - ) - namespace, model_name = full_model_path - hf_endpoint = os.getenv("HF_ENDPOINT", "https://huggingface.co") - package = f"{hf_endpoint}/{namespace}/{model_name}/resolve/main/{model_name}-any-py3-none-any.whl" - cache_dir = os.environ["PIP_CACHE"] - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "--cache-dir", cache_dir, package] - ) - - import spacy - - self.model = spacy.load(model_name) - - def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]: - """ - Args: - inputs (:obj:`dict`): - a dictionary containing two keys, 'source_sentence' mapping - to the sentence that will be compared against all the others, - and 'sentences', mapping to a list of strings to which the - source will be compared. - Return: - A :obj:`list` of floats: Some similarity measure between `source_sentence` and each sentence from `sentences`. - """ - source_sentence = inputs["source_sentence"] - source_doc = self.model(source_sentence) - - similarities = [] - for sentence in inputs["sentences"]: - search_doc = self.model(sentence) - similarities.append(source_doc.similarity(search_doc)) - - return similarities diff --git a/docker_images/spacy/tests/test_api.py b/docker_images/spacy/tests/test_api.py index 601c9d59..ee11877b 100644 --- a/docker_images/spacy/tests/test_api.py +++ b/docker_images/spacy/tests/test_api.py @@ -14,7 +14,6 @@ # "text-generation": "mysample-gpt2", "token-classification": "spacy/en_core_web_sm", "text-classification": "explosion/en_textcat_goemotions", - "sentence-similarity": "spacy/en_core_web_sm", } @@ -24,7 +23,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "text-generation", "text-to-speech", } diff --git a/docker_images/spacy/tests/test_api_sentence_similarity.py b/docker_images/spacy/tests/test_api_sentence_similarity.py deleted file mode 100644 index 49a373d4..00000000 --- a/docker_images/spacy/tests/test_api_sentence_similarity.py +++ /dev/null @@ -1,96 +0,0 @@ -import json -import os -from unittest import TestCase, skipIf - -from app.main import ALLOWED_TASKS -from starlette.testclient import TestClient -from tests.test_api import TESTABLE_MODELS - - -@skipIf( - "sentence-similarity" not in ALLOWED_TASKS, - "sentence-similarity not implemented", -) -class SentenceSimilarityTestCase(TestCase): - def setUp(self): - model_id = TESTABLE_MODELS["sentence-similarity"] - self.old_model_id = os.getenv("MODEL_ID") - self.old_task = os.getenv("TASK") - os.environ["MODEL_ID"] = model_id - os.environ["TASK"] = "sentence-similarity" - from app.main import app - - self.app = app - - @classmethod - def setUpClass(cls): - from app.main import get_pipeline - - get_pipeline.cache_clear() - - def tearDown(self): - if self.old_model_id is not None: - os.environ["MODEL_ID"] = self.old_model_id - else: - del os.environ["MODEL_ID"] - if self.old_task is not None: - os.environ["TASK"] = self.old_task - else: - del os.environ["TASK"] - - def test_simple(self): - source_sentence = "I am a very happy man" - sentences = [ - "What is this?", - "I am a super happy man", - "I am a sad man", - "I am a happy dog", - ] - inputs = {"source_sentence": source_sentence, "sentences": sentences} - - with TestClient(self.app) as client: - response = client.post("/", json={"inputs": inputs}) - self.assertEqual( - response.status_code, - 200, - ) - - content = json.loads(response.content) - self.assertEqual(type(content), list) - self.assertEqual({type(item) for item in content}, {float}) - - with TestClient(self.app) as client: - response = client.post("/", json=inputs) - - self.assertEqual( - response.status_code, - 200, - ) - content = json.loads(response.content) - self.assertEqual(type(content), list) - self.assertEqual({type(item) for item in content}, {float}) - - def test_missing_input_sentences(self): - source_sentence = "I am a very happy man" - inputs = {"source_sentence": source_sentence} - - with TestClient(self.app) as client: - response = client.post("/", json={"inputs": inputs}) - - self.assertEqual( - response.status_code, - 400, - ) - - def test_malformed_input(self): - with TestClient(self.app) as client: - response = client.post("/", data=b"\xc3\x28") - - self.assertEqual( - response.status_code, - 400, - ) - self.assertEqual( - response.content, - b'{"error":"\'utf-8\' codec can\'t decode byte 0xc3 in position 0: invalid continuation byte"}', - ) diff --git a/docker_images/span_marker/tests/test_api.py b/docker_images/span_marker/tests/test_api.py index 64531ae8..5830b861 100644 --- a/docker_images/span_marker/tests/test_api.py +++ b/docker_images/span_marker/tests/test_api.py @@ -21,7 +21,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "speech-segmentation", "tabular-classification", "tabular-regression", @@ -32,7 +31,6 @@ "conversational", "feature-extraction", "question-answering", - "sentence-similarity", "fill-mask", "table-question-answering", "summarization", diff --git a/docker_images/stanza/tests/test_api.py b/docker_images/stanza/tests/test_api.py index e7da5ef9..e19071a2 100644 --- a/docker_images/stanza/tests/test_api.py +++ b/docker_images/stanza/tests/test_api.py @@ -23,7 +23,6 @@ "feature-extraction", "image-classification", "question-answering", - "sentence-similarity", "structured-data-classification", "speech-segmentation", "text-to-speech", diff --git a/tests/test_dockers.py b/tests/test_dockers.py index 49602605..a48c9d47 100644 --- a/tests/test_dockers.py +++ b/tests/test_dockers.py @@ -126,12 +126,12 @@ def test_sentence_transformers(self): ) self.framework_docker_test( "sentence_transformers", - "sentence-similarity", + "feature-extraction", "ymelka/camembert-cosmetic-similarity-cp1200", ) self.framework_docker_test( "sentence_transformers", - "sentence-similarity", + "feature-extraction", "sentence-transformers/paraphrase-distilroberta-base-v1", ) self.framework_invalid_test("sentence_transformers") @@ -253,11 +253,6 @@ def test_spacy(self): "text-classification", "cverluise/xx_cat_pateexx_md", ) - self.framework_docker_test( - "spacy", - "sentence-similarity", - "spacy/en_core_web_sm", - ) self.framework_invalid_test("spacy") def test_span_marker(self): diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 906936ee..0ca3b814 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -45,26 +45,6 @@ def test_missing_input(self): normalize_payload_nlp(bpayload, "question-answering") -class SentenceSimilarityValidationTestCase(TestCase): - def test_valid_input(self): - source_sentence = "why is the sky blue?" - sentences = ["this is", "a list of sentences"] - inputs = {"source_sentence": source_sentence, "sentences": sentences} - bpayload = json.dumps({"inputs": inputs}).encode("utf-8") - normalized_inputs, processed_params = normalize_payload_nlp( - bpayload, "sentence-similarity" - ) - self.assertEqual(processed_params, {}) - self.assertEqual(inputs, normalized_inputs) - - def test_missing_input(self): - source_sentence = "why is the sky blue?" - inputs = {"source_sentence": source_sentence} - bpayload = json.dumps({"inputs": inputs}).encode("utf-8") - with self.assertRaises(ValidationError): - normalize_payload_nlp(bpayload, "sentence-similarity") - - class ConversationalValidationTestCase(TestCase): def test_valid_inputs(self): past_user_inputs = ["Which movie is the best ?"]