From 6bf378e0bacaa7176dc5fbd5ab3468b9fe01f614 Mon Sep 17 00:00:00 2001
From: Alvaro Moran
Date: Mon, 25 Nov 2024 16:48:14 +0000
Subject: [PATCH] chore(dependencies): update transformers to v4.46.3

---
 pyproject.toml                                                | 2 +-
 text-generation-inference/docker/Dockerfile                   | 2 +-
 text-generation-inference/server/pyproject.toml               | 2 +-
 .../server/text_generation_server/generator.py                | 3 +++
 .../text_generation_server/jetstream_pt_support/generator.py  | 4 ++++
 5 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 22e7e3f0..449c909b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ keywords = [
 ]
 
 dependencies = [
-    "transformers == 4.41.1",
+    "transformers == 4.46.3",
     "torch == 2.5.1",
     "torch-xla[tpu] == 2.5.1",
     'typer == 0.6.1',
diff --git a/text-generation-inference/docker/Dockerfile b/text-generation-inference/docker/Dockerfile
index 2775cf7d..3caf72a5 100644
--- a/text-generation-inference/docker/Dockerfile
+++ b/text-generation-inference/docker/Dockerfile
@@ -101,7 +101,7 @@ RUN apt-get update -y \
 RUN pip install --upgrade pip
 
 # Install HuggingFace packages
-ARG TRANSFORMERS_VERSION='4.41.1'
+ARG TRANSFORMERS_VERSION='4.46.3'
 ARG ACCELERATE_VERSION='1.1.1'
 ARG SAFETENSORS_VERSION='0.4.2'
diff --git a/text-generation-inference/server/pyproject.toml b/text-generation-inference/server/pyproject.toml
index a10727b8..db37423b 100644
--- a/text-generation-inference/server/pyproject.toml
+++ b/text-generation-inference/server/pyproject.toml
@@ -15,7 +15,7 @@ dependencies = [
     'grpc-interceptor == 0.15.2',
     'typer == 0.6.1',
     'safetensors == 0.4.2',
-    'transformers == 4.41.1',
+    'transformers == 4.46.3',
     'loguru == 0.6.0',
     "sentencepiece == 0.2.0",
     "numpy<2.0",
diff --git a/text-generation-inference/server/text_generation_server/generator.py b/text-generation-inference/server/text_generation_server/generator.py
index 6bfbb661..c657d067 100644
--- a/text-generation-inference/server/text_generation_server/generator.py
+++ b/text-generation-inference/server/text_generation_server/generator.py
@@ -314,6 +314,9 @@ def __init__(
         tokenizer.truncation_side = "left"
         self.tokenizer = tokenizer
         self.special_tokens = self.tokenizer.all_special_ids
+        # The token selector will use the model's generation mixin internal variables to select the next token, and it
+        # expects special tokens to be initialized in the model.
+        model._prepare_special_tokens(generation_config=model.generation_config, device=model.device)
         # Slots are empty to begin with, they will be populated as new batches arrive
         self.slots = []
         self.batch_id = 0
diff --git a/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py b/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
index 97061421..45f0a549 100644
--- a/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
+++ b/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
@@ -262,6 +262,10 @@ def __init__(
         tokenizer.truncation_side = "left"
         self.tokenizer = tokenizer
         self.special_tokens = self.tokenizer.all_special_ids
+        # The token selector will use the model's generation mixin internal variables to select the next token, and it
+        # expects special tokens to be initialized in the model.
+        model = self.engine.pt_model
+        model._prepare_special_tokens(generation_config=model.generation_config, device='cpu')
         # Slots number is static, it cannot grow over the size of the batch
         self.slots = [Slot(i, tokenizer) for i in range(self.model.config.batch_size)]
         self.batch_id = 0