From 17238bfced79748c92422ebbbb9a7d6d4451c16f Mon Sep 17 00:00:00 2001
From: Alvaro Moran <6949769+tengomucho@users.noreply.github.com>
Date: Wed, 3 Apr 2024 16:16:49 +0200
Subject: [PATCH] Repo layout (#10)

* chore: add basic optimum package layout files
* feat: generic optimum features moved outside TGI server
* chore: fix style with ruff and black
* feat(CI): add code quality workflow
* Remove invalid markers
* Attempt to match PEP 440
* Fix initial top level non-compliant versioning
* Update to latest version for tgi build-deps
* Switch from gawk to awk + sed
* Complete the pyproject.toml files
* Use only pyproject.toml
* Revert mypy-protobuf to 3.2.0
* Update version for tgi
* Use test_installs to set up the dependencies
* Move tpu extras to the mandatory deps
* Remove commented code

---------

Co-authored-by: Morgan Funtowicz
---
 .github/workflows/check_code_quality.yml      |  55 ++++++
 MANIFEST.in                                   |  17 ++
 Makefile                                      |  52 ++++-
 optimum/tpu/__init__.py                       |  15 ++
 optimum/tpu/generation/__init__.py            |  17 ++
 .../tpu/generation}/logits_process.py         |   0
 .../tpu/generation}/token_selector.py         |   0
 .../tpu}/model.py                             |   0
 .../tpu}/modeling.py                          |   5 +-
 optimum/tpu/version.py                        |  18 ++
 pyproject.toml                                |  49 ++++-
 setup.cfg                                     |  17 ++
 text-generation-inference/LICENSE             | 181 ------------------
 .../server/build-requirements.txt             |   2 +-
 .../server/pyproject.toml                     |  13 +-
 .../text_generation_server/generator.py       |  12 +-
 .../server/text_generation_server/version.py  |   4 +
 text-generation-inference/tests/test_gemma.py |   2 +-
 text-generation-inference/tests/test_gpt2.py  |   2 +-
 19 files changed, 255 insertions(+), 206 deletions(-)
 create mode 100644 .github/workflows/check_code_quality.yml
 create mode 100644 MANIFEST.in
 create mode 100644 optimum/tpu/__init__.py
 create mode 100644 optimum/tpu/generation/__init__.py
 rename {text-generation-inference/server/text_generation_server => optimum/tpu/generation}/logits_process.py (100%)
 rename {text-generation-inference/server/text_generation_server => optimum/tpu/generation}/token_selector.py (100%)
 rename {text-generation-inference/server/text_generation_server => optimum/tpu}/model.py (100%)
 rename {text-generation-inference/server/text_generation_server => optimum/tpu}/modeling.py (94%)
 create mode 100644 optimum/tpu/version.py
 create mode 100644 setup.cfg
 delete mode 100644 text-generation-inference/LICENSE
 create mode 100644 text-generation-inference/server/text_generation_server/version.py

diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml
new file mode 100644
index 00000000..fec93652
--- /dev/null
+++ b/.github/workflows/check_code_quality.yml
@@ -0,0 +1,55 @@
+name: check_code_quality
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "setup.py"
+      - "optimum/tpu/**.py"
+      - "tests/**.py"
+      - "examples/**.py"
+
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "setup.py"
+      - "optimum/tpu/**.py"
+      - "tests/**.py"
+      - "examples/**.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.10.12]
+        os: [ubuntu-22.04]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@v4
+    - name: Setup Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Create and start a virtual environment
+      run: |
+        python -m venv venv
+        source venv/bin/activate
+    - name: Install dependencies
+      run: |
+        source venv/bin/activate
+        pip install
--upgrade pip + pip install .[quality] + - name: Check style with black + run: | + source venv/bin/activate + black --check . + - name: Check style with ruff + run: | + source venv/bin/activate + ruff check . diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..0cdc63aa --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,17 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include README.md +include LICENSE + diff --git a/Makefile b/Makefile index 9636b4a8..7868bac0 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,47 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +SHELL := /bin/bash +CURRENT_DIR = $(shell pwd) +DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-neuron.git +# If CLONE_URL is empty, revert to DEFAULT_CLONE_URL +REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) -VERSION := "0.0.1" +.PHONY: build_dist style style_check clean TGI_VERSION ?= 1.4.2 +rwildcard=$(wildcard $1) $(foreach d,$1,$(call rwildcard,$(addsuffix /$(notdir $d),$(wildcard $(dir $d)*)))) + +VERSION := $(shell awk '/__version__ = "(.*)"/{print $$3}' optimum/tpu/version.py | sed 's/"//g') + +PACKAGE_DIST = dist/optimum-tpu-$(VERSION).tar.gz +PACKAGE_WHEEL = dist/optimum_tpu-$(VERSION)-py3-none-any.whl +PACKAGE_PYTHON_FILES = $(call rwildcard, optimum/*.py) +PACKAGE_FILES = $(PACKAGE_PYTHON_FILES) \ + setup.py \ + setup.cfg \ + pyproject.toml \ + README.md \ + MANIFEST.in + +# Package build recipe +$(PACKAGE_DIST) $(PACKAGE_WHEEL): $(PACKAGE_FILES) + python -m build + +clean: + rm -rf dist + tpu-tgi: docker build --rm -f text-generation-inference/Dockerfile \ --build-arg VERSION=$(VERSION) \ @@ -24,10 +63,14 @@ build_dist_install_tools: python -m pip install build python -m pip install twine +build_dist: ${PACKAGE_DIST} ${PACKAGE_WHEEL} + +pypi_upload: ${PACKAGE_DIST} ${PACKAGE_WHEEL} + python -m twine upload ${PACKAGE_DIST} ${PACKAGE_WHEEL} +# Tests test_installs: - python -m pip install pytest safetensors - python -m pip install git+https://github.com/huggingface/transformers.git + python -m pip install .[tpu,tests] # Stand-alone TGI server for unit tests outside of TGI container tgi_server: @@ -35,8 +78,7 @@ tgi_server: make -C text-generation-inference/server clean VERSION=${VERSION} TGI_VERSION=${TGI_VERSION} make -C text-generation-inference/server gen-server -tgi_test: tgi_server - python -m pip install pytest +tgi_test: test_installs tgi_server 
find text-generation-inference -name "text_generation_server-$(VERSION)-py3-none-any.whl" \ -exec python -m pip install --force-reinstall {} \; python -m pytest -sv text-generation-inference/tests diff --git a/optimum/tpu/__init__.py b/optimum/tpu/__init__.py new file mode 100644 index 00000000..adb37a59 --- /dev/null +++ b/optimum/tpu/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .version import __version__, VERSION # noqa: F401 diff --git a/optimum/tpu/generation/__init__.py b/optimum/tpu/generation/__init__.py new file mode 100644 index 00000000..a80c6741 --- /dev/null +++ b/optimum/tpu/generation/__init__.py @@ -0,0 +1,17 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .logits_process import FusedLogitsWarper # noqa: F401 +from .token_selector import TokenSelector # noqa: F401 diff --git a/text-generation-inference/server/text_generation_server/logits_process.py b/optimum/tpu/generation/logits_process.py similarity index 100% rename from text-generation-inference/server/text_generation_server/logits_process.py rename to optimum/tpu/generation/logits_process.py diff --git a/text-generation-inference/server/text_generation_server/token_selector.py b/optimum/tpu/generation/token_selector.py similarity index 100% rename from text-generation-inference/server/text_generation_server/token_selector.py rename to optimum/tpu/generation/token_selector.py diff --git a/text-generation-inference/server/text_generation_server/model.py b/optimum/tpu/model.py similarity index 100% rename from text-generation-inference/server/text_generation_server/model.py rename to optimum/tpu/model.py diff --git a/text-generation-inference/server/text_generation_server/modeling.py b/optimum/tpu/modeling.py similarity index 94% rename from text-generation-inference/server/text_generation_server/modeling.py rename to optimum/tpu/modeling.py index bb59d1f8..5d6e7ac6 100644 --- a/text-generation-inference/server/text_generation_server/modeling.py +++ b/optimum/tpu/modeling.py @@ -18,7 +18,6 @@ from os import PathLike, environ from typing import Any -import torch from loguru import logger from transformers import AutoModelForCausalLM from transformers.utils import is_accelerate_available @@ -51,9 +50,7 @@ def from_pretrained( pretrained_model_name_or_path, device_map=device, *model_args, **kwargs ) else: - model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path, *model_args, **kwargs - ) + model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) model.to(device) # Update config with specific data) if task is not None or getattr(model.config, "task", None) is None: diff --git a/optimum/tpu/version.py b/optimum/tpu/version.py new file mode 100644 index 00000000..ec7613a4 --- /dev/null +++ b/optimum/tpu/version.py @@ -0,0 +1,18 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pkg_resources import parse_version + +__version__ = "0.1.0.dev0" +VERSION = parse_version(__version__) diff --git a/pyproject.toml b/pyproject.toml index d21952a7..1f47e038 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,53 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +[project] +name = "optimum-tpu" +dynamic = ["version"] +authors=[ + { name = "HuggingFace Inc. Machine Learning Optimization Team", email = "hardware@huggingface.co"} +] +description = "Optimum TPU is the interface between the Hugging Face Transformers library and Google Cloud TPU devices." 
+readme = "README.md" +license = {file = "LICENSE"} +classifiers = [ + "Development Status :: 1 - Pre-Alpha", + "License :: OSI Approved :: Apache Software License", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +keywords = [ + "transformers", + "fine-tuning", + "inference", + "tpu", + "cloud-tpu", + "gcp", + "google-cloud" +] + +dependencies = ["transformers == 4.38.1", "torch-xla>=2.2.0", "torch>=2.2.0"] + +[project.optional-dependencies] +tests = ["pytest", "safetensors"] +quality = ["black", "ruff", "isort",] + +[project.urls] +Homepage = "https://hf.co/hardware" +Documentation = "https://hf.co/docs/optimum/tpu" +Repository = "https://github.com/huggingface/optimum-tpu" +Issues = "https://github.com/huggingface/optimum-tpu/issues" + +[tool.setuptools.dynamic] +version = {attr = "optimum.tpu.__version__"} + +[tool.setuptools.packages.find] +include = ["optimum.tpu"] [tool.black] line-length = 119 @@ -34,6 +81,4 @@ known-first-party = ["optimum.tpu"] [tool.pytest.ini_options] markers = [ "is_staging_test", - "is_trainium_test", - "is_inferentia_test", ] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..251e71cf --- /dev/null +++ b/setup.cfg @@ -0,0 +1,17 @@ +[isort] +default_section = FIRSTPARTY +ensure_newline_before_comments = True +force_grid_wrap = 0 +include_trailing_comma = True +known_first_party = optimum.tpu +line_length = 119 +lines_after_imports = 2 +multi_line_output = 3 +use_parentheses = True + +[flake8] +ignore = E203, E501, E741, W503, W605 +max-line-length = 119 + +[tool:pytest] +doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS diff --git a/text-generation-inference/LICENSE b/text-generation-inference/LICENSE deleted file mode 100644 index 19a34fcf..00000000 --- a/text-generation-inference/LICENSE +++ /dev/null @@ -1,181 +0,0 @@ -Hugging Face Optimized Inference License 1.0 (HFOILv1.0) - - -This License Agreement governs the use of the Software and its Modifications. It is a -binding agreement between the Licensor and You. - -This License Agreement shall be referred to as Hugging Face Optimized Inference License -1.0 or HFOILv1.0. We may publish revised versions of this License Agreement from time to -time. Each version will be given a distinguished number. - -By downloading, accessing, modifying, distributing or otherwise using the Software, You -consent to all of the terms and conditions below. So, if You do not agree with those, -please do not download, access, modify, distribute, or use the Software. - - -1. PERMISSIONS - -You may use, modify and distribute the Software pursuant to the following terms and -conditions: - -Copyright License. Subject to the terms and conditions of this License Agreement and where -and as applicable, each Contributor hereby grants You a perpetual, worldwide, -non-exclusive, royalty-free, copyright license to reproduce, prepare, publicly display, -publicly perform, sublicense under the terms herein, and distribute the Software and -Modifications of the Software. - -Patent License. 
Subject to the terms and conditions of this License Agreement and where -and as applicable, each Contributor hereby grants You a perpetual, worldwide, -non-exclusive, royalty-free patent license to make, have made, Use, offer to sell, sell, -import, and otherwise transfer the Software, where such license applies only to those -patent claims licensable by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) with the Software to -which such Contribution(s) was submitted. If You institute patent litigation against any -entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Software -or a Contribution incorporated within the Software constitutes direct or contributory -patent infringement, then any rights granted to You under this License Agreement for the -Software shall terminate as of the date such litigation is filed. - -No other rights. All rights not expressly granted herein are retained. - - -2. RESTRICTIONS - -You may not distribute the Software as a hosted or managed, and paid service, where the -service grants users access to any substantial set of the features or functionality of the -Software. If you wish to do so, You will need to be granted additional rights from the -Licensor which will be subject to a separate mutually agreed agreement. - -You may not sublicense the Software under any other terms than those listed in this -License. - - -3. OBLIGATIONS - -When You modify the Software, You agree to: - attach a notice stating the Modifications of -the Software You made; and - attach a notice stating that the Modifications of the -Software are released under this License Agreement. - -When You distribute the Software or Modifications of the Software, You agree to: - give -any recipients of the Software a copy of this License Agreement; - retain all Explanatory -Documentation; and if sharing the Modifications of the Software, add Explanatory -Documentation documenting the changes made to create the Modifications of the Software; - -retain all copyright, patent, trademark and attribution notices. - - -4. MISCELLANEOUS - -Termination. Licensor reserves the right to restrict Use of the Software in violation of -this License Agreement, upon which Your licenses will automatically terminate. - -Contributions. Unless You explicitly state otherwise, any Contribution intentionally -submitted for inclusion in the Software by You to the Licensor shall be under the terms -and conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of any -separate license agreement you may have executed with Licensor regarding such -Contributions. - -Trademarks and related. Nothing in this License Agreement permits You (i) to make Use of -Licensors’ trademarks, trade names, or logos, (ii) otherwise suggest endorsement by -Licensor, or (iii) misrepresent the relationship between the parties; and any rights not -expressly granted herein are reserved by the Licensors. - -Output You generate. Licensor claims no rights in the Output. You agree not to contravene -any provision as stated in the License Agreement with your Use of the Output. - -Disclaimer of Warranty. 
Except as expressly provided otherwise herein, and to the fullest -extent permitted by law, Licensor provides the Software (and each Contributor provides its -Contributions) AS IS, and Licensor disclaims all warranties or guarantees of any kind, -express or implied, whether arising under any law or from any usage in trade, or otherwise -including but not limited to the implied warranties of merchantability, non-infringement, -quiet enjoyment, fitness for a particular purpose, or otherwise. You are solely -responsible for determining the appropriateness of the Software and Modifications of the -Software for your purposes (including your use or distribution of the Software and -Modifications of the Software), and assume any risks associated with Your exercise of -permissions under this License Agreement. - -Limitation of Liability. In no event and under no legal theory, whether in tort (including -negligence), contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to -You for damages, including any direct, indirect, special, incidental, or consequential -damages of any character arising as a result of this License Agreement or out of the Use -or inability to Use the Software (including but not limited to damages for loss of -goodwill, work stoppage, computer failure or malfunction, model failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has been advised -of the possibility of such damages. - -Accepting Warranty or Additional Liability. While sharing the Software or Modifications of -the Software thereof, You may choose to offer and charge a fee for, acceptance of support, -warranty, indemnity, or other liability obligations and/or rights consistent with this -License Agreement. However, in accepting such obligations, You may act only on Your own -behalf and on Your sole responsibility, not on behalf of Licensor or any other -Contributor, and you hereby agree to indemnify, defend, and hold Licensor and each other -Contributor (and their successors or assigns) harmless for any liability incurred by, or -claims asserted against, such Licensor or Contributor (and their successors or assigns) by -reason of your accepting any such warranty or additional liability. - -Severability. This License Agreement is a license of copyright and patent rights and an -agreement in contract between You and the Licensor. If any provision of this License -Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall -be unaffected thereby and remain valid as if such provision had not been set forth herein. - - -5. DEFINITIONS - -“Contribution” refers to any work of authorship, including the original version of the -Software and any Modifications of the Software that is intentionally submitted to Licensor -for inclusion in the Software by the copyright owner or by an individual or entity -authorized to submit on behalf of the copyright owner. 
For the purposes of this -definition, “submitted” means any form of electronic, verbal, or written communication -sent to the Licensor or its representatives, including but not limited to communication on -electronic mailing lists, source code control systems, and issue tracking systems that are -managed by, or on behalf of, the Licensor for the purpose of discussing and improving the -Software, but excluding communication that is conspicuously marked or otherwise designated -in writing by the copyright owner as “Not a Contribution.” - -“Contributor” refers to Licensor and any individual or entity on behalf of whom a -Contribution has been received by Licensor and subsequently incorporated within the -Software. - -“Data” refers to a collection of information extracted from the dataset used with the -Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not -licensed under this License Agreement. - -“Explanatory Documentation” refers to any documentation or related information including -but not limited to model cards or data cards dedicated to inform the public about the -characteristics of the Software. Explanatory documentation is not licensed under this -License. - -"License Agreement" refers to these terms and conditions. - -“Licensor” refers to the rights owners or entity authorized by the rights owners that are -granting the terms and conditions of this License Agreement. - -“Model” refers to machine-learning based assemblies (including checkpoints), consisting of -learnt weights and parameters (including optimizer states), corresponding to a model -architecture as embodied in Software source code. Source code is not licensed under this -License Agreement. - -“Modifications of the Software” refers to all changes to the Software, including without -limitation derivative works of the Software. - -“Output” refers to the results of operating the Software. - -“Share” refers to any transmission, reproduction, publication or other sharing of the -Software or Modifications of the Software to a third party, including providing the -Softwaire as a hosted service made available by electronic or other remote means, -including - but not limited to - API-based or web access. - -“Software” refers to the software and Model (or parts of either) that Licensor makes -available under this License Agreement. - -“Third Parties” refers to individuals or legal entities that are not under common control -with Licensor or You. - -“Use” refers to anything You or your representatives do with the Software, including but -not limited to generating any Output, fine tuning, updating, running, training, evaluating -and/or reparametrizing the Model. - -"You" (or "Your") refers to an individual or Legal Entity exercising permissions granted -by this License Agreement and/or making Use of the Software for whichever purpose and in -any field of Use. 
diff --git a/text-generation-inference/server/build-requirements.txt b/text-generation-inference/server/build-requirements.txt index f745f993..5307dc5d 100644 --- a/text-generation-inference/server/build-requirements.txt +++ b/text-generation-inference/server/build-requirements.txt @@ -1,3 +1,3 @@ build -grpcio-tools==1.62.0 +grpcio-tools==1.62.1 mypy-protobuf==3.2.0 diff --git a/text-generation-inference/server/pyproject.toml b/text-generation-inference/server/pyproject.toml index 78d6b0af..f6e94c96 100644 --- a/text-generation-inference/server/pyproject.toml +++ b/text-generation-inference/server/pyproject.toml @@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta" [project] name = "text-generation-server" -version = "VERSION" +dynamic = ["version"] authors = [{name="Alvaro Moran", email="alvaro.moran@huggingface.co" }] description = "TGI compatible inference server for Google TPU platforms" dependencies = [ - 'protobuf > 3.20.1, < 5', - 'grpcio == 1.62.0', - 'grpcio-status == 1.62.0', - 'grpcio-reflection == 1.62.0', + 'protobuf', + 'grpcio == 1.62.1', + 'grpcio-status == 1.62.1', + 'grpcio-reflection == 1.62.1', 'grpc-interceptor == 0.15.2', 'typer == 0.6.1', 'safetensors == 0.4.2', @@ -22,5 +22,8 @@ dependencies = [ [tool.setuptools] packages = ["text_generation_server", "text_generation_server.pb"] +[tool.setuptools.dynamic] +version = {attr = "text_generation_server.version.__version__"} + [project.scripts] text-generation-server = 'text_generation_server.cli:app' diff --git a/text-generation-inference/server/text_generation_server/generator.py b/text-generation-inference/server/text_generation_server/generator.py index 6bb327ad..17fa439f 100644 --- a/text-generation-inference/server/text_generation_server/generator.py +++ b/text-generation-inference/server/text_generation_server/generator.py @@ -11,8 +11,9 @@ from loguru import logger from transformers import AutoTokenizer, PreTrainedTokenizerBase, StaticCache from transformers.generation import GenerationConfig +from optimum.tpu.modeling import TpuModelForCausalLM +from optimum.tpu.generation import TokenSelector -from .modeling import TpuModelForCausalLM from .pb.generate_pb2 import ( Batch, CachedBatch, @@ -23,7 +24,6 @@ Request, Tokens, ) -from .token_selector import TokenSelector # Disable optimum-tpu warnings as it seems to block the server after a while @@ -250,9 +250,7 @@ def append(self, next_token: int) -> str: Return: The corresponding decoded text (if any). 
""" - self._tokens = torch.cat( - [self._tokens, torch.tensor([next_token], dtype=self._tokens.dtype)] - ) + self._tokens = torch.cat([self._tokens, torch.tensor([next_token], dtype=self._tokens.dtype)]) # Update mask only if it was set previously if self._mask is not None: self._mask = torch.cat([self._mask, torch.tensor([1], device=self._device, dtype=self._mask.dtype)]) @@ -536,7 +534,9 @@ def _generate_token( ret = self._post_generate(outputs, next_batch_id, input_ids) return ret - def _post_generate(self, outputs: Dict, next_batch_id: int, input_ids: torch.LongTensor) -> Tuple[List[Generation], CachedBatch]: + def _post_generate( + self, outputs: Dict, next_batch_id: int, input_ids: torch.LongTensor + ) -> Tuple[List[Generation], CachedBatch]: generations = [] active_slots = False for i, slot in enumerate(self.slots): diff --git a/text-generation-inference/server/text_generation_server/version.py b/text-generation-inference/server/text_generation_server/version.py new file mode 100644 index 00000000..26e4cdb9 --- /dev/null +++ b/text-generation-inference/server/text_generation_server/version.py @@ -0,0 +1,4 @@ +from pkg_resources import parse_version + +__version__ = "0.1.0.dev0" +VERSION = parse_version(__version__) diff --git a/text-generation-inference/tests/test_gemma.py b/text-generation-inference/tests/test_gemma.py index 69a85a08..707cce44 100644 --- a/text-generation-inference/tests/test_gemma.py +++ b/text-generation-inference/tests/test_gemma.py @@ -2,7 +2,7 @@ import os from tqdm import tqdm from text_generation_server.generator import TpuGenerator -from text_generation_server.model import fetch_model +from optimum.tpu.model import fetch_model from text_generation_server.pb.generate_pb2 import ( Batch, NextTokenChooserParameters, diff --git a/text-generation-inference/tests/test_gpt2.py b/text-generation-inference/tests/test_gpt2.py index 2638f620..4a0d4960 100644 --- a/text-generation-inference/tests/test_gpt2.py +++ b/text-generation-inference/tests/test_gpt2.py @@ -2,7 +2,7 @@ import os from tqdm import tqdm from text_generation_server.generator import TpuGenerator -from text_generation_server.model import fetch_model +from optimum.tpu.model import fetch_model from text_generation_server.pb.generate_pb2 import ( Batch, NextTokenChooserParameters,