From 17238bfced79748c92422ebbbb9a7d6d4451c16f Mon Sep 17 00:00:00 2001
From: Alvaro Moran <6949769+tengomucho@users.noreply.github.com>
Date: Wed, 3 Apr 2024 16:16:49 +0200
Subject: [PATCH] Repo layout (#10)

* chore: add basic optimum package layout files
* feat: generic optimum features moved outside TGI server
* chore: fix style with ruff and black
* feat(CI): add code quality workflow
* Remove invalid markers
* Attempt to match PEP 440
* Fix initial top level non-compliant versioning
* Update to latest version for tgi build-deps
* Switch from gawk to awk + sed
* Complete the pyproject.toml files
* Use only pyproject.toml
* Revert mypy-protobuf to 3.2.0
* Update version for tgi
* Use test_installs to set up the dependencies
* Move tpu extras to the mandatory deps
* Remove commented code

---------

Co-authored-by: Morgan Funtowicz
---
 .github/workflows/check_code_quality.yml      |  55 ++++++
 MANIFEST.in                                   |  17 ++
 Makefile                                      |  52 ++++-
 optimum/tpu/__init__.py                       |  15 ++
 optimum/tpu/generation/__init__.py            |  17 ++
 .../tpu/generation}/logits_process.py         |   0
 .../tpu/generation}/token_selector.py         |   0
 .../tpu}/model.py                             |   0
 .../tpu}/modeling.py                          |   5 +-
 optimum/tpu/version.py                        |  18 ++
 pyproject.toml                                |  49 ++++-
 setup.cfg                                     |  17 ++
 text-generation-inference/LICENSE             | 181 ------------------
 .../server/build-requirements.txt             |   2 +-
 .../server/pyproject.toml                     |  13 +-
 .../text_generation_server/generator.py       |  12 +-
 .../server/text_generation_server/version.py  |   4 +
 text-generation-inference/tests/test_gemma.py |   2 +-
 text-generation-inference/tests/test_gpt2.py  |   2 +-
 19 files changed, 255 insertions(+), 206 deletions(-)
 create mode 100644 .github/workflows/check_code_quality.yml
 create mode 100644 MANIFEST.in
 create mode 100644 optimum/tpu/__init__.py
 create mode 100644 optimum/tpu/generation/__init__.py
 rename {text-generation-inference/server/text_generation_server => optimum/tpu/generation}/logits_process.py (100%)
 rename {text-generation-inference/server/text_generation_server => optimum/tpu/generation}/token_selector.py (100%)
 rename {text-generation-inference/server/text_generation_server => optimum/tpu}/model.py (100%)
 rename {text-generation-inference/server/text_generation_server => optimum/tpu}/modeling.py (94%)
 create mode 100644 optimum/tpu/version.py
 create mode 100644 setup.cfg
 delete mode 100644 text-generation-inference/LICENSE
 create mode 100644 text-generation-inference/server/text_generation_server/version.py

diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml
new file mode 100644
index 00000000..fec93652
--- /dev/null
+++ b/.github/workflows/check_code_quality.yml
@@ -0,0 +1,55 @@
+name: check_code_quality
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "setup.py"
+      - "optimum/tpu/**.py"
+      - "tests/**.py"
+      - "examples/**.py"
+
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "setup.py"
+      - "optimum/tpu/**.py"
+      - "tests/**.py"
+      - "examples/**.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.10.12]
+        os: [ubuntu-22.04]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@v4
+    - name: Setup Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Create and start a virtual environment
+      run: |
+        python -m venv venv
+        source venv/bin/activate
+    - name: Install dependencies
+      run: |
+        source venv/bin/activate
+        pip install
--upgrade pip + pip install .[quality] + - name: Check style with black + run: | + source venv/bin/activate + black --check . + - name: Check style with ruff + run: | + source venv/bin/activate + ruff check . diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..0cdc63aa --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,17 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include README.md +include LICENSE + diff --git a/Makefile b/Makefile index 9636b4a8..7868bac0 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,47 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +SHELL := /bin/bash +CURRENT_DIR = $(shell pwd) +DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-neuron.git +# If CLONE_URL is empty, revert to DEFAULT_CLONE_URL +REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) -VERSION := "0.0.1" +.PHONY: build_dist style style_check clean TGI_VERSION ?= 1.4.2 +rwildcard=$(wildcard $1) $(foreach d,$1,$(call rwildcard,$(addsuffix /$(notdir $d),$(wildcard $(dir $d)*)))) + +VERSION := $(shell awk '/__version__ = "(.*)"/{print $$3}' optimum/tpu/version.py | sed 's/"//g') + +PACKAGE_DIST = dist/optimum-tpu-$(VERSION).tar.gz +PACKAGE_WHEEL = dist/optimum_tpu-$(VERSION)-py3-none-any.whl +PACKAGE_PYTHON_FILES = $(call rwildcard, optimum/*.py) +PACKAGE_FILES = $(PACKAGE_PYTHON_FILES) \ + setup.py \ + setup.cfg \ + pyproject.toml \ + README.md \ + MANIFEST.in + +# Package build recipe +$(PACKAGE_DIST) $(PACKAGE_WHEEL): $(PACKAGE_FILES) + python -m build + +clean: + rm -rf dist + tpu-tgi: docker build --rm -f text-generation-inference/Dockerfile \ --build-arg VERSION=$(VERSION) \ @@ -24,10 +63,14 @@ build_dist_install_tools: python -m pip install build python -m pip install twine +build_dist: ${PACKAGE_DIST} ${PACKAGE_WHEEL} + +pypi_upload: ${PACKAGE_DIST} ${PACKAGE_WHEEL} + python -m twine upload ${PACKAGE_DIST} ${PACKAGE_WHEEL} +# Tests test_installs: - python -m pip install pytest safetensors - python -m pip install git+https://github.com/huggingface/transformers.git + python -m pip install .[tpu,tests] # Stand-alone TGI server for unit tests outside of TGI container tgi_server: @@ -35,8 +78,7 @@ tgi_server: make -C text-generation-inference/server clean VERSION=${VERSION} TGI_VERSION=${TGI_VERSION} make -C text-generation-inference/server gen-server -tgi_test: tgi_server - python -m pip install pytest +tgi_test: test_installs tgi_server 
find text-generation-inference -name "text_generation_server-$(VERSION)-py3-none-any.whl" \ -exec python -m pip install --force-reinstall {} \; python -m pytest -sv text-generation-inference/tests diff --git a/optimum/tpu/__init__.py b/optimum/tpu/__init__.py new file mode 100644 index 00000000..adb37a59 --- /dev/null +++ b/optimum/tpu/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .version import __version__, VERSION # noqa: F401 diff --git a/optimum/tpu/generation/__init__.py b/optimum/tpu/generation/__init__.py new file mode 100644 index 00000000..a80c6741 --- /dev/null +++ b/optimum/tpu/generation/__init__.py @@ -0,0 +1,17 @@ +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .logits_process import FusedLogitsWarper # noqa: F401 +from .token_selector import TokenSelector # noqa: F401 diff --git a/text-generation-inference/server/text_generation_server/logits_process.py b/optimum/tpu/generation/logits_process.py similarity index 100% rename from text-generation-inference/server/text_generation_server/logits_process.py rename to optimum/tpu/generation/logits_process.py diff --git a/text-generation-inference/server/text_generation_server/token_selector.py b/optimum/tpu/generation/token_selector.py similarity index 100% rename from text-generation-inference/server/text_generation_server/token_selector.py rename to optimum/tpu/generation/token_selector.py diff --git a/text-generation-inference/server/text_generation_server/model.py b/optimum/tpu/model.py similarity index 100% rename from text-generation-inference/server/text_generation_server/model.py rename to optimum/tpu/model.py diff --git a/text-generation-inference/server/text_generation_server/modeling.py b/optimum/tpu/modeling.py similarity index 94% rename from text-generation-inference/server/text_generation_server/modeling.py rename to optimum/tpu/modeling.py index bb59d1f8..5d6e7ac6 100644 --- a/text-generation-inference/server/text_generation_server/modeling.py +++ b/optimum/tpu/modeling.py @@ -18,7 +18,6 @@ from os import PathLike, environ from typing import Any -import torch from loguru import logger from transformers import AutoModelForCausalLM from transformers.utils import is_accelerate_available @@ -51,9 +50,7 @@ def from_pretrained( pretrained_model_name_or_path, device_map=device, *model_args, **kwargs ) else: - model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path, *model_args, **kwargs - ) + model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) model.to(device) # Update config with specific data) if task is not None or getattr(model.config, "task", None) is None: diff --git a/optimum/tpu/version.py b/optimum/tpu/version.py new file mode 100644 index 00000000..ec7613a4 --- /dev/null +++ b/optimum/tpu/version.py @@ -0,0 +1,18 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pkg_resources import parse_version + +__version__ = "0.1.0.dev0" +VERSION = parse_version(__version__) diff --git a/pyproject.toml b/pyproject.toml index d21952a7..1f47e038 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,53 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +[project] +name = "optimum-tpu" +dynamic = ["version"] +authors=[ + { name = "HuggingFace Inc. Machine Learning Optimization Team", email = "hardware@huggingface.co"} +] +description = "Optimum TPU is the interface between the Hugging Face Transformers library and Google Cloud TPU devices." 
+readme = "README.md" +license = {file = "LICENSE"} +classifiers = [ + "Development Status :: 1 - Pre-Alpha", + "License :: OSI Approved :: Apache Software License", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +keywords = [ + "transformers", + "fine-tuning", + "inference", + "tpu", + "cloud-tpu", + "gcp", + "google-cloud" +] + +dependencies = ["transformers == 4.38.1", "torch-xla>=2.2.0", "torch>=2.2.0"] + +[project.optional-dependencies] +tests = ["pytest", "safetensors"] +quality = ["black", "ruff", "isort",] + +[project.urls] +Homepage = "https://hf.co/hardware" +Documentation = "https://hf.co/docs/optimum/tpu" +Repository = "https://github.com/huggingface/optimum-tpu" +Issues = "https://github.com/huggingface/optimum-tpu/issues" + +[tool.setuptools.dynamic] +version = {attr = "optimum.tpu.__version__"} + +[tool.setuptools.packages.find] +include = ["optimum.tpu"] [tool.black] line-length = 119 @@ -34,6 +81,4 @@ known-first-party = ["optimum.tpu"] [tool.pytest.ini_options] markers = [ "is_staging_test", - "is_trainium_test", - "is_inferentia_test", ] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..251e71cf --- /dev/null +++ b/setup.cfg @@ -0,0 +1,17 @@ +[isort] +default_section = FIRSTPARTY +ensure_newline_before_comments = True +force_grid_wrap = 0 +include_trailing_comma = True +known_first_party = optimum.tpu +line_length = 119 +lines_after_imports = 2 +multi_line_output = 3 +use_parentheses = True + +[flake8] +ignore = E203, E501, E741, W503, W605 +max-line-length = 119 + +[tool:pytest] +doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS diff --git a/text-generation-inference/LICENSE b/text-generation-inference/LICENSE deleted file mode 100644 index 19a34fcf..00000000 --- a/text-generation-inference/LICENSE +++ /dev/null @@ -1,181 +0,0 @@ -Hugging Face Optimized Inference License 1.0 (HFOILv1.0) - - -This License Agreement governs the use of the Software and its Modifications. It is a -binding agreement between the Licensor and You. - -This License Agreement shall be referred to as Hugging Face Optimized Inference License -1.0 or HFOILv1.0. We may publish revised versions of this License Agreement from time to -time. Each version will be given a distinguished number. - -By downloading, accessing, modifying, distributing or otherwise using the Software, You -consent to all of the terms and conditions below. So, if You do not agree with those, -please do not download, access, modify, distribute, or use the Software. - - -1. PERMISSIONS - -You may use, modify and distribute the Software pursuant to the following terms and -conditions: - -Copyright License. Subject to the terms and conditions of this License Agreement and where -and as applicable, each Contributor hereby grants You a perpetual, worldwide, -non-exclusive, royalty-free, copyright license to reproduce, prepare, publicly display, -publicly perform, sublicense under the terms herein, and distribute the Software and -Modifications of the Software. - -Patent License. 
Subject to the terms and conditions of this License Agreement and where -and as applicable, each Contributor hereby grants You a perpetual, worldwide, -non-exclusive, royalty-free patent license to make, have made, Use, offer to sell, sell, -import, and otherwise transfer the Software, where such license applies only to those -patent claims licensable by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) with the Software to -which such Contribution(s) was submitted. If You institute patent litigation against any -entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Software -or a Contribution incorporated within the Software constitutes direct or contributory -patent infringement, then any rights granted to You under this License Agreement for the -Software shall terminate as of the date such litigation is filed. - -No other rights. All rights not expressly granted herein are retained. - - -2. RESTRICTIONS - -You may not distribute the Software as a hosted or managed, and paid service, where the -service grants users access to any substantial set of the features or functionality of the -Software. If you wish to do so, You will need to be granted additional rights from the -Licensor which will be subject to a separate mutually agreed agreement. - -You may not sublicense the Software under any other terms than those listed in this -License. - - -3. OBLIGATIONS - -When You modify the Software, You agree to: - attach a notice stating the Modifications of -the Software You made; and - attach a notice stating that the Modifications of the -Software are released under this License Agreement. - -When You distribute the Software or Modifications of the Software, You agree to: - give -any recipients of the Software a copy of this License Agreement; - retain all Explanatory -Documentation; and if sharing the Modifications of the Software, add Explanatory -Documentation documenting the changes made to create the Modifications of the Software; - -retain all copyright, patent, trademark and attribution notices. - - -4. MISCELLANEOUS - -Termination. Licensor reserves the right to restrict Use of the Software in violation of -this License Agreement, upon which Your licenses will automatically terminate. - -Contributions. Unless You explicitly state otherwise, any Contribution intentionally -submitted for inclusion in the Software by You to the Licensor shall be under the terms -and conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of any -separate license agreement you may have executed with Licensor regarding such -Contributions. - -Trademarks and related. Nothing in this License Agreement permits You (i) to make Use of -Licensors’ trademarks, trade names, or logos, (ii) otherwise suggest endorsement by -Licensor, or (iii) misrepresent the relationship between the parties; and any rights not -expressly granted herein are reserved by the Licensors. - -Output You generate. Licensor claims no rights in the Output. You agree not to contravene -any provision as stated in the License Agreement with your Use of the Output. - -Disclaimer of Warranty. 
Except as expressly provided otherwise herein, and to the fullest -extent permitted by law, Licensor provides the Software (and each Contributor provides its -Contributions) AS IS, and Licensor disclaims all warranties or guarantees of any kind, -express or implied, whether arising under any law or from any usage in trade, or otherwise -including but not limited to the implied warranties of merchantability, non-infringement, -quiet enjoyment, fitness for a particular purpose, or otherwise. You are solely -responsible for determining the appropriateness of the Software and Modifications of the -Software for your purposes (including your use or distribution of the Software and -Modifications of the Software), and assume any risks associated with Your exercise of -permissions under this License Agreement. - -Limitation of Liability. In no event and under no legal theory, whether in tort (including -negligence), contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to -You for damages, including any direct, indirect, special, incidental, or consequential -damages of any character arising as a result of this License Agreement or out of the Use -or inability to Use the Software (including but not limited to damages for loss of -goodwill, work stoppage, computer failure or malfunction, model failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has been advised -of the possibility of such damages. - -Accepting Warranty or Additional Liability. While sharing the Software or Modifications of -the Software thereof, You may choose to offer and charge a fee for, acceptance of support, -warranty, indemnity, or other liability obligations and/or rights consistent with this -License Agreement. However, in accepting such obligations, You may act only on Your own -behalf and on Your sole responsibility, not on behalf of Licensor or any other -Contributor, and you hereby agree to indemnify, defend, and hold Licensor and each other -Contributor (and their successors or assigns) harmless for any liability incurred by, or -claims asserted against, such Licensor or Contributor (and their successors or assigns) by -reason of your accepting any such warranty or additional liability. - -Severability. This License Agreement is a license of copyright and patent rights and an -agreement in contract between You and the Licensor. If any provision of this License -Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall -be unaffected thereby and remain valid as if such provision had not been set forth herein. - - -5. DEFINITIONS - -“Contribution” refers to any work of authorship, including the original version of the -Software and any Modifications of the Software that is intentionally submitted to Licensor -for inclusion in the Software by the copyright owner or by an individual or entity -authorized to submit on behalf of the copyright owner. 
For the purposes of this -definition, “submitted” means any form of electronic, verbal, or written communication -sent to the Licensor or its representatives, including but not limited to communication on -electronic mailing lists, source code control systems, and issue tracking systems that are -managed by, or on behalf of, the Licensor for the purpose of discussing and improving the -Software, but excluding communication that is conspicuously marked or otherwise designated -in writing by the copyright owner as “Not a Contribution.” - -“Contributor” refers to Licensor and any individual or entity on behalf of whom a -Contribution has been received by Licensor and subsequently incorporated within the -Software. - -“Data” refers to a collection of information extracted from the dataset used with the -Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not -licensed under this License Agreement. - -“Explanatory Documentation” refers to any documentation or related information including -but not limited to model cards or data cards dedicated to inform the public about the -characteristics of the Software. Explanatory documentation is not licensed under this -License. - -"License Agreement" refers to these terms and conditions. - -“Licensor” refers to the rights owners or entity authorized by the rights owners that are -granting the terms and conditions of this License Agreement. - -“Model” refers to machine-learning based assemblies (including checkpoints), consisting of -learnt weights and parameters (including optimizer states), corresponding to a model -architecture as embodied in Software source code. Source code is not licensed under this -License Agreement. - -“Modifications of the Software” refers to all changes to the Software, including without -limitation derivative works of the Software. - -“Output” refers to the results of operating the Software. - -“Share” refers to any transmission, reproduction, publication or other sharing of the -Software or Modifications of the Software to a third party, including providing the -Softwaire as a hosted service made available by electronic or other remote means, -including - but not limited to - API-based or web access. - -“Software” refers to the software and Model (or parts of either) that Licensor makes -available under this License Agreement. - -“Third Parties” refers to individuals or legal entities that are not under common control -with Licensor or You. - -“Use” refers to anything You or your representatives do with the Software, including but -not limited to generating any Output, fine tuning, updating, running, training, evaluating -and/or reparametrizing the Model. - -"You" (or "Your") refers to an individual or Legal Entity exercising permissions granted -by this License Agreement and/or making Use of the Software for whichever purpose and in -any field of Use. 
diff --git a/text-generation-inference/server/build-requirements.txt b/text-generation-inference/server/build-requirements.txt index f745f993..5307dc5d 100644 --- a/text-generation-inference/server/build-requirements.txt +++ b/text-generation-inference/server/build-requirements.txt @@ -1,3 +1,3 @@ build -grpcio-tools==1.62.0 +grpcio-tools==1.62.1 mypy-protobuf==3.2.0 diff --git a/text-generation-inference/server/pyproject.toml b/text-generation-inference/server/pyproject.toml index 78d6b0af..f6e94c96 100644 --- a/text-generation-inference/server/pyproject.toml +++ b/text-generation-inference/server/pyproject.toml @@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta" [project] name = "text-generation-server" -version = "VERSION" +dynamic = ["version"] authors = [{name="Alvaro Moran", email="alvaro.moran@huggingface.co" }] description = "TGI compatible inference server for Google TPU platforms" dependencies = [ - 'protobuf > 3.20.1, < 5', - 'grpcio == 1.62.0', - 'grpcio-status == 1.62.0', - 'grpcio-reflection == 1.62.0', + 'protobuf', + 'grpcio == 1.62.1', + 'grpcio-status == 1.62.1', + 'grpcio-reflection == 1.62.1', 'grpc-interceptor == 0.15.2', 'typer == 0.6.1', 'safetensors == 0.4.2', @@ -22,5 +22,8 @@ dependencies = [ [tool.setuptools] packages = ["text_generation_server", "text_generation_server.pb"] +[tool.setuptools.dynamic] +version = {attr = "text_generation_server.version.__version__"} + [project.scripts] text-generation-server = 'text_generation_server.cli:app' diff --git a/text-generation-inference/server/text_generation_server/generator.py b/text-generation-inference/server/text_generation_server/generator.py index 6bb327ad..17fa439f 100644 --- a/text-generation-inference/server/text_generation_server/generator.py +++ b/text-generation-inference/server/text_generation_server/generator.py @@ -11,8 +11,9 @@ from loguru import logger from transformers import AutoTokenizer, PreTrainedTokenizerBase, StaticCache from transformers.generation import GenerationConfig +from optimum.tpu.modeling import TpuModelForCausalLM +from optimum.tpu.generation import TokenSelector -from .modeling import TpuModelForCausalLM from .pb.generate_pb2 import ( Batch, CachedBatch, @@ -23,7 +24,6 @@ Request, Tokens, ) -from .token_selector import TokenSelector # Disable optimum-tpu warnings as it seems to block the server after a while @@ -250,9 +250,7 @@ def append(self, next_token: int) -> str: Return: The corresponding decoded text (if any). 
""" - self._tokens = torch.cat( - [self._tokens, torch.tensor([next_token], dtype=self._tokens.dtype)] - ) + self._tokens = torch.cat([self._tokens, torch.tensor([next_token], dtype=self._tokens.dtype)]) # Update mask only if it was set previously if self._mask is not None: self._mask = torch.cat([self._mask, torch.tensor([1], device=self._device, dtype=self._mask.dtype)]) @@ -536,7 +534,9 @@ def _generate_token( ret = self._post_generate(outputs, next_batch_id, input_ids) return ret - def _post_generate(self, outputs: Dict, next_batch_id: int, input_ids: torch.LongTensor) -> Tuple[List[Generation], CachedBatch]: + def _post_generate( + self, outputs: Dict, next_batch_id: int, input_ids: torch.LongTensor + ) -> Tuple[List[Generation], CachedBatch]: generations = [] active_slots = False for i, slot in enumerate(self.slots): diff --git a/text-generation-inference/server/text_generation_server/version.py b/text-generation-inference/server/text_generation_server/version.py new file mode 100644 index 00000000..26e4cdb9 --- /dev/null +++ b/text-generation-inference/server/text_generation_server/version.py @@ -0,0 +1,4 @@ +from pkg_resources import parse_version + +__version__ = "0.1.0.dev0" +VERSION = parse_version(__version__) diff --git a/text-generation-inference/tests/test_gemma.py b/text-generation-inference/tests/test_gemma.py index 69a85a08..707cce44 100644 --- a/text-generation-inference/tests/test_gemma.py +++ b/text-generation-inference/tests/test_gemma.py @@ -2,7 +2,7 @@ import os from tqdm import tqdm from text_generation_server.generator import TpuGenerator -from text_generation_server.model import fetch_model +from optimum.tpu.model import fetch_model from text_generation_server.pb.generate_pb2 import ( Batch, NextTokenChooserParameters, diff --git a/text-generation-inference/tests/test_gpt2.py b/text-generation-inference/tests/test_gpt2.py index 2638f620..4a0d4960 100644 --- a/text-generation-inference/tests/test_gpt2.py +++ b/text-generation-inference/tests/test_gpt2.py @@ -2,7 +2,7 @@ import os from tqdm import tqdm from text_generation_server.generator import TpuGenerator -from text_generation_server.model import fetch_model +from optimum.tpu.model import fetch_model from text_generation_server.pb.generate_pb2 import ( Batch, NextTokenChooserParameters,