# .github/workflows/test-pytorch-xla-tpu-tgi.yml

name: "Optimum TPU / Test TGI on TPU"

# Triggers:
#   - pushes to the quick-ci-test branch;
#   - pull requests against main, limited to changes under
#     text-generation-inference/.
on:
  push:
    branches:
      - quick-ci-test
  pull_request:
    branches:
      - main
    paths:
      - 'text-generation-inference/**'

# Runs are grouped by workflow name plus the head ref, falling back to the run
# id when the head ref is empty. A new run cancels any run still in progress
# in the same group.
concurrency:
  group: '${{ github.workflow }}-${{ github.head_ref || github.run_id }}'
  cancel-in-progress: true

jobs:
  # Single job: runs the TGI test suites inside an XLA container on the runner
  # labelled `optimum-tpu`.
  do-the-job:
    name: Run TGI tests
    runs-on: optimum-tpu
    container:
      # NOTE(review): this image was described as a nightly build, chosen
      # because the release was not working, but the pinned tag
      # (r2.4.0_3.10_tpuvm) looks like a release tag - confirm which one is
      # intended and update this note.
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
      options: --shm-size "16gb" --ipc host --privileged
    env:
      PJRT_DEVICE: TPU
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Build and test TGI server
        # The token is passed through the step environment instead of being
        # expanded into the script text, so its value is never parsed by the
        # shell as part of the command line.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
        run: |
          make tgi_test

      # Use a different step to test the Jetstream Pytorch version, to avoid conflicts with torch-xla[tpu]
      - name: Install and test TGI server (Jetstream Pytorch)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
        run: |
          # The extras spec is quoted so the shell never treats the brackets
          # as a glob pattern.
          pip install -U ".[jetstream-pt]" \
            -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \
            -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html \
            -f https://storage.googleapis.com/libtpu-releases/index.html
          # JETSTREAM_PT is set only for the pytest invocation, not for the
          # install above.
          JETSTREAM_PT=1 python -m \
            pytest -sv text-generation-inference/tests -k jetstream