test-pytorch-xla-tpu-tgi.yml
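# CI workflow for optimum-tpu: runs the text-generation-inference (TGI) server
# tests on a TPU runner inside a PyTorch/XLA container.
# (The push trigger appears to be temporarily pointed at the 'testpbmci' branch
# for CI debugging; the commented-out lines preserve the main-branch triggers.)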
name: Optimum TPU / Test TGI on TPU

on:
  push:
    branches: [ testpbmci ]
    #branches: [ main ]
    #paths:
    #  - "text-generation-inference/**"
  pull_request:
    branches: [ main ]
    paths:
      - "text-generation-inference/**"
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  do-the-job:
    name: Run TGI tests
    #runs-on: optimum-tpu
    runs-on: tpu-runners-dind2
    container:
      # Use an image that works with TPU and PyTorch 2.3.0 (the release image was not working).
      #image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:8f1dcd5b03f993e4da5c20d17c77aff6a5f22d5455f8eb042d2e4b16ac460526
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:a0b5294d9e00d01876105e5afc5a187ea4c3e481f6f5a407b6621f9f1af7d9b2
      options: --shm-size "16gb" --ipc host --privileged
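      # Assumption: --privileged and --ipc host let the container reach the host
      # TPU devices, and the 16 GB shared-memory segment avoids /dev/shm
      # exhaustion when loading large models.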
    env:
      PJRT_DEVICE: TPU
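      # PJRT_DEVICE selects the PJRT runtime backend for torch_xla; 'TPU'
      # makes it target the TPU rather than CPU/GPU.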
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      #- name: test
      #  run: sleep 60m
      #- name: Checking Pytorch/XLA installation
      #  run: python -c "import torch_xla.core.xla_model as xm; assert xm.xla_device().type == 'xla', 'XLA device not available'"
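      # Build the TGI server and run its test suite via the repository Makefile.
      # Assumptions: HF_TOKEN is passed so the tests can pull models from the
      # Hugging Face Hub, and accelerate is pinned to a version known to work
      # with this stack.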
      - name: Build and test TGI server
        run: |
          pip install accelerate==0.27.2
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }} make tgi_test