From 3afbe82e6216a648d73f0f51479c699ecab38499 Mon Sep 17 00:00:00 2001 From: Alvaro Moran Date: Wed, 28 Feb 2024 12:24:28 +0000 Subject: [PATCH] WIP --- .github/actions/pytorch-xla-tpu/Dockerfile | 1 + .../workflows/test-pytorch-xla-tpu-tgi.yml | 50 ++++++++++++------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/.github/actions/pytorch-xla-tpu/Dockerfile b/.github/actions/pytorch-xla-tpu/Dockerfile index 57214a86..0c53e8b2 100644 --- a/.github/actions/pytorch-xla-tpu/Dockerfile +++ b/.github/actions/pytorch-xla-tpu/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update -y \ libpython3.10 \ gnupg2 \ wget \ + curl \ && rm -rf /var/lib/apt/lists/* \ && apt-get clean RUN pip3 --no-cache-dir install --upgrade pip diff --git a/.github/workflows/test-pytorch-xla-tpu-tgi.yml b/.github/workflows/test-pytorch-xla-tpu-tgi.yml index f089bf35..798e2287 100644 --- a/.github/workflows/test-pytorch-xla-tpu-tgi.yml +++ b/.github/workflows/test-pytorch-xla-tpu-tgi.yml @@ -20,30 +20,44 @@ jobs: name: Run TGI tests runs-on: optimum-tpu container: - image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.2.0_3.10_tpuvm + # Use an image that works on TPU with PyTorch 2.3.0 (the release was not working) + image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla@sha256:8f1dcd5b03f993e4da5c20d17c77aff6a5f22d5455f8eb042d2e4b16ac460526 + # image: ubuntu:22.04 options: --shm-size "16gb" --ipc host --privileged env: PJRT_DEVICE: TPU steps: - name: Checkout uses: actions/checkout@v4 - - name: Create venv and install Pytorch/XLA + + - name: Test env run: | - echo "PJRT_DEVICE set to $PJRT_DEVICE" - pwd - ls - cat /etc/issue - echo "-----" - # sudo apt install python3.10-venv -y - # echo "Creating virtual environment..."
- # python3 -m venv venv-tpu-pytorch - # source venv-tpu-pytorch/bin/activate - echo "Installing basic packages" - python -m pip install -U pip - # python -m pip install "torch~=2.2.0" "torch_xla[tpu]~=2.2.0" -f https://storage.googleapis.com/libtpu-releases/index.html numpy echo "Testing XLA installation..." python -c "import torch_xla.core.xla_model as xm; assert xm.xla_device().type == 'xla', 'XLA device not available'" - - name: Run TGI server python tests - run: | - # source venv-tpu-pytorch/bin/activate - make tgi_test + + + # - name: Containerized Build and Test TGI + # uses: ./.github/actions/pytorch-xla-tpu + # with: + # make-target: tgi_test + + # - name: Create venv and install Pytorch/XLA + # run: | + # echo "PJRT_DEVICE set to $PJRT_DEVICE" + # pwd + # ls + # cat /etc/issue + # echo "-----" + # # sudo apt install python3.10-venv -y + # # echo "Creating virtual environment..." + # # python3 -m venv venv-tpu-pytorch + # # source venv-tpu-pytorch/bin/activate + # echo "Installing basic packages" + # python -m pip install -U pip + # # python -m pip install "torch~=2.2.0" "torch_xla[tpu]~=2.2.0" -f https://storage.googleapis.com/libtpu-releases/index.html numpy + # echo "Testing XLA installation..." + # python -c "import torch_xla.core.xla_model as xm; assert xm.xla_device().type == 'xla', 'XLA device not available'" + # - name: Run TGI server python tests + # run: | + # # source venv-tpu-pytorch/bin/activate + # make tgi_test