GPU Benchmarks #22
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2025 The OpenXLA Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
name: GPU Benchmarks

# The workflow token only needs to read repository contents.
permissions:
  contents: read

on:
  # Allows manual triggering.
  workflow_dispatch:
  # Run every 6 hours (at minute 0 of hours 0, 6, 12, 18).
  schedule:
    - cron: '0 */6 * * *'

jobs:
  # Builds hlo_runner_main, runs gpu_hlo_pass.hlo with it on the GPU runner,
  # computes cost and device stats for that module, and uploads the resulting
  # results/xspace.pb as a workflow artifact.
  Tests:
    strategy:
      # Don't fail fast - want to see results for all builds even if one fails.
      fail-fast: false
      matrix:
        # One entry per runner type. Expect more GPU types in the future.
        job_info:
          # NOTE(review): the image is referenced by the mutable `:latest` tag;
          # consider pinning a digest so runs are comparable over time.
          - os: "linux-x86-g2-48-l4-4gpu"
            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
            pretty_name: "Linux X86 runner with 4 NVIDIA L4 GPUs"
    name: ${{ matrix.job_info.pretty_name }}
    runs-on: ${{ matrix.job_info.os }}
    container: ${{ matrix.job_info.container }}
    defaults:
      run:
        shell: bash
    timeout-minutes: 360
    steps:
      - name: Checkout XLA
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - name: Print machine specs
        run: |
          nvidia-smi
          free -h # Memory information
          df -h # Disk space information
          uname -a # Kernel information

      - name: Create results directory
        run: mkdir -p results

      - name: Configure XLA for GPU backend
        run: |
          ./configure.py --backend CUDA --nccl

      - name: Set TF_CPP_MAX_VLOG_LEVEL
        # A step-level `env:` is visible to this step only, so the value is
        # also appended to GITHUB_ENV to make it apply to the steps that follow.
        env:
          TF_CPP_MAX_VLOG_LEVEL: '1'
        run: |
          echo "TF_CPP_MAX_VLOG_LEVEL=${TF_CPP_MAX_VLOG_LEVEL}" >> "$GITHUB_ENV"
          echo "TF_CPP_MAX_VLOG_LEVEL is: $TF_CPP_MAX_VLOG_LEVEL"

      - name: Build hlo_runner_main
        run: >-
          bazel build -c opt --config=cuda --dynamic_mode=off
          //xla/tools/multihost_hlo_runner:hlo_runner_main

      # TODO(juliagmt): Add more performance-critical HLOs to benchmark.
      - name: Run hlo_runner_main and generate xspace.pb
        # Writes the profile to results/xspace.pb, which the device-stats and
        # upload steps below consume.
        run: |
          ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main \
            --device_type=gpu \
            --log_output=True \
            --use_spmd_partitioning \
            --xla_gpu_dump_xspace_to=results/xspace.pb \
            xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo

      - name: Compute the cost of gpu_hlo_pass.hlo
        # The input is passed as an absolute path built from the current
        # directory; the shell's own PWD variable is left untouched.
        run: |
          bazel run //xla/tools:compute_cost -- \
            --input="$(pwd)/xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo" \
            --format=hlo \
            --gpu

      - name: Checkout juliagmt-google/xla
        # NOTE(review): no `ref` is given, so this follows whatever the
        # repository's default branch points to at run time; consider pinning a
        # commit so the device-stats tool does not change between runs.
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          repository: juliagmt-google/xla
          path: juliagmt-google-xla

      - name: Compute the device stats of gpu_hlo_pass.hlo
        working-directory: juliagmt-google-xla
        # The path is built from the GITHUB_WORKSPACE environment variable at
        # run time rather than the `github.workspace` context: in a job that
        # runs inside `container:`, the context can carry the runner host's
        # path instead of the path mounted in the container.
        run: |
          XSPACE_PATH="${GITHUB_WORKSPACE}/results/xspace.pb"
          bazel run //xla/tools:get_device_stats_main -- --input="$XSPACE_PATH"

      - name: Upload XSpace
        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
        with:
          # NOTE(review): the name is not matrix-specific; give it a per-entry
          # suffix before adding a second matrix entry, since two jobs would
          # otherwise upload under the same artifact name.
          name: gpu-xla-benchmarks-xspace
          path: results/xspace.pb
          # Fail instead of warning if the profile is missing.
          if-no-files-found: error