GPU Benchmarks #22

Workflow file for this run

.github/workflows/gpu_benchmarks.yml at 0431bf6

	# Copyright 2025 The OpenXLA Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ============================================================================
	# Workflow identity, token permissions, and triggers.
	name: GPU Benchmarks
	# Restrict the workflow token to read-only access to repository contents.
	permissions:
	  contents: read
	on:
	  workflow_dispatch: # Allows manual triggering
	  schedule:
	    # Run every 6 hours (at minute 0 of hours 0, 6, 12, 18).
	    # Five cron fields: minute, hour, day of month, month, day of week.
	    - cron: '0 */6 * * *'

	jobs:
	  # Builds hlo_runner_main with the CUDA config, runs it on a sample HLO
	  # module, and uploads the resulting XSpace profile as an artifact.
	  Tests:
	    strategy:
	      # Don't fail fast - want to see results for all builds even if one fails.
	      fail-fast: false
	      matrix:
	        # One entry per runner type. Each entry supplies the runner label
	        # (os), the container image, and the job's display name.
	        job_info:
	          - os: "linux-x86-g2-48-l4-4gpu"
	            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
	            pretty_name: "Linux X86 runner with 4 NVIDIA L4 GPUs"
	          # Expect more GPU types in the future.
	    name: ${{ matrix.job_info.pretty_name }}
	    runs-on: ${{ matrix.job_info.os }}
	    container: ${{ matrix.job_info.container }}
	    defaults:
	      run:
	        shell: bash
	    timeout-minutes: 360
	    steps:
	      - name: Checkout XLA
	        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

	      - name: Print machine specs
	        run: |
	          nvidia-smi
	          free -h # Memory information
	          df -h # Disk space information
	          uname -a # Kernel information

	      - name: Create results directory
	        run: mkdir -p results

	      - name: Configure XLA for GPU backend
	        run: |
	          ./configure.py --backend CUDA --nccl

	      # NOTE(review): `env` below is scoped to this step only, so the value is
	      # echoed here but is not exported to the later build/run steps - confirm
	      # whether it was meant to apply to the whole job.
	      - name: Set TF_CPP_MAX_VLOG_LEVEL
	        env:
	          TF_CPP_MAX_VLOG_LEVEL: "1"
	        run: |
	          echo "TF_CPP_MAX_VLOG_LEVEL is: $TF_CPP_MAX_VLOG_LEVEL"

	      - name: Build hlo_runner_main
	        run: |
	          bazel build -c opt --config=cuda --dynamic_mode=off \
	            //xla/tools/multihost_hlo_runner:hlo_runner_main

	      # TODO(juliagmt): Add more performance-critical HLOs to benchmark.
	      # Runs the freshly built hlo_runner_main on gpu_hlo_pass.hlo and dumps
	      # the profile to results/xspace.pb for the later steps.
	      - name: Run hlo_opt and generate xspace.pb
	        run: |
	          ./bazel-bin/xla/tools/multihost_hlo_runner/hlo_runner_main \
	            --device_type=gpu \
	            --log_output=True \
	            --use_spmd_partitioning \
	            --xla_gpu_dump_xspace_to=results/xspace.pb \
	            xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo

	      - name: Compute the cost of gpu_hlo_pass.hlo
	        run: |
	          # Hand the tool an absolute path built from the current directory.
	          hlo_path="$(pwd)/xla/tools/hlo_opt/tests/gpu_hlo_pass.hlo"
	          bazel run //xla/tools:compute_cost -- \
	            --input="$hlo_path" --format=hlo --gpu

	      # NOTE(review): no `ref` is pinned for this second repository, so the
	      # get_device_stats_main target built below comes from whatever that
	      # repository contains at run time - confirm this is intended.
	      - name: Checkout juliagmt-google/xla
	        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
	        with:
	          repository: juliagmt-google/xla
	          path: juliagmt-google-xla

	      # Runs from the second checkout, so the profile written earlier is
	      # referenced through an absolute, workspace-rooted path.
	      - name: Compute the device stats of gpu_hlo_pass.hlo
	        env:
	          XSPACE_PATH: ${{ github.workspace }}/results/xspace.pb
	        run: |
	          bazel run //xla/tools:get_device_stats_main -- --input="$XSPACE_PATH"
	        working-directory: juliagmt-google-xla

	      - name: Upload XSpace
	        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
	        with:
	          name: gpu-xla-benchmarks-xspace
	          path: results/xspace.pb

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

GPU Benchmarks #22

Workflow file

GPU Benchmarks #22

Jobs

Run details

Workflow file for this run