diff --git a/.github/workflows/v2-bisection.yml b/.github/workflows/v2-bisection.yml index f1452dc9a0..75ace9d7d6 100644 --- a/.github/workflows/v2-bisection.yml +++ b/.github/workflows/v2-bisection.yml @@ -11,15 +11,10 @@ jobs: bisection: environment: docker-s3-upload env: - BISECT_CONDA_ENV: "bisection-ci-v2" + CONDA_ENV_NAME: "bisection-ci-v2" BISECT_DIR: ".torchbench/v2-bisection-ci" BISECT_BRANCH: "v2.0" - PYTHON_VER: "3.10" - CUDA_VER: "12.1" - NUMPY_VER: "1.21.2" - CMAKE_VER: "3.26" - MKL_VER: "2021.2.0" - MAGMA_VER: "magma-cuda121" + SETUP_SCRIPT: "/data/nvme/bin/setup_instance.sh" if: ${{ github.repository_owner == 'pytorch' }} runs-on: [self-hosted, bm-runner] timeout-minutes: 2880 # 48 hours @@ -31,32 +26,27 @@ jobs: - name: Create conda environment run: | set -x - conda create -y -n "${BISECT_CONDA_ENV}" python="${PYTHON_VER}" - . activate "${BISECT_CONDA_ENV}" - . /data/nvme/bin/setup_instance.sh - conda install -y numpy="${NUMPY_VER}" mkl="${MKL_VER}" mkl-include="${MKL_VER}" \ - requests ninja pyyaml setuptools cffi sympy ffmpeg \ - typing_extensions future six dataclasses tabulate gitpython git-lfs tqdm regex - # install cmake 3.26 from conda-forge, cmake > 3.24 is required by torchaudio - conda install -y cmake="${CMAKE_VER}" -c conda-forge - # Install magma - conda install -y -c pytorch "${MAGMA_VER}" + python3 ./utils/python_utils.py --create-conda-env ${CONDA_ENV_NAME} + . activate "${CONDA_ENV_NAME}" + . "${SETUP_SCRIPT}" + python utils/cuda_utils.py --install-torch-build-deps + python utils/cuda_utils.py --install-torchbench-deps - name: Bisection run: | + . activate "${CONDA_ENV_NAME}" + . "${SETUP_SCRIPT}" export BISECT_ISSUE="${{ github.event.inputs.issue_name }}" export BISECT_BASE="${HOME}/${BISECT_DIR}/${BISECT_ISSUE}" export TORCHBENCH_SRC_DIR="${PWD}" - . activate "$BISECT_CONDA_ENV" - . /data/nvme/bin/setup_instance.sh bash ./.github/scripts/run-bisection.sh # Update the result json symbolic link ln -sf "${BISECT_BASE}/gh${GITHUB_RUN_ID}/result.json" "${BISECT_BASE}/result.json" - name: Analyze bisection result run: | + . activate "${CONDA_ENV_NAME}" + . "${SETUP_SCRIPT}" export BISECT_ISSUE="${{ github.event.inputs.issue_name }}" export BISECT_BASE="${HOME}/${BISECT_DIR}/${BISECT_ISSUE}" - . activate "$BISECT_CONDA_ENV" - . /data/nvme/bin/setup_instance.sh python ./.github/scripts/bmutils/analyze-bisection-result.py --bisection-root "${BISECT_BASE}" --gh-workflow-id "${GITHUB_RUN_ID}" cp -r "${BISECT_BASE}" ./bisection-result - name: Create the github issue diff --git a/.github/workflows/v2-nightly.yml b/.github/workflows/v2-nightly.yml index 52f3044ecc..8fc40295f5 100644 --- a/.github/workflows/v2-nightly.yml +++ b/.github/workflows/v2-nightly.yml @@ -10,19 +10,16 @@ jobs: env: TORCHBENCH_VER: "v2" CONFIG_VER: "v2" - PYTHON_VER: "3.10" - CUDA_VER: "12.1" - MAGMA_VERSION: "magma-cuda121" CONDA_ENV_NAME: "torchbench-v2-nightly-ci" OUTPUT_DIR: ".torchbench/v2-nightly-ci" BISECTION_ROOT: ".torchbench/v2-bisection-ci" - CUDA_VERSION: "cu121" SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} IS_GHA: 1 AWS_DEFAULT_REGION: us-east-1 BUILD_ENVIRONMENT: benchmark-nightly + SETUP_SCRIPT: "/data/nvme/bin/setup_instance.sh" if: ${{ github.repository_owner == 'pytorch' }} runs-on: [self-hosted, bm-runner] steps: @@ -32,41 +29,29 @@ jobs: ref: v2.0 - name: Create conda env run: | - conda create -y -q --name "${CONDA_ENV_NAME}" python=${{ env.PYTHON_VER }} + python3 ./utils/python_utils.py --create-conda-env ${CONDA_ENV_NAME} - name: Install PyTorch nightly run: | . activate "${CONDA_ENV_NAME}" - . /data/nvme/bin/setup_instance.sh - # Install dependencies - pip install requests bs4 argparse gitpython boto3 regex - # Check if nightly builds are available - NIGHTLIES=$(python torchbenchmark/util/torch_nightly.py --packages torch) - # If failed, the script will generate empty result - if [ -z $NIGHTLIES ]; then - echo "Torch nightly build failed. Cancel the workflow." - exit 1 - fi - # Install magma - conda install -y -c pytorch "${MAGMA_VERSION}" - # Install PyTorch nightly from pip - pip install --no-cache-dir --pre torch torchvision torchaudio --index-url \ - https://download.pytorch.org/whl/nightly/${CUDA_VERSION} - - name: Install other TorchBench dependencies + . "${SETUP_SCRIPT}" + python utils/cuda_utils.py --install-torch-deps + python utils/cuda_utils.py --install-torch-nightly + - name: Install Torchbench models run: | . activate "${CONDA_ENV_NAME}" - . /data/nvme/bin/setup_instance.sh - conda install -y git-lfs + . "${SETUP_SCRIPT}" python install.py - name: Run benchmark run: | . activate "${CONDA_ENV_NAME}" - . /data/nvme/bin/setup_instance.sh + . "${SETUP_SCRIPT}" WORKFLOW_HOME="${HOME}/${{ env.OUTPUT_DIR }}/gh${GITHUB_RUN_ID}" bash ./.github/scripts/run.sh "${WORKFLOW_HOME}" - name: Generate the bisection config run: | set -x . activate "${CONDA_ENV_NAME}" + . "${SETUP_SCRIPT}" WORKFLOW_HOME="${HOME}/${{ env.OUTPUT_DIR }}/gh${GITHUB_RUN_ID}" mkdir -p benchmark-output/ # Update the self-hosted pytorch version diff --git a/.github/workflows/v2-sweep.yml b/.github/workflows/v2-sweep.yml deleted file mode 100644 index d773c355ae..0000000000 --- a/.github/workflows/v2-sweep.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: TorchBench V2 sweep -on: - workflow_dispatch: - inputs: - sweep_name: - description: "Sweep job name" - required: true - default: "sweep-example" - upload_result: - description: "Upload result" - required: true - default: "no" - -jobs: - run-benchmark: - environment: docker-s3-upload - env: - TORCHBENCH_VER: "v2" - CONFIG_VER: "v2" - SWEEP_DIR: ".torchbench/v2-sweep-ci" - PYTHON_VER: "3.10" - CUDA_VER: "12.1" - MAGMA_VERSION: "magma-cuda121" - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - BUILD_ENVIRONMENT: benchmark-sweep - IS_GHA: 1 - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: [self-hosted, bm-runner] - timeout-minutes: 2880 # 48 hours - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - ref: v2.0 - - name: Run sweep job - run: | - SWEEP_JOB=${{ github.event.inputs.sweep_name }} - SWEEP_JOB_ROOT="${HOME}/${SWEEP_DIR}/${SWEEP_JOB}" - SWEEP_JOB_OUTPUT="${SWEEP_JOB_ROOT}/results/gh${GITHUB_RUN_ID}" - for CONFIG in ${SWEEP_JOB_ROOT}/configs/*.txt; do - echo "Running config $CONFIG ..." - # Create a new conda env - CONFIG_BASE=$(basename ${CONFIG}) - CONDA_ENV_NAME=$(echo "${CONFIG_BASE}" | sed 's/[^-]*-\(.*\)\.txt/\1/') - conda create -y -q --name ${CONDA_ENV_NAME} python=${PYTHON_VER} - . activate ${CONDA_ENV_NAME} - . /data/nvme/bin/setup_instance.sh - conda install -y git-lfs - # Install magma - conda install -y -c pytorch "${MAGMA_VERSION}" - pip install -r "${CONFIG}" - python install.py - bash .github/scripts/run.sh "${SWEEP_JOB_OUTPUT}" - # Remove the conda env - conda deactivate - conda env remove --name ${CONDA_ENV_NAME} - done - echo "Finished running tasks" - - name: Upload result - run: | - UPLOAD_COND=${{ github.event.inputs.upload_result }} - # Quit if upload is not specified - if [ "$UPLOAD_COND" != "yes" ]; then - exit 0 - fi - # Otherwise, continue upload - SWEEP_JOB=${{ github.event.inputs.sweep_name }} - SWEEP_JOB_ROOT="${HOME}/${SWEEP_DIR}/${SWEEP_JOB}" - SWEEP_JOB_OUTPUT="${SWEEP_JOB_ROOT}/results/gh${GITHUB_RUN_ID}" - CONDA_ENV_NAME=sweep-ci - conda create -y -q --name ${CONDA_ENV_NAME} python=${PYTHON_VER} - . activate ${CONDA_ENV_NAME} - pip install -r requirements.txt - pip install boto3 - mkdir -p "${SWEEP_JOB_OUTPUT}/scores" - for RESULT in ${SWEEP_JOB_OUTPUT}/*.json; do - # Upload when the file is non-empty - if [ -s $RESULT ]; then - # Generate score file - RESULT_BASENAME=$(basename "${RESULT}") - SCORE_FILE="${SWEEP_JOB_OUTPUT}/scores/${RESULT_BASENAME}.score.json" - python compute_score.py --score_version "${CONFIG_VER}" --benchmark_data_file "${RESULT}" > "${SCORE_FILE}" - # Upload score - python scripts/upload_scribe_${CONFIG_VER}.py --pytest_bench_json "${RESULT}" --torchbench_score_file "${SCORE_FILE}" - fi - done - conda deactivate - conda env remove --name ${CONDA_ENV_NAME}