[DO NOT MERGE] TESTING GITHUB ACTIONS PIPELINE FOR PYTHON UPDATE #12400
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: installs the Python dependencies and runs the pytest suites.
# Background: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: pytest

# Triggered by pushes to, and pull requests targeting, master and release-*.
on:
  push:
    branches:
      - "master"
      - "release-*"
  pull_request:
    branches:
      - "master"
      - "release-*"

# Runs are grouped by PR head ref (or by commit SHA when there is no head
# ref); a newer run in the same group cancels the one still in progress.
concurrency:
  group: pytest-${{ github.head_ref || github.sha }}
  cancel-in-progress: true
jobs:
  # Matrix job that runs the unit and regression test steps.
  pytest:
    name: py${{ matrix.python-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 150

    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
        # Disabled alternative: ["3.10", "3.11", "3.12"]
        python-version:
          - "3.11"
        # Disabled alternative: ["not distributed", "distributed"]
        test-markers:
          - "not distributed"

    env:
      # pytest marker expression selected by the matrix entry.
      MARKERS: ${{ matrix.test-markers }}
      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
      KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
      KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
      # False only for pull requests into ludwig-ai/ludwig whose head repo is a fork.
      IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}

    services:
      # MinIO service container published on port 9000.
      minio:
        image: fclairamb/minio-github-actions
        env:
          MINIO_ACCESS_KEY: minio
          MINIO_SECRET_KEY: minio123
        ports:
          - "9000:9000"
steps:
  # Runs only when the ACT environment variable is set (local testing with
  # act, per the step name): installs Node.js and opens up the tool cache dir.
  - name: Setup ludwigai/ludwig-ray container for local testing with act.
    if: ${{ env.ACT }}
    run: |
      curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
      sudo apt-get install -y nodejs
      sudo mkdir -p /opt/hostedtoolcache/
      sudo chmod 777 -R /opt/hostedtoolcache/

  # v2 of checkout/setup-python target retired Node runtimes; use current majors.
  - uses: actions/checkout@v4

  - name: Set up Python ${{ matrix.python-version }}
    uses: actions/setup-python@v5
    with:
      python-version: ${{ matrix.python-version }}

  # System packages needed to build/run native dependencies on Linux runners.
  - name: Setup Linux
    if: runner.os == 'linux'
    run: |
      sudo apt-get update && sudo apt-get install -y build-essential cmake liblapack-dev gfortran libsndfile1 wget libsox-dev libopenblas-dev

  # Places CPython's longintrepr.h into the system include directory.
  # NOTE(review): the header is fetched from CPython's main branch and the
  # target directory is hard-coded to python3.11 regardless of the matrix
  # version; confirm this is intended before widening the matrix.
  - name: Download longintrepr.h
    run: |
      sudo mkdir -p /usr/include/python3.11
      # -f makes curl fail on HTTP errors instead of saving an error page as a header.
      sudo curl -fsSL -o /usr/include/python3.11/longintrepr.h https://raw.githubusercontent.com/python/cpython/refs/heads/main/Include/cpython/longintrepr.h
# - name: Install GPy | ||
# run: | | ||
# python -m pip install -U pip | ||
# pip install Cython==0.29.35 | ||
# pip install GPy==1.10.0 | ||
# macOS-only system dependency.
- name: Setup macOS
  if: runner.os == 'macOS'
  run: |
    brew install libuv

# Caches pip's download cache; skipped when the ACT environment variable is set.
# actions/cache v1/v2 have been retired by GitHub, so a current major is required.
- name: pip cache
  if: ${{ !env.ACT }}
  uses: actions/cache@v4
  with:
    path: ~/.cache/pip
    # Keyed on OS, Python version, marker set and this workflow file's hash.
    key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-${{ matrix.test-markers }}-${{ hashFiles('.github/workflows/pytest.yml') }}

# Prints home-directory and filesystem usage to help diagnose disk-space failures.
- name: Debug out of space
  run: |
    du -h -d 1 ~
    df -h

# Upgrades pip and logs tool versions; the package itself is installed in a later step.
- name: Install dependencies
  run: |
    python --version
    pip --version
    python -m pip install -U pip
    cmake --version
    # pip install --prefer-binary .
    pip list
  shell: bash
# - name: Incremental dependency installation | ||
# run: | | ||
# pip install pytest | ||
# echo "Installed pytest successfully." | ||
# pip install pytest-timeout | ||
# echo "Installed pytest-timeout successfully." | ||
# pip install pytest-cov | ||
# echo "Installed pytest-cov successfully." | ||
# pip install tifffile | ||
# echo "Installed tifffile successfully." | ||
# pip install wget | ||
# echo "Installed wget successfully." | ||
# pip install six>=1.13.0 | ||
# echo "Installed six>=1.13.0 successfully." | ||
# pip install aim | ||
# echo "Installed aim successfully." | ||
# pip install wandb | ||
# echo "Installed wandb successfully." | ||
# pip install comet_ml | ||
# echo "Installed comet_ml successfully." | ||
# pip install mlflow | ||
# echo "Installed mlflow successfully." | ||
# pip install "sqlalchemy<2" | ||
# echo "Installed sqlalchemy<2 successfully." | ||
# pip install hpbandster | ||
# echo "Installed hpbandster successfully." | ||
# pip install ConfigSpace==0.7.1 | ||
# echo "Installed ConfigSpace==0.7.1 successfully." | ||
# pip install flaml[blendsearch] | ||
# echo "Installed flaml[blendsearch] successfully." | ||
# pip install HEBO | ||
# echo "Installed HEBO successfully." | ||
# pip install nevergrad | ||
# echo "Installed nevergrad successfully." | ||
# pip install zoopt | ||
# echo "Installed zoopt successfully." | ||
# pip install s3fs>=2022.8.2 | ||
# echo "Installed s3fs>=2022.8.2 successfully." | ||
# shell: bash | ||
# - name: Install distributed, explain, and extra dependencies | ||
# run: | | ||
# # Distributed dependencies | ||
# pip install awscli | ||
# echo "Installed distributed dependency: awscli successfully." | ||
# pip install "dask[dataframe]<2023.4.0" | ||
# echo "Installed distributed dependency: dask[dataframe]<2023.4.0 successfully." | ||
# pip install "deepspeed!=0.11.0,<0.13.0" | ||
# echo "Installed distributed dependency: deepspeed!=0.11.0,<0.13.0 successfully." | ||
# pip install "getdaft[ray]==0.1.20" | ||
# echo "Installed distributed dependency: getdaft[ray]==0.1.20 successfully." | ||
# pip install GPUtil | ||
# echo "Installed distributed dependency: GPUtil successfully." | ||
# pip install pyarrow | ||
# echo "Installed distributed dependency: pyarrow successfully." | ||
# pip install "ray[default,data,serve,tune]==2.3.1" | ||
# echo "Installed distributed dependency: ray[default,data,serve,tune]==2.3.1 successfully." | ||
# pip install tblib | ||
# echo "Installed distributed dependency: tblib successfully." | ||
# pip install "tensorboardX<2.3" | ||
# echo "Installed distributed dependency: tensorboardX<2.3 successfully." | ||
# # Explain dependencies | ||
# pip install captum | ||
# echo "Installed explain dependency: captum successfully." | ||
# # Extra dependencies | ||
# pip install "horovod[pytorch]>=0.24.0,!=0.26.0" | ||
# echo "Installed extra dependency: horovod[pytorch]>=0.24.0,!=0.26.0 successfully." | ||
# pip install "modin[ray]" | ||
# echo "Installed extra dependency: modin[ray] successfully." | ||
# pip install "predibase>=2023.10.2" | ||
# echo "Installed extra dependency: predibase>=2023.10.2 successfully." | ||
# - name: Install hyperopt, llm, serve, tree, and viz dependencies | ||
# run: | | ||
# # Hyperopt dependencies | ||
# pip install hyperopt | ||
# echo "Installed hyperopt successfully." | ||
# pip install "ray[default,tune]>=2.0.0" | ||
# echo "Installed hyperopt dependency: ray[default,tune]>=2.0.0 successfully." | ||
# # LLM dependencies | ||
# pip install accelerate | ||
# echo "Installed llm dependency: accelerate successfully." | ||
# pip install faiss-cpu | ||
# echo "Installed llm dependency: faiss-cpu successfully." | ||
# pip install loralib | ||
# echo "Installed llm dependency: loralib successfully." | ||
# pip install "peft>=0.10.0" | ||
# echo "Installed llm dependency: peft>=0.10.0 successfully." | ||
# pip install sentence-transformers | ||
# echo "Installed llm dependency: sentence-transformers successfully." | ||
# # Serve dependencies | ||
# pip install cartonml-nightly | ||
# echo "Installed serve dependency: cartonml-nightly successfully." | ||
# pip install fastapi | ||
# echo "Installed serve dependency: fastapi successfully." | ||
# pip install httpx | ||
# echo "Installed serve dependency: httpx successfully." | ||
# pip install "neuropod==0.3.0rc6 ; platform_system != 'Windows' and python_version < '3.9'" | ||
# echo "Installed serve dependency: neuropod==0.3.0rc6 successfully (if applicable)." | ||
# pip install python-multipart | ||
# echo "Installed serve dependency: python-multipart successfully." | ||
# pip install uvicorn | ||
# echo "Installed serve dependency: uvicorn successfully." | ||
# pip install starlette | ||
# echo "Installed serve dependency: starlette successfully." | ||
# # Tree dependencies | ||
# pip install "hummingbird-ml>=0.4.8" | ||
# echo "Installed tree dependency: hummingbird-ml>=0.4.8 successfully." | ||
# pip install lightgbm | ||
# echo "Installed tree dependency: lightgbm successfully." | ||
# pip install lightgbm-ray | ||
# echo "Installed tree dependency: lightgbm-ray successfully." | ||
# # Viz dependencies | ||
# pip install hiplot | ||
# echo "Installed viz dependency: hiplot successfully." | ||
# pip install "matplotlib==3.9.3" | ||
# echo "Installed viz dependency: matplotlib==3.9.3 successfully." | ||
# pip install ptitprince | ||
# echo "Installed viz dependency: ptitprince successfully." | ||
# pip install "seaborn>=0.7,<0.12" | ||
# echo "Installed viz dependency: seaborn>=0.7,<0.12 successfully." | ||
# Installs the package with every optional extra.
- name: Test install all
  run: |
    # Quoted so the shell cannot glob-expand the bracketed extras list.
    pip install '.[dev,test,benchmarking,distributed,explain,extra,hyperopt,llm,serve,tree,viz]'

# Dumps the resolved dependency tree into the job log.
- name: Debug dependency tree
  run: |
    python -m pip install pipdeptree
    pipdeptree > dependency-tree.txt
    cat dependency-tree.txt
  shell: bash

# Each suite writes its own JUnit file so the regression run does not
# overwrite the unit-test report.
- name: Unit Tests
  run: |
    RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest-unit.xml tests/ludwig

# NOTE(review): in the marker expression `and` binds tighter than `or`, so it
# selects "($MARKERS and not slow and not combinatorial and not horovod) or
# (benchmark and not llm)" - confirm that grouping is the intended one.
- name: Regression Tests
  run: |
    RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest-regression.xml tests/regression_tests
# Skip Horovod and replace with DDP. | ||
# https://github.com/ludwig-ai/ludwig/issues/3468 | ||
# - name: Install Horovod if necessary | ||
# if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' | ||
# env: | ||
# HOROVOD_WITH_PYTORCH: 1 | ||
# HOROVOD_WITHOUT_MPI: 1 | ||
# HOROVOD_WITHOUT_TENSORFLOW: 1 | ||
# HOROVOD_WITHOUT_MXNET: 1 | ||
# run: | | ||
# pip install -r requirements_extra.txt | ||
# HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) | ||
# if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then | ||
# pip uninstall -y horovod | ||
# pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master | ||
# fi | ||
# horovodrun --check-build | ||
# shell: bash | ||
# Skip Horovod tests and replace with DDP. | ||
# https://github.com/ludwig-ai/ludwig/issues/3468 | ||
# - name: Horovod Tests | ||
# if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' | ||
# run: | | ||
# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ | ||
# Publishes the JUnit report(s) as an artifact even when an earlier step
# failed; skipped when the ACT environment variable is set.
- name: Upload Unit Test Results
  if: ${{ always() && !env.ACT }}
  uses: actions/upload-artifact@v4
  with:
    name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
    # The pytest steps write their --junitxml reports as pytest*.xml in the
    # workspace root. The former matrix-specific file name was never produced
    # by any step, so the upload found no files.
    path: "pytest*.xml"
# integration-tests: | ||
# name: ${{ matrix.test-markers }} | ||
# runs-on: ubuntu-latest | ||
# strategy: | ||
# fail-fast: false | ||
# matrix: | ||
# test-markers: | ||
# - "integration_tests_a" | ||
# - "integration_tests_b" | ||
# - "integration_tests_c" | ||
# - "integration_tests_d" | ||
# - "integration_tests_e" | ||
# - "integration_tests_f" | ||
# env: | ||
# AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} | ||
# AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} | ||
# KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} | ||
# KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} | ||
# IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} | ||
# MARKERS: ${{ matrix.test-markers }} | ||
# services: | ||
# minio: | ||
# image: fclairamb/minio-github-actions | ||
# env: | ||
# MINIO_ACCESS_KEY: minio | ||
# MINIO_SECRET_KEY: minio123 | ||
# ports: | ||
# - 9000:9000 | ||
# timeout-minutes: 90 | ||
# steps: | ||
# - uses: actions/checkout@v2 | ||
# - name: Set up Python 3.10 | ||
# uses: actions/setup-python@v2 | ||
# with: | ||
# python-version: "3.10" | ||
# - name: Setup Linux | ||
# if: runner.os == 'linux' | ||
# run: | | ||
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1 | ||
# - name: Setup macOS | ||
# if: runner.os == 'macOS' | ||
# run: | | ||
# brew install libuv | ||
# - name: Install dependencies | ||
# run: | | ||
# python --version | ||
# pip --version | ||
# python -m pip install -U pip | ||
# # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. | ||
# cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt | ||
# cat requirements_distributed.txt | sed '/^ray[\[]/d' | ||
# pip install torch==2.0.0 torchtext torchvision torchaudio | ||
# pip install ray==2.3.0 | ||
# pip install '.[test]' | ||
# pip list | ||
# shell: bash | ||
# - name: Free Disk Space (Ubuntu) | ||
# uses: jlumbroso/free-disk-space@main | ||
# with: | ||
# tool-cache: false | ||
# android: true | ||
# dotnet: true | ||
# haskell: true | ||
# large-packages: false | ||
# docker-images: true | ||
# swap-storage: true | ||
# - name: Clean out /tmp directory | ||
# run: | | ||
# sudo rm -rf /tmp/* | ||
# - name: Integration Tests | ||
# run: | | ||
# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests | ||
# llm-tests: | ||
# name: LLM Tests | ||
# runs-on: ubuntu-latest | ||
# timeout-minutes: 60 | ||
# steps: | ||
# - uses: actions/checkout@v2 | ||
# - name: Set up Python 3.9 | ||
# uses: actions/setup-python@v2 | ||
# with: | ||
# python-version: 3.9 | ||
# - name: Setup Linux | ||
# if: runner.os == 'linux' | ||
# run: | | ||
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1 | ||
# - name: Setup macOS | ||
# if: runner.os == 'macOS' | ||
# run: | | ||
# brew install libuv | ||
# - name: Install dependencies | ||
# run: | | ||
# python --version | ||
# pip --version | ||
# python -m pip install -U pip | ||
# # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. | ||
# cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt | ||
# cat requirements_distributed.txt | sed '/^ray[\[]/d' | ||
# pip install torch==2.0.0 torchtext torchvision torchaudio | ||
# pip install ray==2.3.0 | ||
# pip install '.[test]' | ||
# pip list | ||
# shell: bash | ||
# - name: LLM Tests | ||
# run: | | ||
# pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests | ||
#COMMENTED OUT COMBINATORIAL TEST ######################################## | ||
# combinatorial-tests: | ||
# name: Combinatorial Tests | ||
# runs-on: ubuntu-latest | ||
# timeout-minutes: 60 | ||
# steps: | ||
# - uses: actions/checkout@v2 | ||
# - name: Set up Python 3.11 | ||
# uses: actions/setup-python@v5 | ||
# with: | ||
# python-version: "3.11" | ||
# - name: Setup Linux | ||
# if: runner.os == 'linux' | ||
# run: | | ||
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1 | ||
# - name: Setup macOS | ||
# if: runner.os == 'macOS' | ||
# run: | | ||
# brew install libuv | ||
# - name: Install dependencies | ||
# run: | | ||
# python --version | ||
# pip --version | ||
# python -m pip install -U pip | ||
# pip install '.[test]' | ||
# pip list | ||
# shell: bash | ||
# - name: Testing combinatorial config generation code | ||
# run: | | ||
# pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling | ||
# - name: Combinatorial Tests | ||
# run: | | ||
# pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success | ||
#COMMENTED OUT COMBINATORIAL TEST ######################################## | ||
# Verifies that a plain editable install (no extras) works end to end:
# installs the package, runs `ludwig check_install`, then the getting-started example.
test-minimal-install:
  name: Test Minimal Install
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    # v2 of checkout/setup-python target retired Node runtimes; use current majors.
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Setup Linux
      if: runner.os == 'linux'
      run: |
        sudo apt-get update && sudo apt-get install -y cmake libsndfile1

    # Only takes effect if this job is ever run on a macOS runner.
    - name: Setup macOS
      if: runner.os == 'macOS'
      run: |
        brew install libuv

    - name: Install dependencies
      run: |
        python --version
        pip --version
        python -m pip install -U pip
        pip install -e '.'
        pip list
      shell: bash

    - name: Check Install
      run: |
        ludwig check_install
      shell: bash

    - name: Test Getting Started
      run: |
        cd examples/getting_started && sh ./run.sh
      shell: bash
# start-runner: | ||
# name: Start self-hosted EC2 runner | ||
# if: > | ||
# always() && needs.pytest.result != 'failure' && ( | ||
# github.event_name == 'schedule' && github.repository == 'ludwig-ai/ludwig' || | ||
# github.event_name == 'push' && github.repository == 'ludwig-ai/ludwig' || | ||
# github.event_name == 'pull_request' && github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && !github.event.pull_request.head.repo.fork) | ||
# needs: pytest | ||
# runs-on: ubuntu-latest | ||
# outputs: | ||
# label: ${{ steps.start-ec2-runner.outputs.label }} | ||
# ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} | ||
# steps: | ||
# - name: Configure AWS credentials | ||
# uses: aws-actions/configure-aws-credentials@v1 | ||
# with: | ||
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
# aws-region: ${{ secrets.AWS_REGION }} | ||
# - name: Start EC2 runner | ||
# id: start-ec2-runner | ||
# uses: machulav/ec2-github-runner@v2.3.2 | ||
# with: | ||
# mode: start | ||
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} | ||
# ec2-image-id: ami-0759580dedc953d1f | ||
# ec2-instance-type: g4dn.xlarge | ||
# subnet-id: subnet-0983be43 | ||
# security-group-id: sg-4cba0d08 | ||
# aws-resource-tags: > | ||
# [ | ||
# {"Key": "Name", "Value": "ludwig-github-${{ github.head_ref || github.sha }}"}, | ||
# {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}, | ||
# {"Key": "GitHubHeadRef", "Value": "${{ github.head_ref }}"}, | ||
# {"Key": "GitHubSHA", "Value": "${{ github.sha }}"} | ||
# ] | ||
# pytest-gpu: | ||
# if: needs.start-runner.result != 'skipped' | ||
# needs: start-runner # required to start the main job when the runner is ready | ||
# runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runners | ||
# strategy: | ||
# fail-fast: false | ||
# matrix: | ||
# python-version: [3.7] | ||
# include: | ||
# - python-version: 3.7 | ||
# pytorch-version: 1.10.0 | ||
# torchscript-version: 1.10.2 | ||
# env: | ||
# PYTORCH: ${{ matrix.pytorch-version }} | ||
# NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" | ||
# NEUROPOD_VERISON: "0.3.0-rc6" | ||
# TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }} | ||
# name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, gpu | ||
# timeout-minutes: 70 | ||
# steps: | ||
# - uses: actions/checkout@v2 | ||
# - name: Set up Python ${{ matrix.python-version }} | ||
# uses: actions/setup-python@v2 | ||
# with: | ||
# python-version: ${{ matrix.python-version }} | ||
# - name: Setup Linux | ||
# if: runner.os == 'linux' | ||
# run: | | ||
# sudo apt-get update && sudo apt-get install -y libsndfile1 cmake ccache build-essential g++-8 gcc-8 | ||
# cmake --version | ||
# - name: Install CUDA drivers | ||
# run: | | ||
# wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin | ||
# sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 | ||
# wget https://developer.download.nvidia.com/compute/cuda/11.5.1/local_installers/cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb | ||
# sudo dpkg -i cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb | ||
# sudo apt-key add /var/cuda-repo-ubuntu2004-11-5-local/7fa2af80.pub | ||
# sudo apt-get update | ||
# sudo apt-get -y install cuda | ||
# shell: bash | ||
# - name: pip cache | ||
# uses: actions/cache@v2 | ||
# with: | ||
# path: ~/.cache/pip | ||
# key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ hashFiles('requirements*.txt') }} | ||
# restore-keys: | | ||
# ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}- | ||
# - name: Install dependencies | ||
# env: | ||
# HOROVOD_WITH_PYTORCH: 1 | ||
# HOROVOD_WITHOUT_MPI: 1 | ||
# HOROVOD_WITHOUT_TENSORFLOW: 1 | ||
# HOROVOD_WITHOUT_MXNET: 1 | ||
# run: | | ||
# python --version | ||
# pip --version | ||
# python -m pip install -U pip | ||
# if [ $PYTORCH == "nightly" ]; then | ||
# cat requirements.txt | sed '/^torch[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt | ||
# pip install --pre torch torchvision -f https://download.pytorch.org/whl/torch_stable.html | ||
# else | ||
# pip install torch==${PYTORCH}+cu111 -f https://download.pytorch.org/whl/torch_stable.html | ||
# fi | ||
# # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master | ||
# pip install dulwich==0.20.26 # workaround for `/usr/bin/ld: cannot find -lpython3.7m` | ||
# pip install '.[test]' | ||
# pip list | ||
# shell: bash | ||
# - name: Install Neuropod backend | ||
# run: | | ||
# sudo mkdir -p "$NEUROPOD_BASE_DIR" | ||
# curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" | ||
# shell: bash | ||
# - name: Reinstall Horovod if necessary | ||
# env: | ||
# HOROVOD_WITH_PYTORCH: 1 | ||
# HOROVOD_WITHOUT_MPI: 1 | ||
# HOROVOD_WITHOUT_TENSORFLOW: 1 | ||
# HOROVOD_WITHOUT_MXNET: 1 | ||
# run: | | ||
# HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) | ||
# if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then | ||
# pip uninstall -y horovod | ||
# pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master | ||
# fi | ||
# horovodrun --check-build | ||
# shell: bash | ||
# - name: Check CUDA is available | ||
# run: | | ||
# python -c "import torch; assert torch.cuda.is_available()" | ||
# - name: Tests | ||
# run: | | ||
# pytest -v --timeout 300 --durations 10 --junitxml pytest.xml tests | ||
# - name: Upload Unit Test Results | ||
# if: always() | ||
# uses: actions/upload-artifact@v2 | ||
# with: | ||
# name: Unit Test Results (Python ${{ matrix.python-version }} gpu | ||
# path: pytest.xml | ||
# Uploads the file at github.event_path (the triggering event payload) as an
# artifact named "Event File".
event_file:
  name: Event File
  runs-on: ubuntu-latest
  steps:
    - name: Upload
      # Skipped when the ACT environment variable is set.
      if: ${{ !env.ACT }}
      uses: actions/upload-artifact@v4
      with:
        name: "Event File"
        path: ${{ github.event_path }}
# stop-runner: | ||
# name: Stop self-hosted EC2 runner | ||
# # required to stop the runner even if the error happened in the previous job | ||
# if: always() && needs.start-runner.result != 'skipped' | ||
# needs: | ||
# - start-runner # required to get output from the start-runner job | ||
# - pytest-gpu # required to wait when the main job is done | ||
# runs-on: ubuntu-latest | ||
# steps: | ||
# - name: Configure AWS credentials | ||
# uses: aws-actions/configure-aws-credentials@v1 | ||
# with: | ||
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
# aws-region: ${{ secrets.AWS_REGION }} | ||
# - name: Stop EC2 runner | ||
# uses: machulav/ec2-github-runner@v2.3.1 | ||
# with: | ||
# mode: stop | ||
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} | ||
# label: ${{ needs.start-runner.outputs.label }} | ||
# ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} |