Skip to content

Commit

Permalink
add v100 on 1424 (#683)
Browse files Browse the repository at this point in the history
* add v100 on 1424

* add v100 on 1424

* add v100 on 1424

* add v100 on 1424

* add v100 on 1424

* add v100 on 1424

* add v100 on 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424

* add sco and 1424
  • Loading branch information
wugeshui authored Feb 26, 2024
1 parent 831b1e0 commit 512158e
Show file tree
Hide file tree
Showing 4 changed files with 397 additions and 144 deletions.
85 changes: 85 additions & 0 deletions .github/workflows/_runs-on-nv-step1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
name: runs on nv step 1

on:
workflow_call:
inputs:
runner:
description: Set up the runner
type: string
required: false
default: "tps-sco-ci"
deeplink_path:
description: ci work home
type: string
required: false
default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/'
env_path:
description: env file path
type: string
required: false
default: '/mnt/cache/share/deeplinkci/github'

jobs:
Build-Cuda:
name: Build-dipu-cuda
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: Build dipu
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +240 -type d |xargs rm -rf
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB}
srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
&& source ${ENV_PATH}/dipu_env \
&& rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/ \
&& bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
else
ssh SH1424 """
set -e
export USE_COVERAGE=ON
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
source ${ENV_PATH}/dipu_env
rsync -a /mnt/lustre/share_data/PAT/datasets/huggingface mmlab_pack/
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
"""
fi
Tidy-Cuda:
name: Run tidy (cuda)
needs: [Build-Cuda]
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: Check SupportedDiopiFunctions.txt
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \
git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \
{ echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; }
else
ssh SH1424 """
cd $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda && \
git diff -s --exit-code dipu/SupportedDiopiFunctions.txt || \
{ echo "::error file=dipu/SupportedDiopiFunctions.txt,title=File Not Match::Please commit your compiled SupportedDiopiFunctions.txt" && exit 1; }
"""
fi
- name: Run clang-tidy
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
srun --job-name=$GITHUB_JOB bash -c "bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh"
else
ssh SH1424 """
bash $DEEPLINK_PATH/$GITHUB_RUN_NUMBER/Build-Cuda/dipu/scripts/ci/nv/ci_nv_tidy.sh
"""
fi
285 changes: 285 additions & 0 deletions .github/workflows/_runs-on-nv-step2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
name: runs on nv step 2

on:
workflow_call:
inputs:
runner:
description: Set up the runner
type: string
required: false
default: "tps-sco-ci"
deeplink_path:
description: ci work home
type: string
required: false
default: '/mnt/cache/share/deeplinkci/github/${{ github.repository }}/'
env_path:
description: env file path
type: string
required: false
default: '/mnt/cache/share/deeplinkci/github'
all_coverage:
description: all coverage
type: string
required: true
default: 'false'
require_coverage:
description: input coverage rate
type: string
required: false
default: '0'
jobs:
Test-Cuda:
name: Test-dipu-cuda
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
ALL_COVERAGE: ${{ inputs.all_coverage }}
REQUIRE_COVERAGE: ${{ inputs.require_coverage }}
steps:
- name: Run-test
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu \
&& source ${ENV_PATH}/dipu_env \
&& bash tests/run_nv_tests.sh"
if [ "${ALL_COVERAGE}" = "ON" ]; then
bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail"
fi
else
ssh SH1424 """
set -ex
export USE_COVERAGE=ON
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
source ${ENV_PATH}/dipu_env
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=70 sh tests/run_nv_tests.sh
if [ "${ALL_COVERAGE}" = "ON" ]; then
bash /mnt/cache/share/platform/dep/sonar/coverage_DIPU_nv.sh ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda ${GITHUB_RUN_NUMBER} || echo "get coverage fail"
fi
"""
fi
- name: increment coverage check
if: ${{ contains( github.event_name, 'pull_request' ) && contains( github.base_ref, 'main' ) }}
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda
ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts
source ${ENV_PATH}/dipu_env
bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE}
else
ssh SH1424 """
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/
rm -rf scripts
ln -s ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/source-main/dipu/third_party/DIOPI/scripts scripts
source /mnt/cache/share/platform/env/pt2.0_diopi
bash scripts/increment_coverage.sh ${REQUIRE_COVERAGE}
"""
fi
Test-One-Iter_Cuda:
name: Test-one-iter-cuda
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: build some env
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
export basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/
srun --job-name=${GITHUB_JOB} bash -c "cd ${basic_path} \
&& export PYTHONPATH=${basic_path}/mmlab_pack:${basic_path}/mmlab_pack/mmengine:${basic_path}/mmlab_pack/mmcv:$PYTHONPATH \
&& source ${ENV_PATH}/dipu_env && cd mmlab_pack \
&& bash ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_one_iter.sh build_cuda"
else
ssh SH1424 """
set -ex
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
source ${ENV_PATH}/dipu_env
basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
export PYTHONPATH=\${basic_path}/mmengine:\$PYTHONPATH
export PYTHONPATH=\${basic_path}/mmcv:\$PYTHONPATH
export PYTHONPATH=\$(pwd):\$PYTHONPATH
cd mmlab_pack
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --time=20 bash ../scripts/ci/ci_one_iter.sh build_cuda
"""
fi
- name: run-one-iter-for-tradition
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \
&& source ${ENV_PATH}/dipu_env && cd mmlab_pack \
&& rm -rf one_iter_data \
&& python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
else
ssh SH1424 """
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
source ${ENV_PATH}/dipu_env
basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path}
export PYTHONPATH=\$(pwd):\$PYTHONPATH
cd mmlab_pack
rm -rf one_iter_data
python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:1" \"${CUDA_PARTATION}\" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
"""
fi
- name: run-one-iter-for-llm
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/ \
&& source ${ENV_PATH}/dipu_env && cd mmlab_pack \
&& rm -rf one_iter_data \
&& python ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/scripts/ci/ci_run_one_iter.py sco ${GITHUB_JOB} gpu sco "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
else
ssh SH1424 """
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda/dipu
source ${ENV_PATH}/dipu_env
basic_path=${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
source scripts/ci/ci_one_iter.sh export_pythonpath_cuda \${basic_path}
export PYTHONPATH=\$(pwd):\$PYTHONPATH
cd mmlab_pack
rm -rf one_iter_data
python ../scripts/ci/ci_run_one_iter.py cuda ${GITHUB_RUN_NUMBER}_${GITHUB_JOB} "gpu:1" \"${CUDA_PARTATION}\" "llm" && rm -rf one_iter_data || (rm -rf one_iter_data && exit 1)
"""
fi
- name: Perform cleanup one iter data
if: always()
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
rm -rf one_iter_data
touch one_iter_data #用于占位,防止创建新的 one_iter_data 文件夹
else
ssh SH1424 """
set -ex
echo "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}"
scancel -n "${GITHUB_RUN_NUMBER}_${GITHUB_JOB}"
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda/dipu/mmlab_pack
rm -rf one_iter_data
touch one_iter_data # 用于占位,防止创建新的 one_iter_data 文件夹
"""
fi
- name: Check for failure
if: ${{ failure() }}
run: exit 1

Build-Cuda-Latest-Target:
name: Build-dipu-cuda-latest-target
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: Build dipu diopi-latest-target
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB}
srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
&& source ${ENV_PATH}/dipu_env \
&& bash scripts/ci/nv/ci_nv_script.sh build_dipu" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
else
ssh SH1424 """
set -ex
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source-main ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
source ${ENV_PATH}/dipu_env
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=30 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
"""
fi
Test-Cuda-Latest-Target:
name: Test-dipu-cuda-latest-target
needs: [Build-Cuda-Latest-Target]
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: Run-test
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Latest-Target/dipu \
&& source ${ENV_PATH}/dipu_env \
&& bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target
else
ssh SH1424 """
set -ex
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && cd Build-Cuda-Latest-Target/dipu
source ${ENV_PATH}/dipu_env
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 sh tests/run_nv_tests.sh && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Latest-Target && exit 1 )
"""
fi
Build-Cuda-Pt211:
name: Build-dipu-cuda-pt211
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: Build dipu
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB}
srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${GITHUB_JOB}/dipu \
&& source ${ENV_PATH}/dipu_env 2.1.1 \
&& bash scripts/ci/nv/ci_nv_script.sh build_dipu " || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
else
ssh SH1424 """
set -ex
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
source ${ENV_PATH}/dipu_env 2.1.1
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 --mem=16G --time=60 bash scripts/ci/nv/ci_nv_script.sh build_dipu \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
"""
fi
Test-Cuda-Pt211:
name: Test-dipu-cuda-pt211
needs: [Build-Cuda-Pt211]
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
CUDA_PARTATION: "pat_dev"
steps:
- name: Run-test
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
srun --job-name=${GITHUB_JOB} bash -c "cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu \
&& source ${ENV_PATH}/dipu_env 2.1.1 \
&& bash tests/run_nv_tests.sh" && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211
else
ssh SH1424 """
set -ex
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/Build-Cuda-Pt211/dipu
source ${ENV_PATH}/dipu_env 2.1.1
srun --job-name=${GITHUB_RUN_NUMBER}_${GITHUB_JOB} --partition=${CUDA_PARTATION} --gres=gpu:1 --cpus-per-task=5 bash tests/run_nv_tests.sh \
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf Build-Cuda-Pt211 && exit 1 )
"""
fi
Loading

0 comments on commit 512158e

Please sign in to comment.