Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backport] fix/ci: Linux NPM race condition editing "except" CIDR and NPM pipeline fixes #2882

Merged
merged 7 commits into from
Jul 30, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
uses: actions/upload-artifact@v4
if: always()
with:
name: logs
name: logs-${{ matrix.profile }}
path: |
./npm-logs_${{ matrix.profile }}.txt
./cyclonus-test_${{ matrix.profile }}.txt
7 changes: 4 additions & 3 deletions .github/workflows/cyclonus-netpol-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
push:
branches:
- main
- release/*
pull_request:
paths:
- 'npm/**'
Expand Down Expand Up @@ -49,7 +50,7 @@ jobs:
- name: Make NPM image
run: |
make npm-image NPM_PLATFORM_TAG=cyclonus PLATFORM=linux/amd64 CONTAINER_BUILDER=docker BUILDX_ACTION='--load'

- name: Install Azure NPM
# set the ConfigMap based on the build matrix
# currently have to restart the daemonset because changing the ConfigMap doesn't restart NPM
Expand All @@ -60,7 +61,7 @@ jobs:
echo "Applying profile: ${{ matrix.profile }}"
kubectl apply -f ./npm/profiles/${{ matrix.profile }}
kubectl rollout restart ds azure-npm -n kube-system

- name: Check Cluster Components
run: |
sleep 10
Expand All @@ -80,7 +81,7 @@ jobs:
uses: actions/upload-artifact@v4
if: always()
with:
name: logs
name: logs-${{ matrix.profile }}
path: |
./npm-logs_${{ matrix.profile }}.txt
./cyclonus-test_${{ matrix.profile }}.txt
270 changes: 138 additions & 132 deletions .pipelines/npm/npm-conformance-tests-latest-release.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
trigger:
- master
branches:
include:
- master
tags:
include:
- "*"

variables:
- name: VNET_NAME
Expand Down Expand Up @@ -113,7 +118,7 @@ jobs:
scriptLocation: "inlineScript"
failOnStderr: true
inlineScript: |
# get kubectl
# get kubectl
curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
chmod +x kubectl
echo Cluster $(AZURE_CLUSTER)
Expand Down Expand Up @@ -177,7 +182,7 @@ jobs:
./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)

else
echo "Creating Linux Cluster";
echo "Creating Linux Cluster";
az aks create --no-ssh-key \
--resource-group $(RESOURCE_GROUP) \
--name $(AZURE_CLUSTER) \
Expand Down Expand Up @@ -261,7 +266,7 @@ jobs:
declare -a conformancePIDs
for round in $(seq 1 $NUM_PARALLEL_JOBS_FOR_STRESS_TEST); do
# for each iteration, run the conformance test and echoes in the background, and write the output of the conformance test to a file

# run the conformance test in the foreground and write the output to stdout and a file
if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching
then
Expand Down Expand Up @@ -314,133 +319,134 @@ jobs:
condition: always()
artifact: NpmLogs_$(AZURE_CLUSTER)

- job: Create_Windows_Cluster_and_Run_Test
timeoutInMinutes: 360
displayName: "Run Windows Cyclonus"
pool:
name: $(BUILD_POOL_NAME_DEFAULT)
demands:
- agent.os -equals Linux
- Role -equals Build
dependsOn: [setup]
variables:
RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
TAG: $[ dependencies.setup.outputs['EnvironmentalVariables.TAG'] ]
FQDN: empty
strategy:
matrix:
v2-windows:
PROFILE: "cyc-ws22"
steps:
- checkout: self
- download: none

- task: AzureCLI@2
displayName: "Create AKS Cluster"
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptType: "bash"
scriptLocation: "inlineScript"
failOnStderr: true
inlineScript: |
az extension add --name aks-preview
az extension update --name aks-preview

export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)

echo "Creating resource group named $CLUSTER_NAME"
az group create --name $CLUSTER_NAME -l $(LOCATION) -o table

echo "Creating resource group named $CLUSTER_NAME"
az aks create \
--resource-group $CLUSTER_NAME \
--name $CLUSTER_NAME \
--generate-ssh-keys \
--windows-admin-username e2eadmin \
--windows-admin-password alpha@numeric!password2 \
--network-plugin azure \
--vm-set-type VirtualMachineScaleSets \
--node-vm-size Standard_D4s_v3 \
--node-count 1

# don't schedule anything on the linux system pool
echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
az aks nodepool update \
--cluster-name $CLUSTER_NAME \
-g $CLUSTER_NAME \
-n nodepool1 \
--node-taints CriticalAddonsOnly=true:NoSchedule

echo "Adding Windows nodepool to $CLUSTER_NAME"
az aks nodepool add \
--resource-group $CLUSTER_NAME \
--cluster-name $CLUSTER_NAME \
--name awin22 \
--os-type Windows \
--os-sku Windows2022 \
--node-vm-size Standard_D4s_v3 \
--node-count 3

echo "Getting credentials to $CLUSTER_NAME"
az aks get-credentials -g $CLUSTER_NAME -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
mkdir -p ~/.kube/
cp ./kubeconfig ~/.kube/config

- task: AzureCLI@2
displayName: "Deploy NPM to Test Cluster"
inputs:
azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
scriptType: "bash"
scriptLocation: "inlineScript"
failOnStderr: true
inlineScript: |
export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)

curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
chmod +x kubectl

# deploy azure-npm
./kubectl --kubeconfig=./kubeconfig apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/examples/windows/azure-npm.yaml

# swap azure-npm image with one built during run
./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)

echo "sleeping 3 minutes to allow NPM pods to restart"
sleep 180

./kubectl --kubeconfig=./kubeconfig get po -n kube-system -owide -A

echo "Showing cluster status for $CLUSTER_NAME"
FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv`
echo "##vso[task.setvariable variable=FQDN]$FQDN"

- script: |
cat ~/.kube/config
curl -fsSL github.com/mattfenwick/cyclonus/releases/latest/download/cyclonus_linux_amd64.tar.gz | tar -zxv
name: download_cyclonus
displayName: "Download Cyclonus"
failOnStderr: false
condition: always()

- script: |
./test/cyclonus/test-cyclonus-windows.sh
name: cyclonus
displayName: "Run Cyclonus Test"
failOnStderr: false
condition: always()

- bash: |
export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME
echo "Getting cluster state for $CLUSTER_NAME"
mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt
cp ./kubeconfig $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/.kubeconfig
condition: always()

- publish: $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
condition: always()
artifact: NpmLogs-$(RESOURCE_GROUP)-$(PROFILE)
# 2024/07/23: Windows Cyclonus is consistently timing out after 6 hours
# - job: Create_Windows_Cluster_and_Run_Test
# timeoutInMinutes: 360
# displayName: "Run Windows Cyclonus"
# pool:
# name: $(BUILD_POOL_NAME_DEFAULT)
# demands:
# - agent.os -equals Linux
# - Role -equals Build
# dependsOn: [setup]
# variables:
# RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
# TAG: $[ dependencies.setup.outputs['EnvironmentalVariables.TAG'] ]
# FQDN: empty
# strategy:
# matrix:
# v2-windows:
# PROFILE: "cyc-ws22"
# steps:
# - checkout: self
# - download: none

# - task: AzureCLI@2
# displayName: "Create AKS Cluster"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# failOnStderr: true
# inlineScript: |
# az extension add --name aks-preview
# az extension update --name aks-preview

# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)

# echo "Creating resource group named $CLUSTER_NAME"
# az group create --name $CLUSTER_NAME -l $(LOCATION) -o table

# echo "Creating AKS cluster named $CLUSTER_NAME"
# az aks create \
# --resource-group $CLUSTER_NAME \
# --name $CLUSTER_NAME \
# --generate-ssh-keys \
# --windows-admin-username e2eadmin \
# --windows-admin-password alpha@numeric!password2 \
# --network-plugin azure \
# --vm-set-type VirtualMachineScaleSets \
# --node-vm-size Standard_D4s_v3 \
# --node-count 1

# # don't schedule anything on the linux system pool
# echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
# az aks nodepool update \
# --cluster-name $CLUSTER_NAME \
# -g $CLUSTER_NAME \
# -n nodepool1 \
# --node-taints CriticalAddonsOnly=true:NoSchedule

# echo "Adding Windows nodepool to $CLUSTER_NAME"
# az aks nodepool add \
# --resource-group $CLUSTER_NAME \
# --cluster-name $CLUSTER_NAME \
# --name awin22 \
# --os-type Windows \
# --os-sku Windows2022 \
# --node-vm-size Standard_D4s_v3 \
# --node-count 3

# echo "Getting credentials to $CLUSTER_NAME"
# az aks get-credentials -g $CLUSTER_NAME -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
# mkdir -p ~/.kube/
# cp ./kubeconfig ~/.kube/config

# - task: AzureCLI@2
# displayName: "Deploy NPM to Test Cluster"
# inputs:
# azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
# scriptType: "bash"
# scriptLocation: "inlineScript"
# failOnStderr: true
# inlineScript: |
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)

# curl -LO https://dl.k8s.io/release/v1.23.0/bin/linux/amd64/kubectl
# chmod +x kubectl

# # deploy azure-npm
# ./kubectl --kubeconfig=./kubeconfig apply -f https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/examples/windows/azure-npm.yaml

# # swap azure-npm image with one built during run
# ./kubectl --kubeconfig=./kubeconfig set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)

# echo "sleeping 3 minutes to allow NPM pods to restart"
# sleep 180

# ./kubectl --kubeconfig=./kubeconfig get po -n kube-system -owide -A

# echo "Showing cluster status for $CLUSTER_NAME"
# FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv`
# echo "##vso[task.setvariable variable=FQDN]$FQDN"

# - script: |
# cat ~/.kube/config
# curl -fsSL github.com/mattfenwick/cyclonus/releases/latest/download/cyclonus_linux_amd64.tar.gz | tar -zxv
# name: download_cyclonus
# displayName: "Download Cyclonus"
# failOnStderr: false
# condition: always()

# - script: |
# ./test/cyclonus/test-cyclonus-windows.sh
# name: cyclonus
# displayName: "Run Cyclonus Test"
# failOnStderr: false
# condition: always()

# - bash: |
# export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
# cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME
# echo "Getting cluster state for $CLUSTER_NAME"
# mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
# kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt
# cp ./kubeconfig $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/.kubeconfig
# condition: always()

# - publish: $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
# condition: always()
# artifact: NpmLogs-$(RESOURCE_GROUP)-$(PROFILE)

- job: clean_up
displayName: "Cleanup"
Expand All @@ -450,7 +456,7 @@ jobs:
- agent.os -equals Linux
- Role -equals Build
dependsOn:
[Create_Cluster_and_Run_Test, Create_Windows_Cluster_and_Run_Test, setup]
[Create_Cluster_and_Run_Test, setup]
variables:
RESOURCE_GROUP: $[ dependencies.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ]
steps:
Expand Down
Loading
Loading