Skip to content

Commit

Permalink
adding integration test to validate node pdb
Browse files Browse the repository at this point in the history
  • Loading branch information
svijaykumar-px committed Jul 9, 2024
1 parent 8732a9e commit 6340944
Show file tree
Hide file tree
Showing 6 changed files with 382 additions and 23 deletions.
118 changes: 113 additions & 5 deletions pkg/util/test/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ var (
opVer23_10_2, _ = version.NewVersion("23.10.2-")
OpVer23_10_3, _ = version.NewVersion("23.10.3-")
opVer24_1_0, _ = version.NewVersion("24.1.0-")
opVer24_2_0, _ = version.NewVersion("24.2.0-")

minOpVersionForKubeSchedConfig, _ = version.NewVersion("1.10.2-")
minimumCcmGoVersionCO, _ = version.NewVersion("1.2.3")
Expand All @@ -227,9 +228,10 @@ var (
// OCP Dynamic Plugin is only supported starting with OCP 4.12+, which is k8s v1.25.0+
minK8sVersionForDynamicPlugin, _ = version.NewVersion("1.25.0")

pxVer2_13, _ = version.NewVersion("2.13")
pxVer3_0, _ = version.NewVersion("3.0")
pxVer3_1, _ = version.NewVersion("3.1")
pxVer2_13, _ = version.NewVersion("2.13")
pxVer3_0, _ = version.NewVersion("3.0")
pxVer3_1, _ = version.NewVersion("3.1")
pxVer3_1_2, _ = version.NewVersion("3.1.2")

// minimumPxVersionCCMJAVA minimum PX version to install ccm-java
minimumPxVersionCCMJAVA, _ = version.NewVersion("2.8")
Expand Down Expand Up @@ -5337,7 +5339,7 @@ func ValidateTelemetryV1Enabled(pxImageList map[string]string, cluster *corev1.S

// ValidatePodDisruptionBudget validates the value of minavailable and number of disruptions for px-storage poddisruptionbudget
func ValidatePodDisruptionBudget(cluster *corev1.StorageCluster, timeout, interval time.Duration) error {
logrus.Info("Validate px-storage poddisruptionbudget minAvailable and allowed disruptions")
logrus.Info("Validate portworx storage poddisruptionbudget")

kbVer, err := GetK8SVersion()
if err != nil {
Expand All @@ -5348,10 +5350,47 @@ func ValidatePodDisruptionBudget(cluster *corev1.StorageCluster, timeout, interv
if err != nil {
return err
}
pxVersion := GetPortworxVersion(cluster)

// PodDisruptionBudget is supported for k8s version greater than or equal to 1.21 and operator version greater than or equal to 1.5.0
// Changing opVersion to 23.10.0 for PTX-23350 | TODO: add better logic with PTX-23407
if k8sVersion.GreaterThanOrEqual(minSupportedK8sVersionForPdb) && opVersion.GreaterThanOrEqual(opVer23_10) {

// Smart and parallel upgrades are supported from px version 3.1.2 and operator version 24.2.0
if k8sVersion.GreaterThanOrEqual(minSupportedK8sVersionForPdb) && opVersion.GreaterThanOrEqual(opVer24_2_0) && pxVersion.GreaterThanOrEqual(pxVer3_1_2) {
t := func() (interface{}, bool, error) {
nodes, err := operatorops.Instance().ListStorageNodes(cluster.Namespace)
if err != nil {
return nil, true, fmt.Errorf("failed to get storage nodes, Err: %v", err)
}
availablenodes := 0
for _, node := range nodes.Items {
if *node.Status.NodeAttributes.Storage && node.Status.Phase == "Online" {
availablenodes++
} else {
logrus.Infof("Node [%s] in state [%s] is not Online, PDB might be incorrect", node.Name, node.Status.Phase)
}
}
pdbs, err := policyops.Instance().ListPodDisruptionBudget(cluster.Namespace)
if err != nil {
return nil, true, fmt.Errorf("failed to list all poddisruptionbudgets, Err: %v", err)
}
actualNodePDBcount := 0
for _, pdb := range pdbs.Items {
if strings.HasPrefix(pdb.Name, "px-") && pdb.Name != "px-kvdb" {
actualNodePDBcount++
}
}
if actualNodePDBcount == availablenodes {
return nil, false, nil
}
return nil, true, fmt.Errorf("incorrect node PDB count. Expected node PDB count [%d], Actual node PDB count [%d]", availablenodes, actualNodePDBcount)

}
if _, err := task.DoRetryWithTimeout(t, timeout, interval); err != nil {
return err
}
return nil
} else if k8sVersion.GreaterThanOrEqual(minSupportedK8sVersionForPdb) && opVersion.GreaterThanOrEqual(opVer23_10) {
// This is only for non async DR setup
t := func() (interface{}, bool, error) {

Expand Down Expand Up @@ -5948,3 +5987,72 @@ func RestoreEtcHosts(t *testing.T) {
assert.Equal(t, bb.Len(), n, "short write")
fd.Close()
}

// ValidateNodePDB validates the per-node PodDisruptionBudgets in the cluster
// namespace. A PDB is treated as a node PDB when its name starts with "px-"
// and it is not "px-kvdb". The check passes when every node PDB has
// minAvailable equal to 1 and every non-master Kubernetes node has a PDB
// named "px-<node name>". The check is retried every interval until it
// passes or timeout elapses; the last failure is returned on timeout.
func ValidateNodePDB(cluster *corev1.StorageCluster, timeout, interval time.Duration) error {
	t := func() (interface{}, bool, error) {
		nodes, err := coreops.Instance().GetNodes()
		if err != nil {
			return nil, true, fmt.Errorf("failed to get kubernetes nodes, Err: %w", err)
		}

		pdbs, err := policyops.Instance().ListPodDisruptionBudget(cluster.Namespace)
		if err != nil {
			return nil, true, fmt.Errorf("failed to list poddisruptionbudgets, Err: %w", err)
		}

		// Names of all node PDBs that were found with the expected minAvailable.
		nodePDBNames := make(map[string]bool, len(pdbs.Items))
		for _, pdb := range pdbs.Items {
			if !strings.HasPrefix(pdb.Name, "px-") || pdb.Name == "px-kvdb" {
				continue
			}
			nodeName := strings.TrimPrefix(pdb.Name, "px-")

			// minAvailable is an optional pointer in the PDB spec; calling
			// IntValue on a nil pointer would panic instead of retrying.
			if pdb.Spec.MinAvailable == nil {
				return nil, true, fmt.Errorf("PDB minAvailable is not set for node %s", nodeName)
			}
			if actual := pdb.Spec.MinAvailable.IntValue(); actual != 1 {
				return nil, true, fmt.Errorf("incorrect PDB minAvailable value for node %s. Expected PDB [%d], Actual PDB [%d]", nodeName, 1, actual)
			}
			nodePDBNames[pdb.Name] = true
		}

		for _, node := range nodes.Items {
			if coreops.Instance().IsNodeMaster(node) {
				continue
			}
			if !nodePDBNames["px-"+node.Name] {
				return nil, true, fmt.Errorf("PDB for node %s is missing", node.Name)
			}
		}
		return nil, false, nil
	}

	if _, err := task.DoRetryWithTimeout(t, timeout, interval); err != nil {
		return err
	}
	return nil
}

// ValidateNodesSelectedForUpgrade validates that the number of node
// PodDisruptionBudgets with minAvailable equal to 0 is strictly less than
// quorumValue. A PDB is treated as a node PDB when its name starts with "px-"
// and it is not "px-kvdb". When quorumValue is -1, the quorum is derived
// from the number of StorageNodes in the cluster namespace as (count / 2) + 1.
// The check is retried every interval until it passes or timeout elapses.
func ValidateNodesSelectedForUpgrade(cluster *corev1.StorageCluster, quorumValue int, timeout, interval time.Duration) error {
	// Work on a local copy so the caller-visible parameter is never rewritten;
	// once derived, the quorum is reused across retries.
	quorum := quorumValue

	t := func() (interface{}, bool, error) {
		if quorum == -1 {
			nodes, err := operatorops.Instance().ListStorageNodes(cluster.Namespace)
			if err != nil {
				return nil, true, fmt.Errorf("failed to get storage nodes, Err: %w", err)
			}
			quorum = (len(nodes.Items) / 2) + 1
		}

		pdbs, err := policyops.Instance().ListPodDisruptionBudget(cluster.Namespace)
		if err != nil {
			return nil, true, fmt.Errorf("failed to list poddisruptionbudgets, Err: %w", err)
		}

		nodesReadyForUpgrade := 0
		for _, pdb := range pdbs.Items {
			// px-kvdb is not a per-node PDB; skip it like the other node PDB checks do.
			if !strings.HasPrefix(pdb.Name, "px-") || pdb.Name == "px-kvdb" {
				continue
			}
			// minAvailable is an optional pointer; a PDB without it cannot be
			// "selected for upgrade" and must not cause a nil dereference.
			if pdb.Spec.MinAvailable == nil {
				continue
			}
			if pdb.Spec.MinAvailable.IntValue() == 0 {
				nodesReadyForUpgrade++
			}
		}

		if nodesReadyForUpgrade < quorum {
			return nil, false, nil
		}
		return nil, true, fmt.Errorf("nodes selected for upgrade [%d] must be less than quorum value [%d]", nodesReadyForUpgrade, quorum)
	}

	if _, err := task.DoRetryWithTimeout(t, timeout, interval); err != nil {
		return err
	}
	return nil
}
205 changes: 205 additions & 0 deletions test/integration_test/node_pdb_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
package integrationtest

import (
"fmt"
//"math"
"testing"

"github.com/hashicorp/go-version"
corev1 "github.com/libopenstorage/operator/pkg/apis/core/v1"
testutil "github.com/libopenstorage/operator/pkg/util/test"
"github.com/libopenstorage/operator/test/integration_test/types"
ci_utils "github.com/libopenstorage/operator/test/integration_test/utils"

"github.com/portworx/sched-ops/k8s/operator"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// pxVer3_1_2 is the Portworx version that the node PDB tests compare the
// deployed Portworx version against before running node PDB validations.
var pxVer3_1_2, _ = version.NewVersion("3.1.2")

// testNodePDBCases lists the node PodDisruptionBudget integration test cases.
// Every case is skipped when the operator version is below 24.2.0 or the
// Kubernetes version is below minSupportedK8sVersionForPdb.
var testNodePDBCases = []types.TestCase{
	{
		TestName:        "CreateNodePDBBasic",
		TestrailCaseIDs: []string{"C299571", "C299572"},
		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
		}),
		ShouldSkip: shouldSkipNodePDBTest,
		TestFunc:   CreateNodePDBBasic,
	},
	{
		TestName:        "CreateNodePDBWithStoragelessNode",
		TestrailCaseIDs: []string{"C299573"},
		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
		}),
		ShouldSkip: func(tc *types.TestCase) bool {
			// This case additionally needs device specs to be configured.
			if len(ci_utils.PxDeviceSpecs) == 0 {
				logrus.Info("--portworx-device-specs is empty, cannot run PDBWithStoragelessNode test")
				return true
			}
			return shouldSkipNodePDBTest(tc)
		},
		TestFunc: CreateNodePDBWithStoragelessNode,
	},
	{
		TestName:        "MaxNodesAvailableForUpgrade",
		TestrailCaseIDs: []string{"C299574", "C299575"},
		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
		}),
		ShouldSkip: shouldSkipNodePDBTest,
		TestFunc:   MaxNodesAvailableForUpgrade,
	},
	{
		TestName:        "NodePDBDisablingParallelUpgrade",
		TestrailCaseIDs: []string{"C299576", "C299577"},
		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
		}),
		ShouldSkip: shouldSkipNodePDBTest,
		TestFunc:   NodePDBDisablingParallelUpgrade,
	},
}

// shouldSkipNodePDBTest reports whether a node PDB test case must be skipped.
// It returns true when the Kubernetes version cannot be fetched or parsed,
// when the operator version is below 24.2.0, or when the Kubernetes version is
// below minSupportedK8sVersionForPdb.
func shouldSkipNodePDBTest(_ *types.TestCase) bool {
	kbVer, err := testutil.GetK8SVersion()
	if err != nil {
		logrus.Info("Skipping PDB test due to Err: ", err)
		return true
	}
	// A parse failure yields a nil version; comparing against it would panic,
	// so treat it the same way as a failed version lookup.
	k8sVersion, err := version.NewVersion(kbVer)
	if err != nil {
		logrus.Info("Skipping PDB test due to Err: ", err)
		return true
	}
	return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer24_2_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb)
}

// CreateNodePDBBasic returns a test that deploys the StorageCluster from the
// test case spec, runs testutil.ValidateNodePDB when the deployed Portworx
// version is at least 3.1.2, and then uninstalls the cluster.
func CreateNodePDBBasic(tc *types.TestCase) func(*testing.T) {
	return func(t *testing.T) {
		stc, isStorageCluster := tc.TestSpec(t).(*corev1.StorageCluster)
		require.True(t, isStorageCluster)

		stc = ci_utils.DeployAndValidateStorageCluster(stc, ci_utils.PxSpecImages, t)

		// Node PDB validation only applies from Portworx 3.1.2 onwards.
		if deployedVersion := testutil.GetPortworxVersion(stc); deployedVersion.GreaterThanOrEqual(pxVer3_1_2) {
			logrus.Infof("Validating Node PDB names and default minAvailable")
			validateErr := testutil.ValidateNodePDB(
				stc,
				ci_utils.DefaultValidateUpgradeTimeout,
				ci_utils.DefaultValidateUpgradeRetryInterval,
			)
			require.NoError(t, validateErr)
		}

		ci_utils.UninstallAndValidateStorageCluster(stc, t)
	}
}
// CreateNodePDBWithStoragelessNode returns a test that sets
// maxStorageNodesPerZone to 3 on the StorageCluster from the test case spec,
// deploys and validates the cluster, and then uninstalls it.
func CreateNodePDBWithStoragelessNode(tc *types.TestCase) func(*testing.T) {
	return func(t *testing.T) {
		testSpec := tc.TestSpec(t)
		cluster, ok := testSpec.(*corev1.StorageCluster)
		require.True(t, ok)

		// Fail the test with a clear message rather than panicking when the
		// spec carries no cloud storage section.
		require.NotNil(t, cluster.Spec.CloudStorage, "cloud storage spec is required for the storageless node PDB test")

		// Assign a fresh pointer instead of writing through the existing one,
		// which may be nil.
		maxStorageNodesPerZone := uint32(3)
		cluster.Spec.CloudStorage.MaxStorageNodesPerZone = &maxStorageNodesPerZone
		logrus.Info("Validating PDB with storageless nodes using maxstoragenodesperzone value: ", *cluster.Spec.CloudStorage.MaxStorageNodesPerZone)

		cluster = ci_utils.DeployAndValidateStorageCluster(cluster, ci_utils.PxSpecImages, t)
		ci_utils.UninstallAndValidateStorageCluster(cluster, t)
	}
}

// MaxNodesAvailableForUpgrade returns a test that deploys the StorageCluster
// from the test case spec and, when the deployed Portworx version is at least
// 3.1.2, calls ci_utils.CordonNodes and validates the number of nodes selected
// for upgrade twice: first with the quorum derived from the storage node count
// (quorumValue -1), then after setting the
// "portworx.io/storage-pdb-min-available" annotation to the non-master node
// count minus one. The nodes are uncordoned and the cluster is uninstalled at
// the end.
func MaxNodesAvailableForUpgrade(tc *types.TestCase) func(*testing.T) {
	return func(t *testing.T) {
		testSpec := tc.TestSpec(t)
		cluster, ok := testSpec.(*corev1.StorageCluster)
		require.True(t, ok)
		cluster = ci_utils.DeployAndValidateStorageCluster(cluster, ci_utils.PxSpecImages, t)
		pxVersion := testutil.GetPortworxVersion(cluster)

		if pxVersion.GreaterThanOrEqual(pxVer3_1_2) {
			err := ci_utils.CordonNodes()
			require.NoError(t, err)

			logrus.Infof("Validating number of nodes ready for upgrade without minAvailable annotation")
			err = testutil.ValidateNodesSelectedForUpgrade(cluster, -1, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
			require.NoError(t, err)

			k8snodecount, err := ci_utils.GetNonMasterK8sNodeCount()
			require.NoError(t, err)
			cluster, err = operator.Instance().GetStorageCluster(cluster.Name, cluster.Namespace)
			require.NoError(t, err)

			// The fetched object may carry no annotations at all; writing to a
			// nil map panics, so make sure the map exists first.
			if cluster.Annotations == nil {
				cluster.Annotations = make(map[string]string)
			}
			cluster.Annotations["portworx.io/storage-pdb-min-available"] = fmt.Sprintf("%d", k8snodecount-1)
			cluster, err = ci_utils.UpdateStorageCluster(cluster)
			require.NoError(t, err)

			logrus.Infof("Validating number of nodes ready for upgrade with minAvailable annotation %d", k8snodecount-1)
			err = testutil.ValidateNodesSelectedForUpgrade(cluster, k8snodecount-1, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
			require.NoError(t, err)

			err = ci_utils.UncordonNodes()
			require.NoError(t, err)
		}
		ci_utils.UninstallAndValidateStorageCluster(cluster, t)
	}
}

// NodePDBDisablingParallelUpgrade returns a test that deploys the
// StorageCluster with the "portworx.io/disable-non-disruptive-upgrade"
// annotation set to "true". When the deployed Portworx version is at least
// 3.1.2 it calls ci_utils.CordonNodes and validates the number of nodes
// selected for upgrade against the non-master node count minus one, then sets
// the "portworx.io/storage-pdb-min-available" annotation to the node count
// minus two and validates again against that value. The nodes are uncordoned
// and the cluster is uninstalled at the end.
func NodePDBDisablingParallelUpgrade(tc *types.TestCase) func(*testing.T) {
	return func(t *testing.T) {
		stc, isStorageCluster := tc.TestSpec(t).(*corev1.StorageCluster)
		require.True(t, isStorageCluster)

		if stc.Annotations == nil {
			stc.Annotations = map[string]string{}
		}
		stc.Annotations["portworx.io/disable-non-disruptive-upgrade"] = "true"
		stc = ci_utils.DeployAndValidateStorageCluster(stc, ci_utils.PxSpecImages, t)

		// Older Portworx versions have nothing to validate; just clean up.
		if !testutil.GetPortworxVersion(stc).GreaterThanOrEqual(pxVer3_1_2) {
			ci_utils.UninstallAndValidateStorageCluster(stc, t)
			return
		}

		require.NoError(t, ci_utils.CordonNodes())

		nodeCount, err := ci_utils.GetNonMasterK8sNodeCount()
		require.NoError(t, err)

		logrus.Infof("Validating number of nodes ready for upgrade without minAvailable annotation after disabling non-disruptive upgrade")
		require.NoError(t, testutil.ValidateNodesSelectedForUpgrade(
			stc,
			nodeCount-1,
			ci_utils.DefaultValidateUpgradeTimeout,
			ci_utils.DefaultValidateUpgradeRetryInterval,
		))

		// Re-read the live object before changing its annotations.
		stc, err = operator.Instance().GetStorageCluster(stc.Name, stc.Namespace)
		require.NoError(t, err)
		stc.Annotations["portworx.io/storage-pdb-min-available"] = fmt.Sprintf("%d", nodeCount-2)
		stc, err = ci_utils.UpdateStorageCluster(stc)
		require.NoError(t, err)

		logrus.Infof("Validating number of nodes ready for upgrade with minAvailable annotation %d after disabling non-disruptive upgrade", nodeCount-2)
		require.NoError(t, testutil.ValidateNodesSelectedForUpgrade(
			stc,
			nodeCount-2,
			ci_utils.DefaultValidateUpgradeTimeout,
			ci_utils.DefaultValidateUpgradeRetryInterval,
		))

		require.NoError(t, ci_utils.UncordonNodes())

		ci_utils.UninstallAndValidateStorageCluster(stc, t)
	}
}
// TestNodePDB runs every test case listed in testNodePDBCases, in order.
func TestNodePDB(t *testing.T) {
	for idx := range testNodePDBCases {
		// Run against a per-iteration copy of the case, as a value range would.
		testCase := testNodePDBCases[idx]
		testCase.RunTest(t)
	}
}
6 changes: 3 additions & 3 deletions test/integration_test/pdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ var testStorageClusterPDBCases = []types.TestCase{
return true
}
k8sVersion, _ := version.NewVersion(kbVer)
return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer1_5_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb)
return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer1_5_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) || ci_utils.PxOperatorVersion.GreaterThanOrEqual(ci_utils.PxOperatorVer24_2_0)
},
TestFunc: StoragelessNodePDB,
},
Expand All @@ -54,7 +54,7 @@ var testStorageClusterPDBCases = []types.TestCase{
return true
}
k8sVersion, _ := version.NewVersion(kbVer)
return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer23_10_2) || k8sVersion.LessThan(minSupportedK8sVersionForPdb)
return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer23_10_2) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) || ci_utils.PxOperatorVersion.GreaterThanOrEqual(ci_utils.PxOperatorVer24_2_0)
},
TestFunc: OverridePDBUsingValidAnnotation,
},
Expand All @@ -72,7 +72,7 @@ var testStorageClusterPDBCases = []types.TestCase{
return true
}
k8sVersion, _ := version.NewVersion(kbVer)
return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer24_1_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb)
return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer24_1_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) || ci_utils.PxOperatorVersion.GreaterThanOrEqual(ci_utils.PxOperatorVer24_2_0)
},
TestFunc: OverridePDBUsingInvalidAnnotation,
},
Expand Down
Loading

0 comments on commit 6340944

Please sign in to comment.