From 6340944d83f53c43da821e89ca7aefe2aac7bbfc Mon Sep 17 00:00:00 2001 From: svijaykumar-px Date: Tue, 9 Jul 2024 06:52:29 +0000 Subject: [PATCH] adding integration test to validate node pdb --- pkg/util/test/util.go | 118 +++++++++- test/integration_test/node_pdb_test.go | 205 ++++++++++++++++++ test/integration_test/pdb_test.go | 6 +- test/integration_test/utils/k8s.go | 59 +++++ test/integration_test/utils/px_operator.go | 2 + test/integration_test/utils/storagecluster.go | 15 -- 6 files changed, 382 insertions(+), 23 deletions(-) create mode 100644 test/integration_test/node_pdb_test.go diff --git a/pkg/util/test/util.go b/pkg/util/test/util.go index 74eef47591..a54263658b 100644 --- a/pkg/util/test/util.go +++ b/pkg/util/test/util.go @@ -217,6 +217,7 @@ var ( opVer23_10_2, _ = version.NewVersion("23.10.2-") OpVer23_10_3, _ = version.NewVersion("23.10.3-") opVer24_1_0, _ = version.NewVersion("24.1.0-") + opVer24_2_0, _ = version.NewVersion("24.2.0-") minOpVersionForKubeSchedConfig, _ = version.NewVersion("1.10.2-") minimumCcmGoVersionCO, _ = version.NewVersion("1.2.3") @@ -227,9 +228,10 @@ var ( // OCP Dynamic Plugin is only supported in starting with OCP 4.12+ which is k8s v1.25.0+ minK8sVersionForDynamicPlugin, _ = version.NewVersion("1.25.0") - pxVer2_13, _ = version.NewVersion("2.13") - pxVer3_0, _ = version.NewVersion("3.0") - pxVer3_1, _ = version.NewVersion("3.1") + pxVer2_13, _ = version.NewVersion("2.13") + pxVer3_0, _ = version.NewVersion("3.0") + pxVer3_1, _ = version.NewVersion("3.1") + pxVer3_1_2, _ = version.NewVersion("3.1.2") // minimumPxVersionCCMJAVA minimum PX version to install ccm-java minimumPxVersionCCMJAVA, _ = version.NewVersion("2.8") @@ -5337,7 +5339,7 @@ func ValidateTelemetryV1Enabled(pxImageList map[string]string, cluster *corev1.S // ValidatePodDisruptionBudget validates the value of minavailable and number of disruptions for px-storage poddisruptionbudget func ValidatePodDisruptionBudget(cluster *corev1.StorageCluster, timeout, 
interval time.Duration) error { - logrus.Info("Validate px-storage poddisruptionbudget minAvailable and allowed disruptions") + logrus.Info("Validate portworx storage poddisruptionbudget") kbVer, err := GetK8SVersion() if err != nil { @@ -5348,10 +5350,47 @@ func ValidatePodDisruptionBudget(cluster *corev1.StorageCluster, timeout, interv if err != nil { return err } + pxVersion := GetPortworxVersion(cluster) // PodDisruptionBudget is supported for k8s version greater than or equal to 1.21 and operator version greater than or equal to 1.5.0 // Changing opVersion to 23.10.0 for PTX-23350 | TODO: add better logic with PTX-23407 - if k8sVersion.GreaterThanOrEqual(minSupportedK8sVersionForPdb) && opVersion.GreaterThanOrEqual(opVer23_10) { + + // Smart and parallel upgrades is supported from px version 3.1.2 and operator version 24.2.0 + if k8sVersion.GreaterThanOrEqual(minSupportedK8sVersionForPdb) && opVersion.GreaterThanOrEqual(opVer24_2_0) && pxVersion.GreaterThanOrEqual(pxVer3_1_2) { + t := func() (interface{}, bool, error) { + nodes, err := operatorops.Instance().ListStorageNodes(cluster.Namespace) + if err != nil { + return nil, true, fmt.Errorf("failed to get storage nodes, Err: %v", err) + } + availablenodes := 0 + for _, node := range nodes.Items { + if *node.Status.NodeAttributes.Storage && node.Status.Phase == "Online" { + availablenodes++ + } else { + logrus.Infof("Node [%s] in state [%s] is not Online, PDB might be incorrect", node.Name, node.Status.Phase) + } + } + pdbs, err := policyops.Instance().ListPodDisruptionBudget(cluster.Namespace) + if err != nil { + return nil, true, fmt.Errorf("failed to list all poddisruptionbudgets, Err: %v", err) + } + actualNodePDBcount := 0 + for _, pdb := range pdbs.Items { + if strings.HasPrefix(pdb.Name, "px-") && pdb.Name != "px-kvdb" { + actualNodePDBcount++ + } + } + if actualNodePDBcount == availablenodes { + return nil, false, nil + } + return nil, true, fmt.Errorf("incorrect node PDB count. 
Expected node PDB count [%d], Actual node PDB count [%d]", availablenodes, actualNodePDBcount) + + } + if _, err := task.DoRetryWithTimeout(t, timeout, interval); err != nil { + return err + } + return nil + } else if k8sVersion.GreaterThanOrEqual(minSupportedK8sVersionForPdb) && opVersion.GreaterThanOrEqual(opVer23_10) { // This is only for non async DR setup t := func() (interface{}, bool, error) {
@@ -5948,3 +5987,94 @@ func RestoreEtcHosts(t *testing.T) {
 	assert.Equal(t, bb.Len(), n, "short write")
 	fd.Close()
 }
+
+// isPortworxNodePDB reports whether the PDB name carries the "px-" prefix used
+// by the per-node PDB checks in this file, excluding the "px-kvdb" PDB.
+func isPortworxNodePDB(name string) bool {
+	return strings.HasPrefix(name, "px-") && name != "px-kvdb"
+}
+
+// ValidateNodePDB retries, until timeout, a check that every "px-" prefixed PDB
+// (except px-kvdb) in the cluster namespace has minAvailable set to 1 and that
+// every non-master Kubernetes node has a PDB named "px-<node name>".
+func ValidateNodePDB(cluster *corev1.StorageCluster, timeout, interval time.Duration) error {
+	t := func() (interface{}, bool, error) {
+		nodes, err := coreops.Instance().GetNodes()
+		if err != nil {
+			return nil, true, fmt.Errorf("failed to get k8s nodes, Err: %v", err)
+		}
+		pdbs, err := policyops.Instance().ListPodDisruptionBudget(cluster.Namespace)
+		if err != nil {
+			return nil, true, fmt.Errorf("failed to list all poddisruptionbudgets, Err: %v", err)
+		}
+
+		nodePDBs := make(map[string]bool, len(pdbs.Items))
+		for _, pdb := range pdbs.Items {
+			if !isPortworxNodePDB(pdb.Name) {
+				continue
+			}
+			nodePDBs[pdb.Name] = true
+			nodeName := strings.TrimPrefix(pdb.Name, "px-")
+			// MinAvailable is a pointer and is nil when the PDB does not set it.
+			if pdb.Spec.MinAvailable == nil {
+				return nil, true, fmt.Errorf("PDB minAvailable value for node %s is not set", nodeName)
+			}
+			if got := pdb.Spec.MinAvailable.IntValue(); got != 1 {
+				return nil, true, fmt.Errorf("incorrect PDB minAvailable value for node %s. Expected PDB [%d], Actual PDB [%d]", nodeName, 1, got)
+			}
+		}
+
+		for _, node := range nodes.Items {
+			if coreops.Instance().IsNodeMaster(node) {
+				continue
+			}
+			if !nodePDBs["px-"+node.Name] {
+				return nil, true, fmt.Errorf("PDB for node %s is missing", node.Name)
+			}
+		}
+		return nil, false, nil
+	}
+	if _, err := task.DoRetryWithTimeout(t, timeout, interval); err != nil {
+		return err
+	}
+	return nil
+}
+
+// ValidateNodesSelectedForUpgrade retries, until timeout, a check that the number
+// of "px-" prefixed PDBs (except px-kvdb) with minAvailable 0 stays below
+// quorumValue. A quorumValue of -1 means "derive it from the cluster": it is
+// then computed as (number of storage nodes / 2) + 1 on every attempt.
+func ValidateNodesSelectedForUpgrade(cluster *corev1.StorageCluster, quorumValue int, timeout, interval time.Duration) error {
+	t := func() (interface{}, bool, error) {
+		// Use a per-attempt copy so a retry never sees a value derived from a
+		// stale node list.
+		limit := quorumValue
+		if limit == -1 {
+			nodes, err := operatorops.Instance().ListStorageNodes(cluster.Namespace)
+			if err != nil {
+				return nil, true, fmt.Errorf("failed to get storage nodes, Err: %v", err)
+			}
+			limit = (len(nodes.Items) / 2) + 1
+		}
+
+		pdbs, err := policyops.Instance().ListPodDisruptionBudget(cluster.Namespace)
+		if err != nil {
+			return nil, true, fmt.Errorf("failed to list all poddisruptionbudgets, Err: %v", err)
+		}
+		nodesReadyForUpgrade := 0
+		for _, pdb := range pdbs.Items {
+			// A nil MinAvailable means the PDB does not set it; do not count it.
+			if isPortworxNodePDB(pdb.Name) && pdb.Spec.MinAvailable != nil && pdb.Spec.MinAvailable.IntValue() == 0 {
+				nodesReadyForUpgrade++
+			}
+		}
+		if nodesReadyForUpgrade >= limit {
+			return nil, true, fmt.Errorf("nodes selected for upgrade [%d] are not fewer than quorum value [%d]", nodesReadyForUpgrade, limit)
+		}
+		return nil, false, nil
+	}
+	if _, err := task.DoRetryWithTimeout(t, timeout, interval); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/test/integration_test/node_pdb_test.go b/test/integration_test/node_pdb_test.go
new file mode 100644
index 0000000000..fbef873272
--- /dev/null
+++ b/test/integration_test/node_pdb_test.go
@@ -0,0 +1,205 @@
+package integrationtest
+
+import (
+	"fmt"
+	//"math"
+	"testing"
+
+	"github.com/hashicorp/go-version"
+	corev1 "github.com/libopenstorage/operator/pkg/apis/core/v1"
+	testutil 
"github.com/libopenstorage/operator/pkg/util/test"
+	"github.com/libopenstorage/operator/test/integration_test/types"
+	ci_utils "github.com/libopenstorage/operator/test/integration_test/utils"
+
+	"github.com/portworx/sched-ops/k8s/operator"
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/require"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// pxVer3_1_2 is the Portworx version from which these tests validate node PDBs.
+var pxVer3_1_2, _ = version.NewVersion("3.1.2")
+
+// testNodePDBCases lists the node PDB test cases that TestNodePDB runs.
+var testNodePDBCases = []types.TestCase{
+	{
+		TestName:        "CreateNodePDBBasic",
+		TestrailCaseIDs: []string{"C299571", "C299572"},
+		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
+			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
+		}),
+		ShouldSkip: shouldSkipNodePDBTest,
+		TestFunc:   CreateNodePDBBasic,
+	},
+	{
+		TestName:        "CreateNodePDBWithStoragelessNode",
+		TestrailCaseIDs: []string{"C299573"},
+		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
+			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
+		}),
+		ShouldSkip: func(tc *types.TestCase) bool {
+			if len(ci_utils.PxDeviceSpecs) == 0 {
+				logrus.Info("--portworx-device-specs is empty, cannot run PDBWithStoragelessNode test")
+				return true
+			}
+			return shouldSkipNodePDBTest(tc)
+		},
+		TestFunc: CreateNodePDBWithStoragelessNode,
+	},
+	{
+		TestName:        "MaxNodesAvailableForUpgrade",
+		TestrailCaseIDs: []string{"C299574", "C299575"},
+		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
+			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
+		}),
+		ShouldSkip: shouldSkipNodePDBTest,
+		TestFunc:   MaxNodesAvailableForUpgrade,
+	},
+	{
+		TestName:        "NodePDBDisablingParallelUpgrade",
+		TestrailCaseIDs: []string{"C299576", "C299577"},
+		TestSpec: ci_utils.CreateStorageClusterTestSpecFunc(&corev1.StorageCluster{
+			ObjectMeta: metav1.ObjectMeta{Name: "test-stc"},
+		}),
+		ShouldSkip: shouldSkipNodePDBTest,
+		TestFunc:   NodePDBDisablingParallelUpgrade,
+	},
+}
+
+// shouldSkipNodePDBTest reports whether a node PDB test case has to be skipped:
+// the Kubernetes version cannot be fetched or parsed, the operator is older
+// than 24.2.0, or Kubernetes is older than minSupportedK8sVersionForPdb.
+func shouldSkipNodePDBTest(tc *types.TestCase) bool {
+	kbVer, err := testutil.GetK8SVersion()
+	if err != nil {
+		logrus.Info("Skipping PDB test due to Err: ", err)
+		return true
+	}
+	k8sVersion, err := version.NewVersion(kbVer)
+	if err != nil {
+		// A nil version would panic in LessThan below, so skip instead.
+		logrus.Info("Skipping PDB test due to Err: ", err)
+		return true
+	}
+	return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer24_2_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb)
+}
+
+// setPDBMinAvailable re-reads the StorageCluster, sets its
+// portworx.io/storage-pdb-min-available annotation and applies the update.
+func setPDBMinAvailable(t *testing.T, cluster *corev1.StorageCluster, minAvailable int) *corev1.StorageCluster {
+	cluster, err := operator.Instance().GetStorageCluster(cluster.Name, cluster.Namespace)
+	require.NoError(t, err)
+	if cluster.Annotations == nil {
+		// Writing to a nil map panics; the fetched object may have no annotations.
+		cluster.Annotations = make(map[string]string)
+	}
+	cluster.Annotations["portworx.io/storage-pdb-min-available"] = fmt.Sprintf("%d", minAvailable)
+	cluster, err = ci_utils.UpdateStorageCluster(cluster)
+	require.NoError(t, err)
+	return cluster
+}
+
+// CreateNodePDBBasic deploys a StorageCluster and, for Portworx 3.1.2 or newer,
+// runs testutil.ValidateNodePDB against it before uninstalling the cluster.
+func CreateNodePDBBasic(tc *types.TestCase) func(*testing.T) {
+	return func(t *testing.T) {
+		testSpec := tc.TestSpec(t)
+		cluster, ok := testSpec.(*corev1.StorageCluster)
+		require.True(t, ok)
+		cluster = ci_utils.DeployAndValidateStorageCluster(cluster, ci_utils.PxSpecImages, t)
+
+		if testutil.GetPortworxVersion(cluster).GreaterThanOrEqual(pxVer3_1_2) {
+			logrus.Infof("Validating Node PDB names and default minAvailable")
+			err := testutil.ValidateNodePDB(cluster, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
+			require.NoError(t, err)
+		}
+		ci_utils.UninstallAndValidateStorageCluster(cluster, t)
+	}
+}
+
+// CreateNodePDBWithStoragelessNode deploys and validates a StorageCluster with
+// MaxStorageNodesPerZone set to 3, then uninstalls it.
+func CreateNodePDBWithStoragelessNode(tc *types.TestCase) func(*testing.T) {
+	return func(t *testing.T) {
+		testSpec := tc.TestSpec(t)
+		cluster, ok := testSpec.(*corev1.StorageCluster)
+		require.True(t, ok)
+
+		// Fail the test cleanly instead of dereferencing a nil CloudStorage, and
+		// assign a fresh pointer so a nil MaxStorageNodesPerZone cannot panic.
+		require.NotNil(t, cluster.Spec.CloudStorage, "test spec has no cloud storage configuration")
+		maxStorageNodesPerZone := uint32(3)
+		cluster.Spec.CloudStorage.MaxStorageNodesPerZone = &maxStorageNodesPerZone
+		logrus.Info("Validating PDB with storageless nodes using maxstoragenodesperzone value: ", maxStorageNodesPerZone)
+		cluster = ci_utils.DeployAndValidateStorageCluster(cluster, ci_utils.PxSpecImages, t)
+		ci_utils.UninstallAndValidateStorageCluster(cluster, t)
+	}
+}
+
+// MaxNodesAvailableForUpgrade deploys a StorageCluster and, for Portworx 3.1.2
+// or newer, cordons the worker nodes and runs ValidateNodesSelectedForUpgrade
+// with the default limit (-1) and then with minAvailable = worker count - 1.
+func MaxNodesAvailableForUpgrade(tc *types.TestCase) func(*testing.T) {
+	return func(t *testing.T) {
+		testSpec := tc.TestSpec(t)
+		cluster, ok := testSpec.(*corev1.StorageCluster)
+		require.True(t, ok)
+		cluster = ci_utils.DeployAndValidateStorageCluster(cluster, ci_utils.PxSpecImages, t)
+		if testutil.GetPortworxVersion(cluster).GreaterThanOrEqual(pxVer3_1_2) {
+			require.NoError(t, ci_utils.CordonNodes())
+
+			logrus.Infof("Validating number of nodes ready for upgrade without minAvailable annotation")
+			err := testutil.ValidateNodesSelectedForUpgrade(cluster, -1, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
+			require.NoError(t, err)
+
+			k8snodecount, err := ci_utils.GetNonMasterK8sNodeCount()
+			require.NoError(t, err)
+			cluster = setPDBMinAvailable(t, cluster, k8snodecount-1)
+
+			logrus.Infof("Validating number of nodes ready for upgrade with minAvailable annotation %d", k8snodecount-1)
+			err = testutil.ValidateNodesSelectedForUpgrade(cluster, k8snodecount-1, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
+			require.NoError(t, err)
+
+			require.NoError(t, ci_utils.UncordonNodes())
+		}
+		ci_utils.UninstallAndValidateStorageCluster(cluster, t)
+	}
+}
+
+// NodePDBDisablingParallelUpgrade repeats the upgrade-selection checks on a
+// cluster deployed with portworx.io/disable-non-disruptive-upgrade set to "true".
+func NodePDBDisablingParallelUpgrade(tc *types.TestCase) func(*testing.T) {
+	return func(t *testing.T) {
+		testSpec := tc.TestSpec(t)
+		cluster, ok := testSpec.(*corev1.StorageCluster)
+		require.True(t, ok)
+		if cluster.Annotations == nil {
+			cluster.Annotations = make(map[string]string)
+		}
+		cluster.Annotations["portworx.io/disable-non-disruptive-upgrade"] = "true"
+		cluster = ci_utils.DeployAndValidateStorageCluster(cluster, ci_utils.PxSpecImages, t)
+		if testutil.GetPortworxVersion(cluster).GreaterThanOrEqual(pxVer3_1_2) {
+			require.NoError(t, ci_utils.CordonNodes())
+			k8snodecount, err := ci_utils.GetNonMasterK8sNodeCount()
+			require.NoError(t, err)
+			logrus.Infof("Validating number of nodes ready for upgrade without minAvailable annotation after disabling non-disruptive upgrade")
+			err = testutil.ValidateNodesSelectedForUpgrade(cluster, k8snodecount-1, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
+			require.NoError(t, err)
+
+			cluster = setPDBMinAvailable(t, cluster, k8snodecount-2)
+			logrus.Infof("Validating number of nodes ready for upgrade with minAvailable annotation %d after disabling non-disruptive upgrade", k8snodecount-2)
+			err = testutil.ValidateNodesSelectedForUpgrade(cluster, k8snodecount-2, ci_utils.DefaultValidateUpgradeTimeout, ci_utils.DefaultValidateUpgradeRetryInterval)
+			require.NoError(t, err)
+
+			require.NoError(t, ci_utils.UncordonNodes())
+		}
+		ci_utils.UninstallAndValidateStorageCluster(cluster, t)
+	}
+}
+
+// TestNodePDB runs every case in testNodePDBCases.
+func TestNodePDB(t *testing.T) {
+	for _, tc := range testNodePDBCases {
+		tc.RunTest(t)
+	}
+}
diff --git a/test/integration_test/pdb_test.go b/test/integration_test/pdb_test.go
index 9a4d5c8c64..cc5694c62f 100644
--- a/test/integration_test/pdb_test.go +++ b/test/integration_test/pdb_test.go @@ -37,7 +37,7 @@ var testStorageClusterPDBCases = []types.TestCase{ return true } k8sVersion, _ := version.NewVersion(kbVer) - return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer1_5_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) + return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer1_5_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) || ci_utils.PxOperatorVersion.GreaterThanOrEqual(ci_utils.PxOperatorVer24_2_0) }, TestFunc: StoragelessNodePDB, }, @@ -54,7 +54,7 @@ var testStorageClusterPDBCases = []types.TestCase{ return true } k8sVersion, _ := version.NewVersion(kbVer) - return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer23_10_2) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) + return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer23_10_2) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) || ci_utils.PxOperatorVersion.GreaterThanOrEqual(ci_utils.PxOperatorVer24_2_0) }, TestFunc: OverridePDBUsingValidAnnotation, }, @@ -72,7 +72,7 @@ var testStorageClusterPDBCases = []types.TestCase{ return true } k8sVersion, _ := version.NewVersion(kbVer) - return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer24_1_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) + return ci_utils.PxOperatorVersion.LessThan(ci_utils.PxOperatorVer24_1_0) || k8sVersion.LessThan(minSupportedK8sVersionForPdb) || ci_utils.PxOperatorVersion.GreaterThanOrEqual(ci_utils.PxOperatorVer24_2_0) }, TestFunc: OverridePDBUsingInvalidAnnotation, },
diff --git a/test/integration_test/utils/k8s.go b/test/integration_test/utils/k8s.go
index 5a7eaf6afc..48c34baf82 100644
--- a/test/integration_test/utils/k8s.go
+++ b/test/integration_test/utils/k8s.go
@@ -392,3 +392,53 @@ func validateSpec(in interface{}) (runtime.Object, error) {
 	}
 	return nil, fmt.Errorf("unsupported object: %v", reflect.TypeOf(in))
 }
+
+// GetNonMasterK8sNodeCount returns the number of Kubernetes nodes that are not
+// master nodes, or -1 and the error when the nodes cannot be listed.
+func GetNonMasterK8sNodeCount() (int, error) {
+	nodes, err := coreops.Instance().GetNodes()
+	if err != nil {
+		return -1, err
+	}
+	nodesCount := 0
+	for _, node := range nodes.Items {
+		if !coreops.Instance().IsNodeMaster(node) {
+			nodesCount++
+		}
+	}
+	return nodesCount, nil
+}
+
+// setNonMasterNodesUnschedulable sets Spec.Unschedulable to the given value on
+// every non-master node; action ("cordon"/"uncordon") is used in error messages.
+func setNonMasterNodesUnschedulable(unschedulable bool, action string) error {
+	nodes, err := coreops.Instance().GetNodes()
+	if err != nil {
+		// Do not fall through: nodes is not usable when GetNodes fails.
+		return fmt.Errorf("failed to get k8s nodes, Err: %v", err)
+	}
+	for _, node := range nodes.Items {
+		if coreops.Instance().IsNodeMaster(node) {
+			continue
+		}
+		currNode, err := coreops.Instance().GetNodeByName(node.Name)
+		if err != nil {
+			return fmt.Errorf("failed to get node %s, Err: %v", node.Name, err)
+		}
+		currNode.Spec.Unschedulable = unschedulable
+		if _, err = coreops.Instance().UpdateNode(currNode); err != nil {
+			return fmt.Errorf("failed to %s node %s, Err: %v", action, node.Name, err)
+		}
+	}
+	return nil
+}
+
+// CordonNodes marks every non-master node as unschedulable.
+func CordonNodes() error {
+	return setNonMasterNodesUnschedulable(true, "cordon")
+}
+
+// UncordonNodes marks every non-master node as schedulable again.
+func UncordonNodes() error {
+	return setNonMasterNodesUnschedulable(false, "uncordon")
+}
diff --git a/test/integration_test/utils/px_operator.go b/test/integration_test/utils/px_operator.go index bc354a2a0c..b793d4525c 100644 --- a/test/integration_test/utils/px_operator.go +++ b/test/integration_test/utils/px_operator.go @@ -42,6 +42,8 @@ var ( PxOperatorVer23_10_3, _ = version.NewVersion("23.10.3-") // PxOperatorVer24_1_0 portworx-operator 24.1.0 version to correct invalid PDB minAvailable PxOperatorVer24_1_0, _ = version.NewVersion("24.1.0-") + // PxOperatorVer24_2_0 portworx-operator 
24.2.0 version is minimum version smart and parallel upgrades is supported + PxOperatorVer24_2_0, _ = version.NewVersion("24.2.0-") ) // TODO: Install portworx-operator in test automation diff --git a/test/integration_test/utils/storagecluster.go b/test/integration_test/utils/storagecluster.go index 04bc8e2ddb..355d0eb53d 100644 --- a/test/integration_test/utils/storagecluster.go +++ b/test/integration_test/utils/storagecluster.go @@ -551,18 +551,3 @@ func ValidateStorageClusterComponents(cluster *corev1.StorageCluster) error { // TODO: Validate the components are running with expected configuration return nil } - -func GetNonMasterK8sNodeCount() (int, error) { - nodes, err := schedopsCore.Instance().GetNodes() - if err != nil { - return -1, err - } - nodesCount := 0 - for _, node := range nodes.Items { - if schedopsCore.Instance().IsNodeMaster(node) { - continue - } - nodesCount++ - } - return nodesCount, nil -}