From 729e5e346866c989176a5a250880a41a3c5b97bb Mon Sep 17 00:00:00 2001
From: Neelesh Thakur
Date: Wed, 5 Jun 2024 14:22:39 -0600
Subject: [PATCH] PWX-37573: tighten the checks for avoiding unnecessary VM live migration

If the live-migration is in progress for a VM that we want to evict, do not
create another live-migration for that VM in the same Reconcile() cycle.

Just before starting a live-migration, check the VMI one more time to verify
that the VMI is pointing to the same node as the pod being evicted.

These two extra checks reduce the window in which PX and the operator may try
to live-migrate the same VM out of the same node. They also handle any other
unexpected live-migrations that might start.

Signed-off-by: Neelesh Thakur
---
 .../storagecluster/controller_test.go     | 29 ++++++--
 pkg/controller/storagecluster/kubevirt.go | 66 +++++++++++++------
 .../storagecluster/kubevirt_test.go       | 59 +++++++++++++++--
 pkg/controller/storagecluster/update.go   |  2 +-
 pkg/mock/kubevirtmanager.mock.go          |  8 +--
 pkg/util/util.go                          |  9 +++
 6 files changed, 136 insertions(+), 37 deletions(-)

diff --git a/pkg/controller/storagecluster/controller_test.go b/pkg/controller/storagecluster/controller_test.go
index d8691cef0..ca4652c71 100644
--- a/pkg/controller/storagecluster/controller_test.go
+++ b/pkg/controller/storagecluster/controller_test.go
@@ -2695,9 +2695,10 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
         k8sNodes[1].Name: true,
         k8sNodes[2].Name: true,
     }
+    evictions := getVMPodEvictions(t, vmPods)
     kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(
-        map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
-    kubevirt.EXPECT().StartEvictingVMPods(vmPods, gomock.Any(), gomock.Any())
+        map[string][]*util.VMPodEviction{k8sNodes[1].Name: evictions}, nil)
+    kubevirt.EXPECT().StartEvictingVMPods(evictions, gomock.Any(), gomock.Any())

     result, err = controller.Reconcile(context.TODO(), request)
     require.NoError(t, err)
@@ -2750,13 +2751,16 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
             },
         },
     }
+    evictionsNode0 := getVMPodEvictions(t, vmPodsNode0)
+    evictionsNode2 := getVMPodEvictions(t, vmPodsNode2)
+
     kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
-    kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]v1.Pod{
-        k8sNodes[0].Name: vmPodsNode0,
-        k8sNodes[2].Name: vmPodsNode2,
+    kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]*util.VMPodEviction{
+        k8sNodes[0].Name: evictionsNode0,
+        k8sNodes[2].Name: evictionsNode2,
     }, nil)
-    kubevirt.EXPECT().StartEvictingVMPods(vmPodsNode0, gomock.Any(), gomock.Any())
-    kubevirt.EXPECT().StartEvictingVMPods(vmPodsNode2, gomock.Any(), gomock.Any())
+    kubevirt.EXPECT().StartEvictingVMPods(evictionsNode0, gomock.Any(), gomock.Any())
+    kubevirt.EXPECT().StartEvictingVMPods(evictionsNode2, gomock.Any(), gomock.Any())

     result, err = controller.Reconcile(context.TODO(), request)
     require.NoError(t, err)
@@ -10669,3 +10673,14 @@ func getNode(t *testing.T, k8sclient client.Client, nodeName string) *v1.Node {
     require.NoError(t, err)
     return node
 }
+
+func getVMPodEvictions(t *testing.T, podsToEvict []v1.Pod) []*util.VMPodEviction {
+    var evictions []*util.VMPodEviction
+    for _, vmPod := range podsToEvict {
+        evictions = append(evictions, &util.VMPodEviction{
+            PodToEvict:              vmPod,
+            LiveMigrationInProgress: false,
+        })
+    }
+    return evictions
+}
diff --git a/pkg/controller/storagecluster/kubevirt.go b/pkg/controller/storagecluster/kubevirt.go
index 2bc84fe65..3c00dea14 100644
--- a/pkg/controller/storagecluster/kubevirt.go
+++ b/pkg/controller/storagecluster/kubevirt.go
@@ -10,6 +10,7 @@ import (
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

     "github.com/libopenstorage/operator/pkg/constants"
+    "github.com/libopenstorage/operator/pkg/util"
     coreops "github.com/portworx/sched-ops/k8s/core"
     kubevirt "github.com/portworx/sched-ops/k8s/kubevirt-dynamic"
 )
@@ -18,11 +19,11 @@ type KubevirtManager interface {
     // ClusterHasVMPods returns true if the cluster has any KubeVirt VM Pods (running or not)
     ClusterHasVMPods() (bool, error)

-    // GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that are live-migratable
-    GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error)
+    // GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that need to be evicted
+    GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error)

     // StartEvictingVMPods starts live-migrating the virt-launcher pods to other nodes
-    StartEvictingVMPods(virtLauncherPods []v1.Pod, controllerRevisionHash string,
+    StartEvictingVMPods(virtLauncherPods []*util.VMPodEviction, controllerRevisionHash string,
         failedToEvictVMEventFunc func(message string))
 }

@@ -53,8 +54,8 @@ func (k *kubevirtManagerImpl) ClusterHasVMPods() (bool, error) {
     return len(virtLauncherPods) > 0, nil
 }

-func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error) {
-    virtLauncherPodsByNode := map[string][]v1.Pod{}
+func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error) {
+    virtLauncherPodsByNode := map[string][]*util.VMPodEviction{}
     // get a list of virt-launcher pods for each node
     virtLauncherPods, err := k.getVirtLauncherPods()
     if err != nil {
@@ -64,24 +65,37 @@ func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool)
         if !wantNodes[pod.Spec.NodeName] {
             continue
         }
-        shouldEvict, err := k.shouldLiveMigrateVM(&pod)
+        shouldEvict, migrInProgress, err := k.shouldLiveMigrateVM(&pod)
         if err != nil {
             return nil, err
         }
         if shouldEvict {
-            virtLauncherPodsByNode[pod.Spec.NodeName] = append(virtLauncherPodsByNode[pod.Spec.NodeName], pod)
+            virtLauncherPodsByNode[pod.Spec.NodeName] = append(
+                virtLauncherPodsByNode[pod.Spec.NodeName],
+                &util.VMPodEviction{
+                    PodToEvict:              pod,
+                    LiveMigrationInProgress: migrInProgress,
+                },
+            )
         }
     }
     return virtLauncherPodsByNode, nil
 }

 func (k *kubevirtManagerImpl) StartEvictingVMPods(
-    virtLauncherPods []v1.Pod, controllerRevisionHash string, failedToEvictVMEventFunc func(message string),
+    evictions []*util.VMPodEviction, controllerRevisionHash string, failedToEvictVMEventFunc func(message string),
 ) {
     ctx := context.TODO()
 OUTER:
-    for _, pod := range virtLauncherPods {
-        vmiName := k.getVMIName(&pod)
+    for _, eviction := range evictions {
+        pod := &eviction.PodToEvict
+        if eviction.LiveMigrationInProgress {
+            // Wait until the next Reconcile() cycle to check if the live-migration is completed.
+            logrus.Infof("Skipping eviction of virt-launcher pod %s/%s until the next reconcile cycle",
+                pod.Namespace, pod.Name)
+            continue
+        }
+        vmiName := k.getVMIName(pod)
         if vmiName == "" {
             // vmName should not be empty. Don't pause upgrade for such badly formed pods.
             logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s", pod.Namespace, pod.Name)
@@ -119,6 +133,20 @@ OUTER:
                 continue OUTER
             }
         }
+        // Check if the VMI is still pointing to the same node as the virt-launcher pod. We already checked for this
+        // in shouldLiveMigrateVM() but we need to check again here because the VMI could have been live-migrated in
+        // the meantime. This reduces the chance of unnecessary live-migrations but does not close the hole fully.
+        vmi, err := k.kubevirtOps.GetVirtualMachineInstance(ctx, pod.Namespace, vmiName)
+        if err != nil {
+            logrus.Warnf("Failed to get VMI %s when evicting pod %s/%s: %v", vmiName, pod.Namespace, pod.Name, err)
+            continue
+        }
+        if vmi.NodeName != pod.Spec.NodeName {
+            logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Eviction not needed for pod %s.",
+                pod.Namespace, vmiName, vmi.NodeName, pod.Spec.NodeName, pod.Name)
+            continue
+        }
+        // All checks passed. Start the live-migration.
         labels := map[string]string{
             constants.OperatorLabelManagedByKey: constants.OperatorLabelManagedByValue,
         }
@@ -154,20 +182,20 @@ func (k *kubevirtManagerImpl) getVMIMigrations(
     return ret, nil
 }

-func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, error) {
+func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, bool, error) {
     // we only care about the pods that are not in a terminal state
     if virtLauncherPod.Status.Phase == v1.PodSucceeded || virtLauncherPod.Status.Phase == v1.PodFailed {
-        return false, nil
+        return false, false, nil
     }
     vmiName := k.getVMIName(virtLauncherPod)
     if vmiName == "" {
         logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s. Skipping live-migration.",
             virtLauncherPod.Namespace, virtLauncherPod.Name)
-        return false, nil
+        return false, false, nil
     }
     migrations, err := k.getVMIMigrations(virtLauncherPod.Namespace, vmiName)
     if err != nil {
-        return false, err
+        return false, false, err
     }
     for _, migration := range migrations {
         if !migration.Completed {
@@ -177,17 +205,17 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
             // Return "shouldEvict=true" and deal with it later.
             logrus.Infof("Will check whether to evict pod %s/%s after the live-migration %s (%s) is completed.",
                 virtLauncherPod.Namespace, virtLauncherPod.Name, migration.Name, migration.Phase)
-            return true, nil
+            return true, true, nil
         }
     }
     // get VMI to check if the VM is live-migratable and if it is running on the same node as the virt-launcher pod
     vmi, err := k.kubevirtOps.GetVirtualMachineInstance(context.TODO(), virtLauncherPod.Namespace, vmiName)
     if err != nil {
         if !errors.IsNotFound(err) {
-            return false, fmt.Errorf("failed to get VMI %s/%s: %w", virtLauncherPod.Namespace, vmiName, err)
+            return false, false, fmt.Errorf("failed to get VMI %s/%s: %w", virtLauncherPod.Namespace, vmiName, err)
         }
         logrus.Warnf("VMI %s/%s was not found; skipping live-migration: %v", virtLauncherPod.Namespace, vmiName, err)
-        return false, nil
+        return false, false, nil
     }
     // We already checked that there is no live migration in progress for this VMI.
     // Ignore this pod if VMI says that the VM is running on another node. This can happen if
@@ -196,10 +224,10 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
     if vmi.NodeName != virtLauncherPod.Spec.NodeName {
         logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Skipping eviction of pod %s.",
             virtLauncherPod.Namespace, vmiName, vmi.NodeName, virtLauncherPod.Spec.NodeName, virtLauncherPod.Name)
-        return false, nil
+        return false, false, nil
     }
     // Ignore the VMs that are not live-migratable.
-    return vmi.LiveMigratable, nil
+    return vmi.LiveMigratable, false, nil
 }

 func (k *kubevirtManagerImpl) getVirtLauncherPods() ([]v1.Pod, error) {
diff --git a/pkg/controller/storagecluster/kubevirt_test.go b/pkg/controller/storagecluster/kubevirt_test.go
index 3d1dab810..b904d7387 100644
--- a/pkg/controller/storagecluster/kubevirt_test.go
+++ b/pkg/controller/storagecluster/kubevirt_test.go
@@ -7,6 +7,7 @@ import (
     "github.com/libopenstorage/operator/pkg/constants"
     "github.com/libopenstorage/operator/pkg/mock/mockcore"
     "github.com/libopenstorage/operator/pkg/mock/mockkubevirtdy"
+    "github.com/libopenstorage/operator/pkg/util"
     kubevirt "github.com/portworx/sched-ops/k8s/kubevirt-dynamic"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
@@ -89,7 +90,8 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
     require.NotEmpty(t, pods)
     require.Len(t, pods, 1)
     require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
-    require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
+    require.Equal(t, "virt-launcher-1", pods["node1"][0].PodToEvict.Name)
+    require.False(t, pods["node1"][0].LiveMigrationInProgress)
     }

     // Test case: completed or failed virt-launcher pod should be ignored
@@ -164,7 +166,8 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
     require.NotEmpty(t, pods)
     require.Len(t, pods, 1)
     require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
-    require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
+    require.Equal(t, "virt-launcher-1", pods["node1"][0].PodToEvict.Name)
+    require.True(t, pods["node1"][0].LiveMigrationInProgress)

     // Test case: VM was migrated out already but the source virt-launcher pod has not finished yet.
     // VMI migration in completed state and VMI is pointing to a different node.
@@ -204,9 +207,37 @@ func TestStartEvictingVMPods(t *testing.T) {
     // Test case: no migration exists
     virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
     mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(nil, nil)
+    mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
     mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
         expectedAnnotations, expectedLabels).Return(nil, nil)
-    kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
+    kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+        {
+            PodToEvict:              *virtLauncherPod,
+            LiveMigrationInProgress: false,
+        },
+    }, hash, func(message string) {})
+
+    // Test case: migration was in progress when we checked shouldLiveMigrate
+    virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
+    kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+        {
+            PodToEvict:              *virtLauncherPod,
+            LiveMigrationInProgress: true,
+        },
+    }, hash, func(message string) {})
+
+    // Test case: VMI says that VM is running on a different node
+    virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
+    // vmi is running on node2 even though there is a running virt-launcher pod on node1
+    vmi.NodeName = "node2"
+    mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(nil, nil)
+    mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
+    kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+        {
+            PodToEvict:              *virtLauncherPod,
+            LiveMigrationInProgress: false,
+        },
+    }, hash, func(message string) {})

     // Test case: migration in progress for the same VMI
     virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
@@ -220,7 +251,12 @@ func TestStartEvictingVMPods(t *testing.T) {
     mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
         gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
     // No expectation for call to CreateMigration since no new migration should be created
-    kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
+    kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+        {
+            PodToEvict:              *virtLauncherPod,
+            LiveMigrationInProgress: false,
+        },
+    }, hash, func(message string) {})

     // Test case: failed migration for the same VMI with the same controller revision hash from the same sourceNode.
     // Should not create a new migration.
@@ -239,7 +275,12 @@ func TestStartEvictingVMPods(t *testing.T) {
     mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrFailed, nil)
     // No expectation for call to CreateMigration since no new migration should be created
     eventMsg := ""
-    kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) { eventMsg = message })
+    kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+        {
+            PodToEvict:              *virtLauncherPod,
+            LiveMigrationInProgress: false,
+        },
+    }, hash, func(message string) { eventMsg = message })
     require.Contains(t, eventMsg, "Stop or migrate the VM so that the update of the storage node can proceed")

     // Test case: Failed or in-progress migrations for a different VMI, different revision hash, different source node etc.
@@ -286,9 +327,15 @@ func TestStartEvictingVMPods(t *testing.T) {
     }
     mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
         gomock.Any(), "", gomock.Any()).Return(migrations, nil)
+    mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
     mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
         expectedAnnotations, expectedLabels).Return(nil, nil)
-    kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
+    kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+        {
+            PodToEvict:              *virtLauncherPod,
+            LiveMigrationInProgress: false,
+        },
+    }, hash, func(message string) {})
 }

 func getTestVirtLauncherPodAndVMI(podName, nodeName string) (*v1.Pod, *kubevirt.VirtualMachineInstance) {
diff --git a/pkg/controller/storagecluster/update.go b/pkg/controller/storagecluster/update.go
index a872d9707..aeb8bdedb 100644
--- a/pkg/controller/storagecluster/update.go
+++ b/pkg/controller/storagecluster/update.go
@@ -164,7 +164,7 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
     }

     // check if we should live-migrate VMs before updating the storage node
-    virtLauncherPodsByNode := map[string][]v1.Pod{}
+    virtLauncherPodsByNode := map[string][]*operatorutil.VMPodEviction{}
     if len(oldPodsToDelete) > 0 && !forceContinueUpgrade(cluster) && evictVMsDuringUpdate(cluster) {
         vmPodsPresent, err := c.kubevirt.ClusterHasVMPods()
         if err != nil {
diff --git a/pkg/mock/kubevirtmanager.mock.go b/pkg/mock/kubevirtmanager.mock.go
index 465c6925d..198254230 100644
--- a/pkg/mock/kubevirtmanager.mock.go
+++ b/pkg/mock/kubevirtmanager.mock.go
@@ -8,7 +8,7 @@ import (
     reflect "reflect"

     gomock "github.com/golang/mock/gomock"
-    v1 "k8s.io/api/core/v1"
+    util "github.com/libopenstorage/operator/pkg/util"
 )

 // MockKubevirtManager is a mock of KubevirtManager interface.
@@ -50,10 +50,10 @@ func (mr *MockKubevirtManagerMockRecorder) ClusterHasVMPods() *gomock.Call {
 }

 // GetVMPodsToEvictByNode mocks base method.
-func (m *MockKubevirtManager) GetVMPodsToEvictByNode(arg0 map[string]bool) (map[string][]v1.Pod, error) {
+func (m *MockKubevirtManager) GetVMPodsToEvictByNode(arg0 map[string]bool) (map[string][]*util.VMPodEviction, error) {
     m.ctrl.T.Helper()
     ret := m.ctrl.Call(m, "GetVMPodsToEvictByNode", arg0)
-    ret0, _ := ret[0].(map[string][]v1.Pod)
+    ret0, _ := ret[0].(map[string][]*util.VMPodEviction)
     ret1, _ := ret[1].(error)
     return ret0, ret1
 }
@@ -65,7 +65,7 @@ func (mr *MockKubevirtManagerMockRecorder) GetVMPodsToEvictByNode(arg0 interface
 }

 // StartEvictingVMPods mocks base method.
-func (m *MockKubevirtManager) StartEvictingVMPods(arg0 []v1.Pod, arg1 string, arg2 func(string)) {
+func (m *MockKubevirtManager) StartEvictingVMPods(arg0 []*util.VMPodEviction, arg1 string, arg2 func(string)) {
     m.ctrl.T.Helper()
     m.ctrl.Call(m, "StartEvictingVMPods", arg0, arg1, arg2)
 }
diff --git a/pkg/util/util.go b/pkg/util/util.go
index 3d715cb79..d2a34fbf6 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -89,6 +89,15 @@ const (
     ValidMinAvailable = "ValidMinAvailable"
 )

+// VMPodEviction has info about the virt-launcher pod that needs to be evicted before upgrading PX on a node
+type VMPodEviction struct {
+    // PodToEvict is the virt-launcher pod that needs to be evicted
+    PodToEvict v1.Pod
+    // LiveMigrationInProgress is true if in-progress live-migration exists for this VM. In this case, the eviction
+    // should be skipped until the next reconcile cycle
+    LiveMigrationInProgress bool
+}
+
 var (
     // commonDockerRegistries is a map of commonly used Docker registries
     commonDockerRegistries = map[string]bool{
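
Illustrative sketch (not part of the patch): the two extra checks above boil down to the following standalone Go snippet. It uses only the VMPodEviction type added in pkg/util/util.go; the helper name shouldStartLiveMigration and the vmiNodeName parameter are illustrative stand-ins, with vmiNodeName representing the node read from the VMI just before a migration would be created.

package example

import (
    "github.com/libopenstorage/operator/pkg/util"
)

// shouldStartLiveMigration distills the eviction gating from StartEvictingVMPods:
// skip when a live-migration was already in flight while the eviction list was built,
// and skip when the VMI has since moved off the node hosting the virt-launcher pod.
func shouldStartLiveMigration(eviction *util.VMPodEviction, vmiNodeName string) bool {
    if eviction.LiveMigrationInProgress {
        // Re-evaluate in the next Reconcile() cycle instead of creating a second migration.
        return false
    }
    if vmiNodeName != eviction.PodToEvict.Spec.NodeName {
        // The VM has already moved; evicting this virt-launcher pod is no longer needed.
        return false
    }
    return true
}

For a pod whose Spec.NodeName is "node1", this returns false when the VMI reports "node2" (the VM already moved) and true when it still reports "node1", which is exactly the window the patch is narrowing.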