diff --git a/pkg/controller/storagecluster/controller_test.go b/pkg/controller/storagecluster/controller_test.go
index 2150cc7c4..d8691cef0 100644
--- a/pkg/controller/storagecluster/controller_test.go
+++ b/pkg/controller/storagecluster/controller_test.go
@@ -2690,7 +2690,13 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
 		},
 	}
 	kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
-	kubevirt.EXPECT().GetVMPodsToEvictByNode().Return(map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
+	wantNodes := map[string]bool{
+		k8sNodes[0].Name: true,
+		k8sNodes[1].Name: true,
+		k8sNodes[2].Name: true,
+	}
+	kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(
+		map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
 	kubevirt.EXPECT().StartEvictingVMPods(vmPods, gomock.Any(), gomock.Any())
 
 	result, err = controller.Reconcile(context.TODO(), request)
@@ -2745,7 +2751,7 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
 		},
 	}
 	kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
-	kubevirt.EXPECT().GetVMPodsToEvictByNode().Return(map[string][]v1.Pod{
+	kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]v1.Pod{
 		k8sNodes[0].Name: vmPodsNode0,
 		k8sNodes[2].Name: vmPodsNode2,
 	}, nil)
diff --git a/pkg/controller/storagecluster/kubevirt.go b/pkg/controller/storagecluster/kubevirt.go
index a53b98196..2bc84fe65 100644
--- a/pkg/controller/storagecluster/kubevirt.go
+++ b/pkg/controller/storagecluster/kubevirt.go
@@ -19,7 +19,7 @@ type KubevirtManager interface {
 	ClusterHasVMPods() (bool, error)
 
 	// GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that are live-migratable
-	GetVMPodsToEvictByNode() (map[string][]v1.Pod, error)
+	GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error)
 
 	// StartEvictingVMPods starts live-migrating the virt-launcher pods to other nodes
 	StartEvictingVMPods(virtLauncherPods []v1.Pod, controllerRevisionHash string,
@@ -53,7 +53,7 @@ func (k *kubevirtManagerImpl) ClusterHasVMPods() (bool, error) {
 	return len(virtLauncherPods) > 0, nil
 }
 
-func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode() (map[string][]v1.Pod, error) {
+func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error) {
 	virtLauncherPodsByNode := map[string][]v1.Pod{}
 	// get a list of virt-launcher pods for each node
 	virtLauncherPods, err := k.getVirtLauncherPods()
@@ -61,6 +61,9 @@ func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode() (map[string][]v1.Pod, err
 		return nil, err
 	}
 	for _, pod := range virtLauncherPods {
+		if !wantNodes[pod.Spec.NodeName] {
+			continue
+		}
 		shouldEvict, err := k.shouldLiveMigrateVM(&pod)
 		if err != nil {
 			return nil, err
@@ -84,29 +87,36 @@ OUTER:
 			logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s", pod.Namespace, pod.Name)
 			continue
 		}
-		migrations, err := k.kubevirtOps.ListVirtualMachineInstanceMigrations(ctx, pod.Namespace, metav1.ListOptions{})
+		migrations, err := k.getVMIMigrations(pod.Namespace, vmiName)
 		if err != nil {
-			logrus.Warnf("Failed to list VM live-migrations in namespace %s: %v", pod.Namespace, err)
+			logrus.Warnf("Cannot evict pod %s/%s: %v", pod.Namespace, pod.Name, err)
 			continue
 		}
 		for _, migration := range migrations {
-			if migration.VMIName == vmiName {
-				if !migration.Completed {
-					logrus.Infof("VM live-migration %s/%s is in progress (%s) for VM %s",
-						pod.Namespace, migration.Name, migration.Phase, vmiName)
-					continue OUTER
-				}
-				if migration.Failed &&
-					migration.Annotations[constants.AnnotationVMIMigrationSourceNode] == pod.Spec.NodeName &&
-					migration.Annotations[constants.AnnotationControllerRevisionHashKey] == controllerRevisionHash {
+			if !migration.Completed {
+				logrus.Infof("VM live-migration %s/%s is in progress (%s) for VM %s",
+					pod.Namespace, migration.Name, migration.Phase, vmiName)
+				continue OUTER
+			}
+			if migration.Annotations[constants.AnnotationVMIMigrationSourceNode] == pod.Spec.NodeName &&
+				migration.Annotations[constants.AnnotationControllerRevisionHashKey] == controllerRevisionHash {
+				if migration.Failed {
 					msg := fmt.Sprintf("Live migration %s failed for VM %s/%s on node %s. "+
 						"Stop or migrate the VM so that the update of the storage node can proceed.",
 						migration.Name, pod.Namespace, vmiName, pod.Spec.NodeName)
 					logrus.Warnf(msg)
 					failedToEvictVMEventFunc(msg)
-					continue OUTER
+				} else {
+					// We should not have to evict the same VM twice in the same upgrade. That probably means
+					// something went wrong elsewhere. Let's avoid creating too many live-migrations unnecessarily.
+					msg := fmt.Sprintf("Live migration %s has already succeeded for VM %s/%s on node %s. "+
+						"But the VM pod %s is still running. Stop or migrate the VM if it is still running node %s.",
+						migration.Name, pod.Namespace, vmiName, pod.Spec.NodeName, pod.Name, pod.Spec.NodeName)
+					logrus.Warnf(msg)
+					failedToEvictVMEventFunc(msg)
 				}
+				continue OUTER
 			}
 		}
 		labels := map[string]string{
@@ -126,18 +136,51 @@ OUTER:
 	}
 }
 
+func (k *kubevirtManagerImpl) getVMIMigrations(
+	vmiNamespace, vmiName string,
+) ([]*kubevirt.VirtualMachineInstanceMigration, error) {
+
+	var ret []*kubevirt.VirtualMachineInstanceMigration
+	migrations, err := k.kubevirtOps.ListVirtualMachineInstanceMigrations(
+		context.TODO(), vmiNamespace, metav1.ListOptions{})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list VM live-migrations in namespace %s: %w", vmiNamespace, err)
+	}
+	for _, migration := range migrations {
+		if migration.VMIName == vmiName {
+			ret = append(ret, migration)
+		}
+	}
+	return ret, nil
+}
+
 func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, error) {
 	// we only care about the pods that are not in a terminal state
 	if virtLauncherPod.Status.Phase == v1.PodSucceeded || virtLauncherPod.Status.Phase == v1.PodFailed {
 		return false, nil
 	}
-	// ignore the VMs that are not live-migratable
 	vmiName := k.getVMIName(virtLauncherPod)
 	if vmiName == "" {
 		logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s. Skipping live-migration.",
 			virtLauncherPod.Namespace, virtLauncherPod.Name)
 		return false, nil
 	}
+	migrations, err := k.getVMIMigrations(virtLauncherPod.Namespace, vmiName)
+	if err != nil {
+		return false, err
+	}
+	for _, migration := range migrations {
+		if !migration.Completed {
+			// We already checked that the virt-launcher pod is in not in a terminal state.
+			// There is a live-migration in progress for the VMI.
+			// Wait for the live-migration to finish before determining if we need to evict this pod.
+			// Return "shouldEvict=true" and deal with it later.
+			logrus.Infof("Will check whether to evict pod %s/%s after the live-migration %s (%s) is completed.",
+				virtLauncherPod.Namespace, virtLauncherPod.Name, migration.Name, migration.Phase)
+			return true, nil
+		}
+	}
+	// get VMI to check if the VM is live-migratable and if it is running on the same node as the virt-launcher pod
 	vmi, err := k.kubevirtOps.GetVirtualMachineInstance(context.TODO(), virtLauncherPod.Namespace, vmiName)
 	if err != nil {
 		if !errors.IsNotFound(err) {
@@ -146,6 +189,16 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
 		logrus.Warnf("VMI %s/%s was not found; skipping live-migration: %v", virtLauncherPod.Namespace, vmiName, err)
 		return false, nil
 	}
+	// We already checked that there is no live migration in progress for this VMI.
+	// Ignore this pod if VMI says that the VM is running on another node. This can happen if
+	// the live migration that we started in the previous Reconcile() has completed but the source pod is still in
+	// the Running phase. We don't need to evict this pod, so don't start another live-migration unnecessarily.
+	if vmi.NodeName != virtLauncherPod.Spec.NodeName {
+		logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Skipping eviction of pod %s.",
+			virtLauncherPod.Namespace, vmiName, vmi.NodeName, virtLauncherPod.Spec.NodeName, virtLauncherPod.Name)
+		return false, nil
+	}
+	// Ignore the VMs that are not live-migratable.
 	return vmi.LiveMigratable, nil
 }
 
diff --git a/pkg/controller/storagecluster/kubevirt_test.go b/pkg/controller/storagecluster/kubevirt_test.go
index f7137d04a..3d1dab810 100644
--- a/pkg/controller/storagecluster/kubevirt_test.go
+++ b/pkg/controller/storagecluster/kubevirt_test.go
@@ -62,17 +62,17 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
 
 	// Test case: no virt-launcher pods
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(nil, nil)
-	pods, err := kvmgr.GetVMPodsToEvictByNode()
+	pods, err := kvmgr.GetVMPodsToEvictByNode(nil)
 	require.NoError(t, err)
 	require.Empty(t, pods)
 
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{}, nil)
-	pods, err = kvmgr.GetVMPodsToEvictByNode()
+	pods, err = kvmgr.GetVMPodsToEvictByNode(nil)
 	require.NoError(t, err)
 	require.Empty(t, pods)
 
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{}}, nil)
-	pods, err = kvmgr.GetVMPodsToEvictByNode()
+	pods, err = kvmgr.GetVMPodsToEvictByNode(nil)
 	require.NoError(t, err)
 	require.Empty(t, pods)
 
@@ -81,8 +81,10 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
 		virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
 		virtLauncherPod.Status.Phase = phase
 		mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
+		mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+			gomock.Any(), vmi.NameSpace, gomock.Any()).Return(nil, nil)
 		mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
-		pods, err = kvmgr.GetVMPodsToEvictByNode()
+		pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
 		require.NoError(t, err)
 		require.NotEmpty(t, pods)
 		require.Len(t, pods, 1)
@@ -95,7 +97,7 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
 		virtLauncherPod, _ := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
 		virtLauncherPod.Status.Phase = phase
 		mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
-		pods, err = kvmgr.GetVMPodsToEvictByNode()
+		pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
 		require.NoError(t, err)
 		require.Empty(t, pods)
 	}
@@ -104,14 +106,16 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
 	virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
 	vmi.LiveMigratable = false
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
+	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+		gomock.Any(), vmi.NameSpace, gomock.Any()).Return(nil, nil)
 	mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
-	pods, err = kvmgr.GetVMPodsToEvictByNode()
+	pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
 	require.NoError(t, err)
 	require.Empty(t, pods)
 
 	// Test case: error listing pods
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(nil, assert.AnError)
-	pods, err = kvmgr.GetVMPodsToEvictByNode()
+	pods, err = kvmgr.GetVMPodsToEvictByNode(nil)
 	require.Error(t, err)
 	require.Empty(t, pods)
 
@@ -119,17 +123,69 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
 	virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
 	virtLauncherPod.OwnerReferences = nil
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
-	pods, err = kvmgr.GetVMPodsToEvictByNode()
+	pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
 	require.NoError(t, err)
 	require.Empty(t, pods)
 
 	// Test case: error getting VMI
 	virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
 	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
+	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+		gomock.Any(), vmi.NameSpace, gomock.Any()).Return(nil, nil)
 	mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(nil, assert.AnError)
-	pods, err = kvmgr.GetVMPodsToEvictByNode()
+	pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
 	require.Error(t, err)
 	require.Empty(t, pods)
+
+	// Test case: wantNode does not have VM's node
+	virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
+	virtLauncherPod.Status.Phase = v1.PodRunning
+	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
+	pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node2": true})
+	require.NoError(t, err)
+	require.Empty(t, pods)
+
+	// Test case: running virt-launcher pod exists with VMI migration in progress.
+	// In this case VM might migrate to this node, so shouldEvict should be true.
+	virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
+	virtLauncherPod.Status.Phase = v1.PodRunning
+	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
+	migrCompleted := []*kubevirt.VirtualMachineInstanceMigration{
+		{
+			VMIName:   vmi.Name,
+			Completed: false,
+			Phase:     "Running",
+		},
+	}
+	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+		gomock.Any(), vmi.NameSpace, gomock.Any()).Return(migrCompleted, nil)
+	pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
+	require.NoError(t, err)
+	require.NotEmpty(t, pods)
+	require.Len(t, pods, 1)
+	require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
+	require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
+
+	// Test case: VM was migrated out already but the source virt-launcher pod has not finished yet.
+	// VMI migration in completed state and VMI is pointing to a different node.
+	// In this case shouldEvict should be false.
+	virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
+	vmi.NodeName = "node2" // vmi is running on node2 even though there is a running virt-launcher pod on node1
+	virtLauncherPod.Status.Phase = v1.PodRunning
+	mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
+	migrCompleted = []*kubevirt.VirtualMachineInstanceMigration{
+		{
+			VMIName:   vmi.Name,
+			Completed: true,
+			Phase:     "Succeeded",
+		},
+	}
+	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+		gomock.Any(), vmi.NameSpace, gomock.Any()).Return(migrCompleted, nil)
+	mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
+	pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
+	require.NoError(t, err)
+	require.Empty(t, pods)
 }
 
 func TestStartEvictingVMPods(t *testing.T) {
@@ -161,7 +217,8 @@ func TestStartEvictingVMPods(t *testing.T) {
 			Phase:   "Running",
 		},
 	}
-	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
+	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+		gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
 	// No expectation for call to CreateMigration since no new migration should be created
 	kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
 
@@ -227,7 +284,8 @@ func TestStartEvictingVMPods(t *testing.T) {
 			Labels:      expectedLabels,
 		},
 	}
-	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrations, nil)
+	mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
+		gomock.Any(), "", gomock.Any()).Return(migrations, nil)
 	mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
 		expectedAnnotations, expectedLabels).Return(nil, nil)
 	kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
@@ -254,5 +312,6 @@ func getTestVirtLauncherPodAndVMI(podName, nodeName string) (*v1.Pod, *kubevirt.
 		}, &kubevirt.VirtualMachineInstance{
 			LiveMigratable: true,
 			Name:           vmiName,
+			NodeName:       nodeName,
 		}
 }
diff --git a/pkg/controller/storagecluster/update.go b/pkg/controller/storagecluster/update.go
index a1f965a64..a872d9707 100644
--- a/pkg/controller/storagecluster/update.go
+++ b/pkg/controller/storagecluster/update.go
@@ -173,6 +173,7 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
 	if vmPodsPresent {
 		// add unschedulable label to the nodes that have pods to be deleted so that
 		// stork does not schedule any new virt-launcher pods on them
+		evictionNodes := map[string]bool{}
 		for _, podName := range oldPodsToDelete {
 			pod := oldPodsMap[podName]
 			if pod == nil {
@@ -187,9 +188,10 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
 			if err := c.addNodeUnschedulableAnnotation(pod.Spec.NodeName); err != nil {
 				return err
 			}
+			evictionNodes[pod.Spec.NodeName] = true
 		}
 		// get the VM pods after labeling the nodes since the list may have changed
-		virtLauncherPodsByNode, err = c.kubevirt.GetVMPodsToEvictByNode()
+		virtLauncherPodsByNode, err = c.kubevirt.GetVMPodsToEvictByNode(evictionNodes)
 		if err != nil {
 			return err
 		}
diff --git a/pkg/mock/kubevirtmanager.mock.go b/pkg/mock/kubevirtmanager.mock.go
index b73bc239a..465c6925d 100644
--- a/pkg/mock/kubevirtmanager.mock.go
+++ b/pkg/mock/kubevirtmanager.mock.go
@@ -50,18 +50,18 @@ func (mr *MockKubevirtManagerMockRecorder) ClusterHasVMPods() *gomock.Call {
 }
 
 // GetVMPodsToEvictByNode mocks base method.
-func (m *MockKubevirtManager) GetVMPodsToEvictByNode() (map[string][]v1.Pod, error) {
+func (m *MockKubevirtManager) GetVMPodsToEvictByNode(arg0 map[string]bool) (map[string][]v1.Pod, error) {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "GetVMPodsToEvictByNode")
+	ret := m.ctrl.Call(m, "GetVMPodsToEvictByNode", arg0)
 	ret0, _ := ret[0].(map[string][]v1.Pod)
 	ret1, _ := ret[1].(error)
 	return ret0, ret1
 }
 
 // GetVMPodsToEvictByNode indicates an expected call of GetVMPodsToEvictByNode.
-func (mr *MockKubevirtManagerMockRecorder) GetVMPodsToEvictByNode() *gomock.Call {
+func (mr *MockKubevirtManagerMockRecorder) GetVMPodsToEvictByNode(arg0 interface{}) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMPodsToEvictByNode", reflect.TypeOf((*MockKubevirtManager)(nil).GetVMPodsToEvictByNode))
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMPodsToEvictByNode", reflect.TypeOf((*MockKubevirtManager)(nil).GetVMPodsToEvictByNode), arg0)
 }
 
 // StartEvictingVMPods mocks base method.
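Reviewer note: a minimal sketch of the call pattern this change expects from callers, mirroring the `rollingUpdate` flow in `update.go` (build the set of nodes being updated, query only those nodes, then start live-migrations for the returned pods). The `KubevirtManager` interface below is copied from the diff; the `evictVMsFromNodes` helper, its parameters, and the callback shape for `StartEvictingVMPods` (inferred from the tests above) are assumptions for illustration only, not code from the operator.

```go
package example

import (
	v1 "k8s.io/api/core/v1"
)

// KubevirtManager mirrors the interface from pkg/controller/storagecluster/kubevirt.go in this diff.
// The third StartEvictingVMPods parameter is inferred from the tests (an event callback).
type KubevirtManager interface {
	ClusterHasVMPods() (bool, error)
	GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error)
	StartEvictingVMPods(virtLauncherPods []v1.Pod, controllerRevisionHash string,
		failedToEvictVMEventFunc func(message string))
}

// evictVMsFromNodes is a hypothetical helper showing the intended usage: restrict
// GetVMPodsToEvictByNode to the nodes under update so VMs that have already
// live-migrated elsewhere are not evicted a second time.
func evictVMsFromNodes(kv KubevirtManager, nodesUnderUpdate []string, revisionHash string,
	recordEvent func(message string)) error {
	wantNodes := map[string]bool{}
	for _, node := range nodesUnderUpdate {
		wantNodes[node] = true
	}
	vmPodsByNode, err := kv.GetVMPodsToEvictByNode(wantNodes)
	if err != nil {
		return err
	}
	for _, pods := range vmPodsByNode {
		kv.StartEvictingVMPods(pods, revisionHash, recordEvent)
	}
	return nil
}
```

This matches the controller flow in `update.go`, where `evictionNodes` is populated only for nodes whose pods are slated for deletion before `GetVMPodsToEvictByNode` is called.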