PWX-37480: avoid starting extra live-migrations for the same VM (#1560) (#1561)

When deciding whether a running virt-launcher pod needs to be evicted, we now also check whether the VMI
is active on the same node. If the VMI reports that it is running on a different node and
there is no live-migration in progress, we skip the eviction for that pod, since the pod should reach
a completed state on its own.
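
For reference, a minimal sketch of that decision (the authoritative logic is shouldLiveMigrateVM in the kubevirt.go hunk below; the trimmed VMI struct here is illustrative):

```go
package sketch

import v1 "k8s.io/api/core/v1"

// VMI holds only the fields this sketch needs; the real type is the
// operator's VirtualMachineInstance wrapper used in kubevirt.go.
type VMI struct {
	NodeName       string
	LiveMigratable bool
}

// shouldEvict mirrors the rule described above: keep checking while a
// live-migration is in flight, skip the pod once the VMI reports another
// node, and otherwise defer to live-migratability.
func shouldEvict(pod *v1.Pod, vmi *VMI, migrationInProgress bool) bool {
	if migrationInProgress {
		// Re-evaluate after the in-flight live-migration completes.
		return true
	}
	if vmi.NodeName != pod.Spec.NodeName {
		// The VM already runs elsewhere; the source pod will complete on its own.
		return false
	}
	return vmi.LiveMigratable
}
```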

Also, we now call shouldEvictPod only for the virt-launcher pods on the subset of nodes
due for an update in the current Reconcile() iteration, so we avoid invoking k8s APIs unnecessarily.
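
Roughly, the caller builds that node set as follows (the committed change inlines this loop in rollingUpdate in the update.go hunk below; the helper name here is illustrative):

```go
package sketch

import v1 "k8s.io/api/core/v1"

// nodesDueForUpdate collects the node names whose old storage pods are being
// deleted in this Reconcile() pass; only these are handed to
// GetVMPodsToEvictByNode, which keeps the VMI and migration lookups bounded.
func nodesDueForUpdate(oldPodsToDelete []string, oldPodsMap map[string]*v1.Pod) map[string]bool {
	nodes := map[string]bool{}
	for _, podName := range oldPodsToDelete {
		if pod := oldPodsMap[podName]; pod != nil {
			nodes[pod.Spec.NodeName] = true
		}
	}
	return nodes
}
```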

When starting a live-migration, we now also check whether a live-migration has already
succeeded for the same VM in the same upgrade cycle. If it has, we don't start another
live-migration and generate an event instead. We were already doing this for failed
migrations.
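
A hedged sketch of that guard (the flattened Migration struct and function names are illustrative; the committed logic lives in StartEvictingVMPods in the kubevirt.go hunk below and reads the source-node and controller-revision-hash annotations):

```go
package sketch

import "fmt"

// Migration flattens the two annotations the real code inspects on a
// VirtualMachineInstanceMigration.
type Migration struct {
	Name       string
	Completed  bool
	Failed     bool
	SourceNode string // source-node annotation
	Hash       string // controller-revision-hash annotation
}

// startMigrationOnce creates a new live-migration only if none is in progress
// and none already ran for this VM from this node in the same upgrade cycle
// (same controller revision hash); otherwise it emits an event.
func startMigrationOnce(migrations []Migration, node, hash string, create func() error, event func(string)) error {
	for _, m := range migrations {
		if !m.Completed {
			return nil // a live-migration is already in progress
		}
		if m.SourceNode == node && m.Hash == hash {
			if m.Failed {
				event(fmt.Sprintf("live-migration %s failed; stop or migrate the VM manually", m.Name))
			} else {
				event(fmt.Sprintf("live-migration %s already succeeded, but the source pod is still running", m.Name))
			}
			return nil
		}
	}
	return create()
}
```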

Signed-off-by: Neelesh Thakur <neelesh.thakur@purestorage.com>
pureneelesh authored Jun 1, 2024
1 parent d702d7e commit 4c82a26
Showing 5 changed files with 153 additions and 33 deletions.
10 changes: 8 additions & 2 deletions pkg/controller/storagecluster/controller_test.go
@@ -2690,7 +2690,13 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
},
}
kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
kubevirt.EXPECT().GetVMPodsToEvictByNode().Return(map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
wantNodes := map[string]bool{
k8sNodes[0].Name: true,
k8sNodes[1].Name: true,
k8sNodes[2].Name: true,
}
kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(
map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
kubevirt.EXPECT().StartEvictingVMPods(vmPods, gomock.Any(), gomock.Any())

result, err = controller.Reconcile(context.TODO(), request)
@@ -2745,7 +2751,7 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
},
}
kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
kubevirt.EXPECT().GetVMPodsToEvictByNode().Return(map[string][]v1.Pod{
kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]v1.Pod{
k8sNodes[0].Name: vmPodsNode0,
k8sNodes[2].Name: vmPodsNode2,
}, nil)
83 changes: 68 additions & 15 deletions pkg/controller/storagecluster/kubevirt.go
@@ -19,7 +19,7 @@ type KubevirtManager interface {
ClusterHasVMPods() (bool, error)

// GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that are live-migratable
GetVMPodsToEvictByNode() (map[string][]v1.Pod, error)
GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error)

// StartEvictingVMPods starts live-migrating the virt-launcher pods to other nodes
StartEvictingVMPods(virtLauncherPods []v1.Pod, controllerRevisionHash string,
@@ -53,14 +53,17 @@ func (k *kubevirtManagerImpl) ClusterHasVMPods() (bool, error) {
return len(virtLauncherPods) > 0, nil
}

func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode() (map[string][]v1.Pod, error) {
func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error) {
virtLauncherPodsByNode := map[string][]v1.Pod{}
// get a list of virt-launcher pods for each node
virtLauncherPods, err := k.getVirtLauncherPods()
if err != nil {
return nil, err
}
for _, pod := range virtLauncherPods {
if !wantNodes[pod.Spec.NodeName] {
continue
}
shouldEvict, err := k.shouldLiveMigrateVM(&pod)
if err != nil {
return nil, err
@@ -84,29 +87,36 @@ OUTER:
logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s", pod.Namespace, pod.Name)
continue
}
migrations, err := k.kubevirtOps.ListVirtualMachineInstanceMigrations(ctx, pod.Namespace, metav1.ListOptions{})
migrations, err := k.getVMIMigrations(pod.Namespace, vmiName)
if err != nil {
logrus.Warnf("Failed to list VM live-migrations in namespace %s: %v", pod.Namespace, err)
logrus.Warnf("Cannot evict pod %s/%s: %v", pod.Namespace, pod.Name, err)
continue
}
for _, migration := range migrations {
if migration.VMIName == vmiName {
if !migration.Completed {
logrus.Infof("VM live-migration %s/%s is in progress (%s) for VM %s",
pod.Namespace, migration.Name, migration.Phase, vmiName)
continue OUTER
}
if migration.Failed &&
migration.Annotations[constants.AnnotationVMIMigrationSourceNode] == pod.Spec.NodeName &&
migration.Annotations[constants.AnnotationControllerRevisionHashKey] == controllerRevisionHash {
if !migration.Completed {
logrus.Infof("VM live-migration %s/%s is in progress (%s) for VM %s",
pod.Namespace, migration.Name, migration.Phase, vmiName)
continue OUTER
}
if migration.Annotations[constants.AnnotationVMIMigrationSourceNode] == pod.Spec.NodeName &&
migration.Annotations[constants.AnnotationControllerRevisionHashKey] == controllerRevisionHash {

if migration.Failed {
msg := fmt.Sprintf("Live migration %s failed for VM %s/%s on node %s. "+
"Stop or migrate the VM so that the update of the storage node can proceed.",
migration.Name, pod.Namespace, vmiName, pod.Spec.NodeName)
logrus.Warnf(msg)
failedToEvictVMEventFunc(msg)
continue OUTER
} else {
// We should not have to evict the same VM twice in the same upgrade. That probably means
// something went wrong elsewhere. Let's avoid creating too many live-migrations unnecessarily.
msg := fmt.Sprintf("Live migration %s has already succeeded for VM %s/%s on node %s. "+
"But the VM pod %s is still running. Stop or migrate the VM if it is still running node %s.",
migration.Name, pod.Namespace, vmiName, pod.Spec.NodeName, pod.Name, pod.Spec.NodeName)
logrus.Warnf(msg)
failedToEvictVMEventFunc(msg)
}
continue OUTER
}
}
labels := map[string]string{
@@ -126,18 +136,51 @@ }
}
}

func (k *kubevirtManagerImpl) getVMIMigrations(
vmiNamespace, vmiName string,
) ([]*kubevirt.VirtualMachineInstanceMigration, error) {

var ret []*kubevirt.VirtualMachineInstanceMigration
migrations, err := k.kubevirtOps.ListVirtualMachineInstanceMigrations(
context.TODO(), vmiNamespace, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list VM live-migrations in namespace %s: %w", vmiNamespace, err)
}
for _, migration := range migrations {
if migration.VMIName == vmiName {
ret = append(ret, migration)
}
}
return ret, nil
}

func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, error) {
// we only care about the pods that are not in a terminal state
if virtLauncherPod.Status.Phase == v1.PodSucceeded || virtLauncherPod.Status.Phase == v1.PodFailed {
return false, nil
}
// ignore the VMs that are not live-migratable
vmiName := k.getVMIName(virtLauncherPod)
if vmiName == "" {
logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s. Skipping live-migration.",
virtLauncherPod.Namespace, virtLauncherPod.Name)
return false, nil
}
migrations, err := k.getVMIMigrations(virtLauncherPod.Namespace, vmiName)
if err != nil {
return false, err
}
for _, migration := range migrations {
if !migration.Completed {
// We already checked that the virt-launcher pod is not in a terminal state.
// There is a live-migration in progress for the VMI.
// Wait for the live-migration to finish before determining if we need to evict this pod.
// Return "shouldEvict=true" and deal with it later.
logrus.Infof("Will check whether to evict pod %s/%s after the live-migration %s (%s) is completed.",
virtLauncherPod.Namespace, virtLauncherPod.Name, migration.Name, migration.Phase)
return true, nil
}
}
// get VMI to check if the VM is live-migratable and if it is running on the same node as the virt-launcher pod
vmi, err := k.kubevirtOps.GetVirtualMachineInstance(context.TODO(), virtLauncherPod.Namespace, vmiName)
if err != nil {
if !errors.IsNotFound(err) {
@@ -146,6 +189,16 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
logrus.Warnf("VMI %s/%s was not found; skipping live-migration: %v", virtLauncherPod.Namespace, vmiName, err)
return false, nil
}
// We already checked that there is no live migration in progress for this VMI.
// Ignore this pod if VMI says that the VM is running on another node. This can happen if
// the live migration that we started in the previous Reconcile() has completed but the source pod is still in
// the Running phase. We don't need to evict this pod, so don't start another live-migration unnecessarily.
if vmi.NodeName != virtLauncherPod.Spec.NodeName {
logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Skipping eviction of pod %s.",
virtLauncherPod.Namespace, vmiName, vmi.NodeName, virtLauncherPod.Spec.NodeName, virtLauncherPod.Name)
return false, nil
}
// Ignore the VMs that are not live-migratable.
return vmi.LiveMigratable, nil
}

81 changes: 70 additions & 11 deletions pkg/controller/storagecluster/kubevirt_test.go
@@ -62,17 +62,17 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {

// Test case: no virt-launcher pods
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(nil, nil)
pods, err := kvmgr.GetVMPodsToEvictByNode()
pods, err := kvmgr.GetVMPodsToEvictByNode(nil)
require.NoError(t, err)
require.Empty(t, pods)

mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{}, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(nil)
require.NoError(t, err)
require.Empty(t, pods)

mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{}}, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(nil)
require.NoError(t, err)
require.Empty(t, pods)

@@ -81,8 +81,10 @@
virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
virtLauncherPod.Status.Phase = phase
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), vmi.NameSpace, gomock.Any()).Return(nil, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.NoError(t, err)
require.NotEmpty(t, pods)
require.Len(t, pods, 1)
@@ -95,7 +97,7 @@
virtLauncherPod, _ := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
virtLauncherPod.Status.Phase = phase
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.NoError(t, err)
require.Empty(t, pods)
}
@@ -104,32 +106,86 @@
virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
vmi.LiveMigratable = false
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), vmi.NameSpace, gomock.Any()).Return(nil, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.NoError(t, err)
require.Empty(t, pods)

// Test case: error listing pods
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(nil, assert.AnError)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(nil)
require.Error(t, err)
require.Empty(t, pods)

// Test case: no VMI ownerRef in virt-launcher pod; no error should be returned
virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
virtLauncherPod.OwnerReferences = nil
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.NoError(t, err)
require.Empty(t, pods)

// Test case: error getting VMI
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), vmi.NameSpace, gomock.Any()).Return(nil, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(nil, assert.AnError)
pods, err = kvmgr.GetVMPodsToEvictByNode()
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.Error(t, err)
require.Empty(t, pods)

// Test case: wantNodes does not include the VM's node
virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
virtLauncherPod.Status.Phase = v1.PodRunning
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node2": true})
require.NoError(t, err)
require.Empty(t, pods)

// Test case: running virt-launcher pod exists with a VMI migration in progress.
// In this case the VM might migrate to this node, so shouldEvict should be true.
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
virtLauncherPod.Status.Phase = v1.PodRunning
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
migrCompleted := []*kubevirt.VirtualMachineInstanceMigration{
{
VMIName: vmi.Name,
Completed: false,
Phase: "Running",
},
}
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), vmi.NameSpace, gomock.Any()).Return(migrCompleted, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.NoError(t, err)
require.NotEmpty(t, pods)
require.Len(t, pods, 1)
require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)

// Test case: VM was migrated out already but the source virt-launcher pod has not finished yet.
// VMI migration in completed state and VMI is pointing to a different node.
// In this case shouldEvict should be false.
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
vmi.NodeName = "node2" // vmi is running on node2 even though there is a running virt-launcher pod on node1
virtLauncherPod.Status.Phase = v1.PodRunning
mockCoreOps.EXPECT().ListPods(gomock.Any()).Return(&v1.PodList{Items: []v1.Pod{*virtLauncherPod}}, nil)
migrCompleted = []*kubevirt.VirtualMachineInstanceMigration{
{
VMIName: vmi.Name,
Completed: true,
Phase: "Succeeded",
},
}
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), vmi.NameSpace, gomock.Any()).Return(migrCompleted, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
pods, err = kvmgr.GetVMPodsToEvictByNode(map[string]bool{"node1": true})
require.NoError(t, err)
require.Empty(t, pods)
}

func TestStartEvictingVMPods(t *testing.T) {
@@ -161,7 +217,8 @@ func TestStartEvictingVMPods(t *testing.T) {
Phase: "Running",
},
}
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
// No expectation for call to CreateMigration since no new migration should be created
kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})

@@ -227,7 +284,8 @@ func TestStartEvictingVMPods(t *testing.T) {
Labels: expectedLabels,
},
}
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrations, nil)
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), "", gomock.Any()).Return(migrations, nil)
mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
expectedAnnotations, expectedLabels).Return(nil, nil)
kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
@@ -254,5 +312,6 @@ func getTestVirtLauncherPodAndVMI(podName, nodeName string) (*v1.Pod, *kubevirt.
}, &kubevirt.VirtualMachineInstance{
LiveMigratable: true,
Name: vmiName,
NodeName: nodeName,
}
}
4 changes: 3 additions & 1 deletion pkg/controller/storagecluster/update.go
@@ -173,6 +173,7 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
if vmPodsPresent {
// add unschedulable label to the nodes that have pods to be deleted so that
// stork does not schedule any new virt-launcher pods on them
evictionNodes := map[string]bool{}
for _, podName := range oldPodsToDelete {
pod := oldPodsMap[podName]
if pod == nil {
@@ -187,9 +188,10 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
if err := c.addNodeUnschedulableAnnotation(pod.Spec.NodeName); err != nil {
return err
}
evictionNodes[pod.Spec.NodeName] = true
}
// get the VM pods after labeling the nodes since the list may have changed
virtLauncherPodsByNode, err = c.kubevirt.GetVMPodsToEvictByNode()
virtLauncherPodsByNode, err = c.kubevirt.GetVMPodsToEvictByNode(evictionNodes)
if err != nil {
return err
}
8 changes: 4 additions & 4 deletions pkg/mock/kubevirtmanager.mock.go

Some generated files are not rendered by default.
