PWX-37573: tighten the checks for avoiding unnecessary VM live migration
If a live migration is already in progress for a VM that we want to evict,
do not create another live migration for that VM in the same Reconcile() cycle.

Just before starting a live migration, check the VMI one more time to verify
that the VMI still points to the same node as the pod being evicted.

These two extra checks reduce the window in which PX and the operator may
try to live-migrate the same VM off the same node. They also handle
any other unexpected live migrations that might start.

Signed-off-by: Neelesh Thakur <neelesh.thakur@purestorage.com>
pureneelesh committed Jun 6, 2024
1 parent 4c82a26 commit 729e5e3
Showing 6 changed files with 136 additions and 37 deletions.
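
To make the two checks concrete, here is a minimal, self-contained Go sketch of the eviction loop they describe. It is illustrative only: the extra VMPodEviction fields, the simplified vmi type, and the getVMI callback are stand-ins for this sketch, not the operator's real API.

// vmeviction_sketch.go - illustrative only; simplified types, not the operator's API.
package main

import "fmt"

// VMPodEviction loosely mirrors the struct added in pkg/util/util.go.
type VMPodEviction struct {
    PodName                 string // name of the virt-launcher pod (stand-in for PodToEvict)
    PodNodeName             string // node the virt-launcher pod is scheduled on
    VMIName                 string // name of the VirtualMachineInstance backing the pod
    LiveMigrationInProgress bool   // set when a migration was already running at collection time
}

// vmi is a simplified stand-in for the KubeVirt VirtualMachineInstance status.
type vmi struct {
    Name     string
    NodeName string
}

// startEvictions applies the two extra checks before creating a live migration.
func startEvictions(evictions []*VMPodEviction, getVMI func(name string) (*vmi, error)) {
    for _, e := range evictions {
        // Check 1: a live migration was already in progress when the pod list was
        // built; wait for the next Reconcile() cycle instead of creating another one.
        if e.LiveMigrationInProgress {
            fmt.Printf("skipping %s until the next reconcile cycle\n", e.PodName)
            continue
        }
        // Check 2: re-read the VMI just before migrating; if it already moved to
        // another node, this eviction is no longer needed.
        v, err := getVMI(e.VMIName)
        if err != nil {
            fmt.Printf("failed to get VMI %s: %v\n", e.VMIName, err)
            continue
        }
        if v.NodeName != e.PodNodeName {
            fmt.Printf("VMI %s now on %s, not %s; skipping\n", v.Name, v.NodeName, e.PodNodeName)
            continue
        }
        fmt.Printf("starting live migration for VMI %s\n", e.VMIName)
    }
}

func main() {
    lookup := func(name string) (*vmi, error) { return &vmi{Name: name, NodeName: "node1"}, nil }
    startEvictions([]*VMPodEviction{
        {PodName: "virt-launcher-1", PodNodeName: "node1", VMIName: "vm-1"},
        {PodName: "virt-launcher-2", PodNodeName: "node1", VMIName: "vm-2", LiveMigrationInProgress: true},
    }, lookup)
}

In the operator itself, the equivalent logic lives in StartEvictingVMPods in pkg/controller/storagecluster/kubevirt.go, shown in the diff below.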
29 changes: 22 additions & 7 deletions pkg/controller/storagecluster/controller_test.go
@@ -2695,9 +2695,10 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
k8sNodes[1].Name: true,
k8sNodes[2].Name: true,
}
+ evictions := getVMPodEvictions(t, vmPods)
kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(
- map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
- kubevirt.EXPECT().StartEvictingVMPods(vmPods, gomock.Any(), gomock.Any())
+ map[string][]*util.VMPodEviction{k8sNodes[1].Name: evictions}, nil)
+ kubevirt.EXPECT().StartEvictingVMPods(evictions, gomock.Any(), gomock.Any())

result, err = controller.Reconcile(context.TODO(), request)
require.NoError(t, err)
@@ -2750,13 +2751,16 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
},
},
}
+ evictionsNode0 := getVMPodEvictions(t, vmPodsNode0)
+ evictionsNode2 := getVMPodEvictions(t, vmPodsNode2)
+
kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
- kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]v1.Pod{
- k8sNodes[0].Name: vmPodsNode0,
- k8sNodes[2].Name: vmPodsNode2,
+ kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]*util.VMPodEviction{
+ k8sNodes[0].Name: evictionsNode0,
+ k8sNodes[2].Name: evictionsNode2,
}, nil)
- kubevirt.EXPECT().StartEvictingVMPods(vmPodsNode0, gomock.Any(), gomock.Any())
- kubevirt.EXPECT().StartEvictingVMPods(vmPodsNode2, gomock.Any(), gomock.Any())
+ kubevirt.EXPECT().StartEvictingVMPods(evictionsNode0, gomock.Any(), gomock.Any())
+ kubevirt.EXPECT().StartEvictingVMPods(evictionsNode2, gomock.Any(), gomock.Any())

result, err = controller.Reconcile(context.TODO(), request)
require.NoError(t, err)
@@ -10669,3 +10673,14 @@ func getNode(t *testing.T, k8sclient client.Client, nodeName string) *v1.Node {
require.NoError(t, err)
return node
}

func getVMPodEvictions(t *testing.T, podsToEvict []v1.Pod) []*util.VMPodEviction {
var evictions []*util.VMPodEviction
for _, vmPod := range podsToEvict {
evictions = append(evictions, &util.VMPodEviction{
PodToEvict: vmPod,
LiveMigrationInProgress: false,
})
}
return evictions
}
66 changes: 47 additions & 19 deletions pkg/controller/storagecluster/kubevirt.go
@@ -10,6 +10,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/libopenstorage/operator/pkg/constants"
"github.com/libopenstorage/operator/pkg/util"
coreops "github.com/portworx/sched-ops/k8s/core"
kubevirt "github.com/portworx/sched-ops/k8s/kubevirt-dynamic"
)
@@ -18,11 +19,11 @@ type KubevirtManager interface {
// ClusterHasVMPods returns true if the cluster has any KubeVirt VM Pods (running or not)
ClusterHasVMPods() (bool, error)

- // GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that are live-migratable
- GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error)
+ // GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that need to be evicted
+ GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error)

// StartEvictingVMPods starts live-migrating the virt-launcher pods to other nodes
- StartEvictingVMPods(virtLauncherPods []v1.Pod, controllerRevisionHash string,
+ StartEvictingVMPods(virtLauncherPods []*util.VMPodEviction, controllerRevisionHash string,
failedToEvictVMEventFunc func(message string))
}

@@ -53,8 +54,8 @@ func (k *kubevirtManagerImpl) ClusterHasVMPods() (bool, error) {
return len(virtLauncherPods) > 0, nil
}

- func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error) {
- virtLauncherPodsByNode := map[string][]v1.Pod{}
+ func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error) {
+ virtLauncherPodsByNode := map[string][]*util.VMPodEviction{}
// get a list of virt-launcher pods for each node
virtLauncherPods, err := k.getVirtLauncherPods()
if err != nil {
@@ -64,24 +65,37 @@ func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool)
if !wantNodes[pod.Spec.NodeName] {
continue
}
- shouldEvict, err := k.shouldLiveMigrateVM(&pod)
+ shouldEvict, migrInProgress, err := k.shouldLiveMigrateVM(&pod)
if err != nil {
return nil, err
}
if shouldEvict {
- virtLauncherPodsByNode[pod.Spec.NodeName] = append(virtLauncherPodsByNode[pod.Spec.NodeName], pod)
+ virtLauncherPodsByNode[pod.Spec.NodeName] = append(
+ virtLauncherPodsByNode[pod.Spec.NodeName],
+ &util.VMPodEviction{
+ PodToEvict: pod,
+ LiveMigrationInProgress: migrInProgress,
+ },
+ )
}
}
return virtLauncherPodsByNode, nil
}

func (k *kubevirtManagerImpl) StartEvictingVMPods(
- virtLauncherPods []v1.Pod, controllerRevisionHash string, failedToEvictVMEventFunc func(message string),
+ evictions []*util.VMPodEviction, controllerRevisionHash string, failedToEvictVMEventFunc func(message string),
) {
ctx := context.TODO()
OUTER:
- for _, pod := range virtLauncherPods {
- vmiName := k.getVMIName(&pod)
+ for _, eviction := range evictions {
+ pod := &eviction.PodToEvict
+ if eviction.LiveMigrationInProgress {
+ // Wait until the next Reconcile() cycle to check if the live-migration is completed.
+ logrus.Infof("Skipping eviction of virt-launcher pod %s/%s until the next reconcile cycle",
+ pod.Namespace, pod.Name)
+ continue
+ }
+ vmiName := k.getVMIName(pod)
if vmiName == "" {
// vmName should not be empty. Don't pause upgrade for such badly formed pods.
logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s", pod.Namespace, pod.Name)
@@ -119,6 +133,20 @@ OUTER:
continue OUTER
}
}
// Check if the VMI is still pointing to the same node as the virt-launcher pod. We already checked for this
// in shouldLiveMigrateVM() but we need to check again here because the VMI could have been live-migrated in
// the meantime. This reduces the chance of unnecessary live-migrations but does not close the hole fully.
vmi, err := k.kubevirtOps.GetVirtualMachineInstance(ctx, pod.Namespace, vmiName)
if err != nil {
logrus.Warnf("Failed to get VMI %s when evicting pod %s/%s: %v", vmiName, pod.Namespace, pod.Name, err)
continue
}
if vmi.NodeName != pod.Spec.NodeName {
logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Eviction not needed for pod %s.",
pod.Namespace, vmiName, vmi.NodeName, pod.Spec.NodeName, pod.Name)
continue
}
// All checks passed. Start the live-migration.
labels := map[string]string{
constants.OperatorLabelManagedByKey: constants.OperatorLabelManagedByValue,
}
@@ -154,20 +182,20 @@ func (k *kubevirtManagerImpl) getVMIMigrations(
return ret, nil
}

- func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, error) {
+ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, bool, error) {
// we only care about the pods that are not in a terminal state
if virtLauncherPod.Status.Phase == v1.PodSucceeded || virtLauncherPod.Status.Phase == v1.PodFailed {
- return false, nil
+ return false, false, nil
}
vmiName := k.getVMIName(virtLauncherPod)
if vmiName == "" {
logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s. Skipping live-migration.",
virtLauncherPod.Namespace, virtLauncherPod.Name)
- return false, nil
+ return false, false, nil
}
migrations, err := k.getVMIMigrations(virtLauncherPod.Namespace, vmiName)
if err != nil {
- return false, err
+ return false, false, err

}
for _, migration := range migrations {
if !migration.Completed {
Expand All @@ -177,17 +205,17 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
// Return "shouldEvict=true" and deal with it later.
logrus.Infof("Will check whether to evict pod %s/%s after the live-migration %s (%s) is completed.",
virtLauncherPod.Namespace, virtLauncherPod.Name, migration.Name, migration.Phase)
- return true, nil
+ return true, true, nil
}
}
// get VMI to check if the VM is live-migratable and if it is running on the same node as the virt-launcher pod
vmi, err := k.kubevirtOps.GetVirtualMachineInstance(context.TODO(), virtLauncherPod.Namespace, vmiName)
if err != nil {
if !errors.IsNotFound(err) {
- return false, fmt.Errorf("failed to get VMI %s/%s: %w", virtLauncherPod.Namespace, vmiName, err)
+ return false, false, fmt.Errorf("failed to get VMI %s/%s: %w", virtLauncherPod.Namespace, vmiName, err)
}
logrus.Warnf("VMI %s/%s was not found; skipping live-migration: %v", virtLauncherPod.Namespace, vmiName, err)
- return false, nil
+ return false, false, nil

}
// We already checked that there is no live migration in progress for this VMI.
// Ignore this pod if VMI says that the VM is running on another node. This can happen if
Expand All @@ -196,10 +224,10 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
if vmi.NodeName != virtLauncherPod.Spec.NodeName {
logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Skipping eviction of pod %s.",
virtLauncherPod.Namespace, vmiName, vmi.NodeName, virtLauncherPod.Spec.NodeName, virtLauncherPod.Name)
- return false, nil
+ return false, false, nil
}
// Ignore the VMs that are not live-migratable.
- return vmi.LiveMigratable, nil
+ return vmi.LiveMigratable, false, nil
}

func (k *kubevirtManagerImpl) getVirtLauncherPods() ([]v1.Pod, error) {
59 changes: 53 additions & 6 deletions pkg/controller/storagecluster/kubevirt_test.go
@@ -7,6 +7,7 @@ import (
"github.com/libopenstorage/operator/pkg/constants"
"github.com/libopenstorage/operator/pkg/mock/mockcore"
"github.com/libopenstorage/operator/pkg/mock/mockkubevirtdy"
"github.com/libopenstorage/operator/pkg/util"
kubevirt "github.com/portworx/sched-ops/k8s/kubevirt-dynamic"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -89,7 +90,8 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
require.NotEmpty(t, pods)
require.Len(t, pods, 1)
require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
require.Equal(t, "virt-launcher-1", pods["node1"][0].PodToEvict.Name)
require.False(t, pods["node1"][0].LiveMigrationInProgress)
}

// Test case: completed or failed virt-launcher pod should be ignored
@@ -164,7 +166,8 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
require.NotEmpty(t, pods)
require.Len(t, pods, 1)
require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
require.Equal(t, "virt-launcher-1", pods["node1"][0].PodToEvict.Name)
require.True(t, pods["node1"][0].LiveMigrationInProgress)

// Test case: VM was migrated out already but the source virt-launcher pod has not finished yet.
// VMI migration in completed state and VMI is pointing to a different node.
@@ -204,9 +207,37 @@ func TestStartEvictingVMPods(t *testing.T) {
// Test case: no migration exists
virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(nil, nil)
+ mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
expectedAnnotations, expectedLabels).Return(nil, nil)
- kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
+ kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+ {
+ PodToEvict: *virtLauncherPod,
+ LiveMigrationInProgress: false,
+ },
+ }, hash, func(message string) {})

// Test case: migration was in progress when we checked shouldLiveMigrate
virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: true,
},
}, hash, func(message string) {})

// Test case: VMI says that VM is running on a different node
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
// vmi is running on node2 even though there is a running virt-launcher pod on node1
vmi.NodeName = "node2"
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(nil, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: false,
},
}, hash, func(message string) {})

// Test case: migration in progress for the same VMI
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
@@ -220,7 +251,12 @@ func TestStartEvictingVMPods(t *testing.T) {
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
// No expectation for call to CreateMigration since no new migration should be created
- kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
+ kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+ {
+ PodToEvict: *virtLauncherPod,
+ LiveMigrationInProgress: false,
+ },
+ }, hash, func(message string) {})

// Test case: failed migration for the same VMI with the same controller revision hash from the same sourceNode.
// Should not create a new migration.
@@ -239,7 +275,12 @@ func TestStartEvictingVMPods(t *testing.T) {
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrFailed, nil)
// No expectation for call to CreateMigration since no new migration should be created
eventMsg := ""
- kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) { eventMsg = message })
+ kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+ {
+ PodToEvict: *virtLauncherPod,
+ LiveMigrationInProgress: false,
+ },
+ }, hash, func(message string) { eventMsg = message })
require.Contains(t, eventMsg, "Stop or migrate the VM so that the update of the storage node can proceed")

// Test case: Failed or in-progress migrations for a different VMI, different revision hash, different source node etc.
@@ -286,9 +327,15 @@ func TestStartEvictingVMPods(t *testing.T) {
}
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), "", gomock.Any()).Return(migrations, nil)
+ mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
expectedAnnotations, expectedLabels).Return(nil, nil)
- kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
+ kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
+ {
+ PodToEvict: *virtLauncherPod,
+ LiveMigrationInProgress: false,
+ },
+ }, hash, func(message string) {})
}

func getTestVirtLauncherPodAndVMI(podName, nodeName string) (*v1.Pod, *kubevirt.VirtualMachineInstance) {
2 changes: 1 addition & 1 deletion pkg/controller/storagecluster/update.go
@@ -164,7 +164,7 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
}

// check if we should live-migrate VMs before updating the storage node
- virtLauncherPodsByNode := map[string][]v1.Pod{}
+ virtLauncherPodsByNode := map[string][]*operatorutil.VMPodEviction{}
if len(oldPodsToDelete) > 0 && !forceContinueUpgrade(cluster) && evictVMsDuringUpdate(cluster) {
vmPodsPresent, err := c.kubevirt.ClusterHasVMPods()
if err != nil {
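
For orientation, here is a hedged sketch of how a caller such as the rolling-update path above might drive the updated KubevirtManager interface, assuming it sits next to the interface shown in pkg/controller/storagecluster/kubevirt.go. The function name and the wantNodes, hash, and recordEvent parameters are illustrative placeholders, not the controller's actual variables.

// Hypothetical caller-side flow; identifiers other than the interface methods are placeholders.
func evictVMsBeforeUpdate(mgr KubevirtManager, wantNodes map[string]bool,
    hash string, recordEvent func(message string)) error {
    hasVMs, err := mgr.ClusterHasVMPods()
    if err != nil || !hasVMs {
        return err
    }
    evictionsByNode, err := mgr.GetVMPodsToEvictByNode(wantNodes)
    if err != nil {
        return err
    }
    for _, evictions := range evictionsByNode {
        // Entries flagged LiveMigrationInProgress are skipped inside
        // StartEvictingVMPods and revisited on the next Reconcile() cycle.
        mgr.StartEvictingVMPods(evictions, hash, recordEvent)
    }
    return nil
}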
8 changes: 4 additions & 4 deletions pkg/mock/kubevirtmanager.mock.go

Some generated files are not rendered by default.

9 changes: 9 additions & 0 deletions pkg/util/util.go
@@ -89,6 +89,15 @@ const (
ValidMinAvailable = "ValidMinAvailable"
)

// VMPodEviction has info about the virt-launcher pod that needs to be evicted before upgrading PX on a node
type VMPodEviction struct {
// PodToEvict is the virt-launcher pod that needs to be evicted
PodToEvict v1.Pod
// LiveMigrationInProgress is true if a live-migration is already in progress for this VM. In this case, the
// eviction should be skipped until the next reconcile cycle.
LiveMigrationInProgress bool
}

var (
// commonDockerRegistries is a map of commonly used Docker registries
commonDockerRegistries = map[string]bool{
