Merge branch 'master' into PWX-36509
hitesh-wani-px authored Jun 10, 2024
2 parents d7a4451 + 3b53125 commit 2e6a04a
Showing 6 changed files with 136 additions and 37 deletions.
29 changes: 22 additions & 7 deletions pkg/controller/storagecluster/controller_test.go
@@ -2695,9 +2695,10 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
k8sNodes[1].Name: true,
k8sNodes[2].Name: true,
}
evictions := getVMPodEvictions(t, vmPods)
kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(
map[string][]v1.Pod{k8sNodes[1].Name: vmPods}, nil)
kubevirt.EXPECT().StartEvictingVMPods(vmPods, gomock.Any(), gomock.Any())
map[string][]*util.VMPodEviction{k8sNodes[1].Name: evictions}, nil)
kubevirt.EXPECT().StartEvictingVMPods(evictions, gomock.Any(), gomock.Any())

result, err = controller.Reconcile(context.TODO(), request)
require.NoError(t, err)
@@ -2750,13 +2751,16 @@ func TestKubevirtVMsDuringUpgrade(t *testing.T) {
},
},
}
evictionsNode0 := getVMPodEvictions(t, vmPodsNode0)
evictionsNode2 := getVMPodEvictions(t, vmPodsNode2)

kubevirt.EXPECT().ClusterHasVMPods().Return(true, nil)
kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]v1.Pod{
k8sNodes[0].Name: vmPodsNode0,
k8sNodes[2].Name: vmPodsNode2,
kubevirt.EXPECT().GetVMPodsToEvictByNode(wantNodes).Return(map[string][]*util.VMPodEviction{
k8sNodes[0].Name: evictionsNode0,
k8sNodes[2].Name: evictionsNode2,
}, nil)
kubevirt.EXPECT().StartEvictingVMPods(vmPodsNode0, gomock.Any(), gomock.Any())
kubevirt.EXPECT().StartEvictingVMPods(vmPodsNode2, gomock.Any(), gomock.Any())
kubevirt.EXPECT().StartEvictingVMPods(evictionsNode0, gomock.Any(), gomock.Any())
kubevirt.EXPECT().StartEvictingVMPods(evictionsNode2, gomock.Any(), gomock.Any())

result, err = controller.Reconcile(context.TODO(), request)
require.NoError(t, err)
@@ -10669,3 +10673,14 @@ func getNode(t *testing.T, k8sclient client.Client, nodeName string) *v1.Node {
require.NoError(t, err)
return node
}

func getVMPodEvictions(t *testing.T, podsToEvict []v1.Pod) []*util.VMPodEviction {
var evictions []*util.VMPodEviction
for _, vmPod := range podsToEvict {
evictions = append(evictions, &util.VMPodEviction{
PodToEvict: vmPod,
LiveMigrationInProgress: false,
})
}
return evictions
}
66 changes: 47 additions & 19 deletions pkg/controller/storagecluster/kubevirt.go
@@ -10,6 +10,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/libopenstorage/operator/pkg/constants"
"github.com/libopenstorage/operator/pkg/util"
coreops "github.com/portworx/sched-ops/k8s/core"
kubevirt "github.com/portworx/sched-ops/k8s/kubevirt-dynamic"
)
@@ -18,11 +19,11 @@ type KubevirtManager interface {
// ClusterHasVMPods returns true if the cluster has any KubeVirt VM Pods (running or not)
ClusterHasVMPods() (bool, error)

// GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that are live-migratable
GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error)
// GetVMPodsToEvictByNode returns a map of node name to a list of virt-launcher pods that need to be evicted
GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error)

// StartEvictingVMPods starts live-migrating the virt-launcher pods to other nodes
StartEvictingVMPods(virtLauncherPods []v1.Pod, controllerRevisionHash string,
StartEvictingVMPods(virtLauncherPods []*util.VMPodEviction, controllerRevisionHash string,
failedToEvictVMEventFunc func(message string))
}

@@ -53,8 +54,8 @@ func (k *kubevirtManagerImpl) ClusterHasVMPods() (bool, error) {
return len(virtLauncherPods) > 0, nil
}

func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]v1.Pod, error) {
virtLauncherPodsByNode := map[string][]v1.Pod{}
func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error) {
virtLauncherPodsByNode := map[string][]*util.VMPodEviction{}
// get a list of virt-launcher pods for each node
virtLauncherPods, err := k.getVirtLauncherPods()
if err != nil {
@@ -64,24 +65,37 @@ func (k *kubevirtManagerImpl) GetVMPodsToEvictByNode(wantNodes map[string]bool)
if !wantNodes[pod.Spec.NodeName] {
continue
}
shouldEvict, err := k.shouldLiveMigrateVM(&pod)
shouldEvict, migrInProgress, err := k.shouldLiveMigrateVM(&pod)
if err != nil {
return nil, err
}
if shouldEvict {
virtLauncherPodsByNode[pod.Spec.NodeName] = append(virtLauncherPodsByNode[pod.Spec.NodeName], pod)
virtLauncherPodsByNode[pod.Spec.NodeName] = append(
virtLauncherPodsByNode[pod.Spec.NodeName],
&util.VMPodEviction{
PodToEvict: pod,
LiveMigrationInProgress: migrInProgress,
},
)
}
}
return virtLauncherPodsByNode, nil
}

func (k *kubevirtManagerImpl) StartEvictingVMPods(
virtLauncherPods []v1.Pod, controllerRevisionHash string, failedToEvictVMEventFunc func(message string),
evictions []*util.VMPodEviction, controllerRevisionHash string, failedToEvictVMEventFunc func(message string),
) {
ctx := context.TODO()
OUTER:
for _, pod := range virtLauncherPods {
vmiName := k.getVMIName(&pod)
for _, eviction := range evictions {
pod := &eviction.PodToEvict
if eviction.LiveMigrationInProgress {
// Wait until the next Reconcile() cycle to check if the live-migration is completed.
logrus.Infof("Skipping eviction of virt-launcher pod %s/%s until the next reconcile cycle",
pod.Namespace, pod.Name)
continue
}
vmiName := k.getVMIName(pod)
if vmiName == "" {
// vmiName should not be empty. Don't pause the upgrade for such badly formed pods.
logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s", pod.Namespace, pod.Name)
@@ -119,6 +133,20 @@ OUTER:
continue OUTER
}
}
// Check if the VMI is still pointing to the same node as the virt-launcher pod. We already checked for this
// in shouldLiveMigrateVM() but we need to check again here because the VMI could have been live-migrated in
// the meantime. This reduces the chance of unnecessary live-migrations but does not close the hole fully.
vmi, err := k.kubevirtOps.GetVirtualMachineInstance(ctx, pod.Namespace, vmiName)
if err != nil {
logrus.Warnf("Failed to get VMI %s when evicting pod %s/%s: %v", vmiName, pod.Namespace, pod.Name, err)
continue
}
if vmi.NodeName != pod.Spec.NodeName {
logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Eviction not needed for pod %s.",
pod.Namespace, vmiName, vmi.NodeName, pod.Spec.NodeName, pod.Name)
continue
}
// All checks passed. Start the live-migration.
labels := map[string]string{
constants.OperatorLabelManagedByKey: constants.OperatorLabelManagedByValue,
}
@@ -154,20 +182,20 @@ func (k *kubevirtManagerImpl) getVMIMigrations(
return ret, nil
}

func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, error) {
func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool, bool, error) {
// we only care about the pods that are not in a terminal state
if virtLauncherPod.Status.Phase == v1.PodSucceeded || virtLauncherPod.Status.Phase == v1.PodFailed {
return false, nil
return false, false, nil
}
vmiName := k.getVMIName(virtLauncherPod)
if vmiName == "" {
logrus.Warnf("Failed to get VMI name for virt-launcher pod %s/%s. Skipping live-migration.",
virtLauncherPod.Namespace, virtLauncherPod.Name)
return false, nil
return false, false, nil
}
migrations, err := k.getVMIMigrations(virtLauncherPod.Namespace, vmiName)
if err != nil {
return false, err
return false, false, err
}
for _, migration := range migrations {
if !migration.Completed {
@@ -177,17 +205,17 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
// Return "shouldEvict=true" and deal with it later.
logrus.Infof("Will check whether to evict pod %s/%s after the live-migration %s (%s) is completed.",
virtLauncherPod.Namespace, virtLauncherPod.Name, migration.Name, migration.Phase)
return true, nil
return true, true, nil
}
}
// get VMI to check if the VM is live-migratable and if it is running on the same node as the virt-launcher pod
vmi, err := k.kubevirtOps.GetVirtualMachineInstance(context.TODO(), virtLauncherPod.Namespace, vmiName)
if err != nil {
if !errors.IsNotFound(err) {
return false, fmt.Errorf("failed to get VMI %s/%s: %w", virtLauncherPod.Namespace, vmiName, err)
return false, false, fmt.Errorf("failed to get VMI %s/%s: %w", virtLauncherPod.Namespace, vmiName, err)
}
logrus.Warnf("VMI %s/%s was not found; skipping live-migration: %v", virtLauncherPod.Namespace, vmiName, err)
return false, nil
return false, false, nil
}
// We already checked that there is no live migration in progress for this VMI.
// Ignore this pod if VMI says that the VM is running on another node. This can happen if
@@ -196,10 +224,10 @@ func (k *kubevirtManagerImpl) shouldLiveMigrateVM(virtLauncherPod *v1.Pod) (bool
if vmi.NodeName != virtLauncherPod.Spec.NodeName {
logrus.Infof("VMI %s/%s is running on node %s, not on node %s. Skipping eviction of pod %s.",
virtLauncherPod.Namespace, vmiName, vmi.NodeName, virtLauncherPod.Spec.NodeName, virtLauncherPod.Name)
return false, nil
return false, false, nil
}
// Ignore the VMs that are not live-migratable.
return vmi.LiveMigratable, nil
return vmi.LiveMigratable, false, nil
}

func (k *kubevirtManagerImpl) getVirtLauncherPods() ([]v1.Pod, error) {
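
For context, the main caller of this interface (rollingUpdate in update.go) is largely collapsed in this view. Below is a minimal, hedged sketch of how the updated KubevirtManager is meant to be driven; it assumes the code sits alongside the interface in the storagecluster package, and the helper names (evictVMsBeforeUpdate, nodesBeingUpdated, warnEvent) are illustrative, not part of this commit.

// Sketch only: fetch per-node evictions once, then kick off live-migrations for each node
// whose storage pod is about to be replaced.
func evictVMsBeforeUpdate(
	km KubevirtManager,
	nodesBeingUpdated map[string]bool,
	revisionHash string,
	warnEvent func(message string),
) (map[string][]*util.VMPodEviction, error) {
	hasVMs, err := km.ClusterHasVMPods()
	if err != nil || !hasVMs {
		return nil, err
	}
	evictionsByNode, err := km.GetVMPodsToEvictByNode(nodesBeingUpdated)
	if err != nil {
		return nil, err
	}
	for _, evictions := range evictionsByNode {
		// Entries with LiveMigrationInProgress=true are skipped inside StartEvictingVMPods
		// and re-examined on the next reconcile cycle.
		km.StartEvictingVMPods(evictions, revisionHash, warnEvent)
	}
	return evictionsByNode, nil
}
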
59 changes: 53 additions & 6 deletions pkg/controller/storagecluster/kubevirt_test.go
@@ -7,6 +7,7 @@ import (
"github.com/libopenstorage/operator/pkg/constants"
"github.com/libopenstorage/operator/pkg/mock/mockcore"
"github.com/libopenstorage/operator/pkg/mock/mockkubevirtdy"
"github.com/libopenstorage/operator/pkg/util"
kubevirt "github.com/portworx/sched-ops/k8s/kubevirt-dynamic"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -89,7 +90,8 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
require.NotEmpty(t, pods)
require.Len(t, pods, 1)
require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
require.Equal(t, "virt-launcher-1", pods["node1"][0].PodToEvict.Name)
require.False(t, pods["node1"][0].LiveMigrationInProgress)
}

// Test case: completed or failed virt-launcher pod should be ignored
@@ -164,7 +166,8 @@ func TestGetVMPodsToEvictByNode(t *testing.T) {
require.NotEmpty(t, pods)
require.Len(t, pods, 1)
require.Len(t, pods[virtLauncherPod.Spec.NodeName], 1)
require.Equal(t, "virt-launcher-1", pods["node1"][0].Name)
require.Equal(t, "virt-launcher-1", pods["node1"][0].PodToEvict.Name)
require.True(t, pods["node1"][0].LiveMigrationInProgress)

// Test case: VM was migrated out already but the source virt-launcher pod has not finished yet.
// VMI migration in completed state and VMI is pointing to a different node.
@@ -204,9 +207,37 @@ func TestStartEvictingVMPods(t *testing.T) {
// Test case: no migration exists
virtLauncherPod, vmi := getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(nil, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
expectedAnnotations, expectedLabels).Return(nil, nil)
kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: false,
},
}, hash, func(message string) {})

// Test case: migration was in progress when we checked shouldLiveMigrate
virtLauncherPod, _ = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: true,
},
}, hash, func(message string) {})

// Test case: VMI says that VM is running on a different node
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
// vmi is running on node2 even though there is a running virt-launcher pod on node1
vmi.NodeName = "node2"
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(nil, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: false,
},
}, hash, func(message string) {})

// Test case: migration in progress for the same VMI
virtLauncherPod, vmi = getTestVirtLauncherPodAndVMI("virt-launcher-1", "node1")
@@ -220,7 +251,12 @@ func TestStartEvictingVMPods(t *testing.T) {
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), "", gomock.Any()).Return(migrInProgress, nil)
// No expectation for call to CreateMigration since no new migration should be created
kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: false,
},
}, hash, func(message string) {})

// Test case: failed migration for the same VMI with the same controller revision hash from the same sourceNode.
// Should not create a new migration.
@@ -239,7 +275,12 @@ func TestStartEvictingVMPods(t *testing.T) {
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(gomock.Any(), "", gomock.Any()).Return(migrFailed, nil)
// No expectation for call to CreateMigration since no new migration should be created
eventMsg := ""
kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) { eventMsg = message })
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: false,
},
}, hash, func(message string) { eventMsg = message })
require.Contains(t, eventMsg, "Stop or migrate the VM so that the update of the storage node can proceed")

// Test case: Failed or in-progress migrations for a different VMI, different revision hash, different source node etc.
@@ -286,9 +327,15 @@ func TestStartEvictingVMPods(t *testing.T) {
}
mockKubeVirtOps.EXPECT().ListVirtualMachineInstanceMigrations(
gomock.Any(), "", gomock.Any()).Return(migrations, nil)
mockKubeVirtOps.EXPECT().GetVirtualMachineInstance(gomock.Any(), gomock.Any(), vmi.Name).Return(vmi, nil)
mockKubeVirtOps.EXPECT().CreateVirtualMachineInstanceMigrationWithParams(gomock.Any(), "", vmi.Name, "", "",
expectedAnnotations, expectedLabels).Return(nil, nil)
kvmgr.StartEvictingVMPods([]v1.Pod{*virtLauncherPod}, hash, func(message string) {})
kvmgr.StartEvictingVMPods([]*util.VMPodEviction{
{
PodToEvict: *virtLauncherPod,
LiveMigrationInProgress: false,
},
}, hash, func(message string) {})
}

func getTestVirtLauncherPodAndVMI(podName, nodeName string) (*v1.Pod, *kubevirt.VirtualMachineInstance) {
2 changes: 1 addition & 1 deletion pkg/controller/storagecluster/update.go
@@ -164,7 +164,7 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string,
}

// check if we should live-migrate VMs before updating the storage node
virtLauncherPodsByNode := map[string][]v1.Pod{}
virtLauncherPodsByNode := map[string][]*operatorutil.VMPodEviction{}
if len(oldPodsToDelete) > 0 && !forceContinueUpgrade(cluster) && evictVMsDuringUpdate(cluster) {
vmPodsPresent, err := c.kubevirt.ClusterHasVMPods()
if err != nil {
8 changes: 4 additions & 4 deletions pkg/mock/kubevirtmanager.mock.go

Some generated files are not rendered by default.
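
The regenerated mock itself is not shown, but gomock-generated stubs follow a fixed pattern; here is a hedged sketch of what the updated methods in kubevirtmanager.mock.go presumably look like (the exact generated names are assumptions, not copied from this commit):

// Presumed shape of the regenerated mock methods for the new signatures.
func (m *MockKubevirtManager) GetVMPodsToEvictByNode(wantNodes map[string]bool) (map[string][]*util.VMPodEviction, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetVMPodsToEvictByNode", wantNodes)
	ret0, _ := ret[0].(map[string][]*util.VMPodEviction)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

func (m *MockKubevirtManager) StartEvictingVMPods(virtLauncherPods []*util.VMPodEviction,
	controllerRevisionHash string, failedToEvictVMEventFunc func(message string)) {
	m.ctrl.T.Helper()
	m.ctrl.Call(m, "StartEvictingVMPods", virtLauncherPods, controllerRevisionHash, failedToEvictVMEventFunc)
}
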

9 changes: 9 additions & 0 deletions pkg/util/util.go
@@ -89,6 +89,15 @@ const (
ValidMinAvailable = "ValidMinAvailable"
)

// VMPodEviction has info about the virt-launcher pod that needs to be evicted before upgrading PX on a node
type VMPodEviction struct {
// PodToEvict is the virt-launcher pod that needs to be evicted
PodToEvict v1.Pod
// LiveMigrationInProgress is true if an in-progress live-migration exists for this VM. In this case, the eviction
// should be skipped until the next reconcile cycle.
LiveMigrationInProgress bool
}

var (
// commonDockerRegistries is a map of commonly used Docker registries
commonDockerRegistries = map[string]bool{
