Skip to content

Commit

Permalink
Fix WaitGroup panic issue
Browse files Browse the repository at this point in the history
Make sure WaitGroup.Add() is called before WaitGroup.Done() to avoid WaitGroup panic issue

Fixes #8657

Signed-off-by: Wenkai Yin(尹文开) <yinw@vmware.com>
  • Loading branch information
ywk253100 committed Feb 12, 2025
1 parent 5d9a4e8 commit cdcd6eb
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 24 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/8679-ywk253100
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix #8657: WaitGroup panic issue
2 changes: 1 addition & 1 deletion pkg/backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ func (kb *kubernetesBackupper) waitUntilPVBsProcessed(ctx context.Context, log l
if processed {
continue
}
updatedPVB, err := itemBlock.itemBackupper.podVolumeBackupper.GetPodVolumeBackup(pvb.Namespace, pvb.Name)
updatedPVB, err := itemBlock.itemBackupper.podVolumeBackupper.GetPodVolumeBackupByPodAndVolume(pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name, pvb.Spec.Volume)

Check warning on line 783 in pkg/backup/backup.go

View check run for this annotation

Codecov / codecov/patch

pkg/backup/backup.go#L783

Added line #L783 was not covered by tests
if err != nil {
allProcessed = false
log.Infof("failed to get PVB: %v", err)
Expand Down
4 changes: 2 additions & 2 deletions pkg/backup/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3978,9 +3978,9 @@ func (b *fakePodVolumeBackupper) WaitAllPodVolumesProcessed(log logrus.FieldLogg
return b.pvbs
}

func (b *fakePodVolumeBackupper) GetPodVolumeBackup(namespace, name string) (*velerov1.PodVolumeBackup, error) {
func (b *fakePodVolumeBackupper) GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1.PodVolumeBackup, error) {
for _, pvb := range b.pvbs {
if pvb.Namespace == namespace && pvb.Name == name {
if pvb.Spec.Pod.Namespace == podNamespace && pvb.Spec.Pod.Name == podName && pvb.Spec.Volume == volume {
return pvb, nil
}
}
Expand Down
41 changes: 30 additions & 11 deletions pkg/podvolume/backupper.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,17 @@ import (
"github.com/vmware-tanzu/velero/pkg/util/kube"
)

const (
indexNamePod = "POD"
pvbKeyPattern = "%s+%s+%s"
)

// Backupper can execute pod volume backups of volumes in a pod.
type Backupper interface {
// BackupPodVolumes backs up all specified volumes in a pod.
BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.Pod, volumesToBackup []string, resPolicies *resourcepolicies.Policies, log logrus.FieldLogger) ([]*velerov1api.PodVolumeBackup, *PVCBackupSummary, []error)
WaitAllPodVolumesProcessed(log logrus.FieldLogger) []*velerov1api.PodVolumeBackup
GetPodVolumeBackup(namespace, name string) (*velerov1api.PodVolumeBackup, error)
GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1api.PodVolumeBackup, error)
ListPodVolumeBackupsByPod(podNamespace, podName string) ([]*velerov1api.PodVolumeBackup, error)
}

Expand Down Expand Up @@ -106,8 +111,6 @@ func (pbs *PVCBackupSummary) addSkipped(volumeName string, reason string) {
}
}

const indexNamePod = "POD"

func podIndexFunc(obj any) ([]string, error) {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
Expand All @@ -119,6 +122,16 @@ func podIndexFunc(obj any) ([]string, error) {
return []string{cache.NewObjectName(pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name).String()}, nil
}

// the PVB's name is auto-generated when creating the PVB, we cannot get the name or uid before creating it.
// So we cannot use namespace&name or uid as the key because we need to insert PVB into the indexer before creating it in API server
func podVolumeBackupKey(obj any) (string, error) {
pvb, ok := obj.(*velerov1api.PodVolumeBackup)
if !ok {
return "", fmt.Errorf("expected PodVolumeBackup, but got %T", obj)
}

Check warning on line 131 in pkg/podvolume/backupper.go

View check run for this annotation

Codecov / codecov/patch

pkg/podvolume/backupper.go#L130-L131

Added lines #L130 - L131 were not covered by tests
return fmt.Sprintf(pvbKeyPattern, pvb.Spec.Pod.Namespace, pvb.Spec.Pod.Name, pvb.Spec.Volume), nil
}

func newBackupper(
ctx context.Context,
log logrus.FieldLogger,
Expand All @@ -137,7 +150,7 @@ func newBackupper(
uploaderType: uploaderType,
pvbInformer: pvbInformer,
wg: sync.WaitGroup{},
pvbIndexer: cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{
pvbIndexer: cache.NewIndexer(podVolumeBackupKey, cache.Indexers{
indexNamePod: podIndexFunc,
}),
}
Expand Down Expand Up @@ -341,14 +354,20 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.
}

volumeBackup := newPodVolumeBackup(backup, pod, volume, repoIdentifier, b.uploaderType, pvc)
if err := veleroclient.CreateRetryGenerateName(b.crClient, b.ctx, volumeBackup); err != nil {
errs = append(errs, err)
// the PVB must be added into the indexer before creating it in API server otherwise unexpected behavior may happen:
// the PVB may be handled very quickly by the controller and the informer handler will insert the PVB before "b.pvbIndexer.Add(volumeBackup)" runs,
// this causes the PVB inserted by "b.pvbIndexer.Add(volumeBackup)" overrides the PVB in the indexer while the PVB inserted by "b.pvbIndexer.Add(volumeBackup)"
// contains empty "Status"
if err := b.pvbIndexer.Add(volumeBackup); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to add PodVolumeBackup %s/%s to indexer", volumeBackup.Namespace, volumeBackup.Name))

Check warning on line 362 in pkg/podvolume/backupper.go

View check run for this annotation

Codecov / codecov/patch

pkg/podvolume/backupper.go#L362

Added line #L362 was not covered by tests
continue
}
// similar with above: the PVB may be handled very quickly by the controller and the informer handler will call "b.wg.Done()" before "b.wg.Add(1)" runs which causes panic
// see https://github.com/vmware-tanzu/velero/issues/8657
b.wg.Add(1)

if err := b.pvbIndexer.Add(volumeBackup); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to add PodVolumeBackup %s/%s to indexer", volumeBackup.Namespace, volumeBackup.Name))
if err := veleroclient.CreateRetryGenerateName(b.crClient, b.ctx, volumeBackup); err != nil {
b.wg.Done()
errs = append(errs, err)

Check warning on line 370 in pkg/podvolume/backupper.go

View check run for this annotation

Codecov / codecov/patch

pkg/podvolume/backupper.go#L369-L370

Added lines #L369 - L370 were not covered by tests
continue
}

Expand Down Expand Up @@ -392,8 +411,8 @@ func (b *backupper) WaitAllPodVolumesProcessed(log logrus.FieldLogger) []*velero
return podVolumeBackups
}

func (b *backupper) GetPodVolumeBackup(namespace, name string) (*velerov1api.PodVolumeBackup, error) {
obj, exist, err := b.pvbIndexer.GetByKey(cache.NewObjectName(namespace, name).String())
func (b *backupper) GetPodVolumeBackupByPodAndVolume(podNamespace, podName, volume string) (*velerov1api.PodVolumeBackup, error) {
obj, exist, err := b.pvbIndexer.GetByKey(fmt.Sprintf(pvbKeyPattern, podNamespace, podName, volume))
if err != nil {
return nil, err
}
Expand Down
27 changes: 17 additions & 10 deletions pkg/podvolume/backupper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,37 +620,44 @@ func TestBackupPodVolumes(t *testing.T) {
}
}

func TestGetPodVolumeBackup(t *testing.T) {
func TestGetPodVolumeBackupByPodAndVolume(t *testing.T) {
backupper := &backupper{
pvbIndexer: cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{
pvbIndexer: cache.NewIndexer(podVolumeBackupKey, cache.Indexers{
indexNamePod: podIndexFunc,
}),
}

obj := &velerov1api.PodVolumeBackup{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Name: "pvb",
},
Spec: velerov1api.PodVolumeBackupSpec{
Pod: corev1api.ObjectReference{
Kind: "Pod",
Namespace: "default",
Name: "pod",
},
Volume: "volume",
},
}

err := backupper.pvbIndexer.Add(obj)
require.NoError(t, err)

// not exist PVB
pvb, err := backupper.GetPodVolumeBackup("invalid-namespace", "invalid-name")
// incorrect pod namespace
pvb, err := backupper.GetPodVolumeBackupByPodAndVolume("invalid-namespace", "pod", "volume")
require.NoError(t, err)
assert.Nil(t, pvb)

// incorrect pod name
pvb, err = backupper.GetPodVolumeBackupByPodAndVolume("default", "invalid-pod", "volume")
require.NoError(t, err)
assert.Nil(t, pvb)

// incorrect volume
pvb, err = backupper.GetPodVolumeBackupByPodAndVolume("default", "pod", "invalid-volume")
require.NoError(t, err)
assert.Nil(t, pvb)

// exist PVB
pvb, err = backupper.GetPodVolumeBackup("velero", "pvb")
// correct pod namespace, name and volume
pvb, err = backupper.GetPodVolumeBackupByPodAndVolume("default", "pod", "volume")
require.NoError(t, err)
assert.NotNil(t, pvb)
}
Expand Down

0 comments on commit cdcd6eb

Please sign in to comment.