Skip to content

Commit

Permalink
Recreate kind cluster after each K8s test
Browse files Browse the repository at this point in the history
This is a temporary measure to improve K8s test reliability until we
can run the tests in parallel. The tests currently run sequentially, and
each kind cluster is used only once, yet clusters are not cleaned up until
all tests have finished running. Creating and deleting kind
clusters is fast relative to test run time, so
cleaning up each cluster right after its test shouldn't impact total run time significantly.
  • Loading branch information
swiatekm committed Mar 3, 2025
1 parent 508271b commit ec65a9f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 30 deletions.
61 changes: 31 additions & 30 deletions magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -1934,7 +1934,7 @@ func (Integration) Clean() error {
_, err := os.Stat(".integration-cache")
if err == nil {
// .integration-cache exists; need to run `Clean` from the runner
r, err := createTestRunner(false, "", "")
r, err := createTestRunner(false, "", "", false)
if err != nil {
return fmt.Errorf("error creating test runner: %w", err)
}
Expand Down Expand Up @@ -2007,17 +2007,17 @@ func (Integration) Auth(ctx context.Context) error {

// Test runs integration tests on remote hosts
func (Integration) Test(ctx context.Context) error {
return integRunner(ctx, false, "")
return integRunner(ctx, false, "", false)
}

// Matrix runs integration tests on a matrix of all supported remote hosts
func (Integration) Matrix(ctx context.Context) error {
return integRunner(ctx, true, "")
return integRunner(ctx, true, "", false)
}

// Single runs single integration test on remote host
func (Integration) Single(ctx context.Context, testName string) error {
return integRunner(ctx, false, testName)
return integRunner(ctx, false, testName, false)
}

// Kubernetes runs kubernetes integration tests
Expand All @@ -2027,7 +2027,7 @@ func (Integration) Kubernetes(ctx context.Context) error {
return err
}

return integRunner(ctx, false, "")
return integRunner(ctx, false, "", false)
}

// KubernetesMatrix runs a matrix of kubernetes integration tests
Expand All @@ -2037,7 +2037,7 @@ func (Integration) KubernetesMatrix(ctx context.Context) error {
return err
}

return integRunner(ctx, true, "")
return integRunner(ctx, true, "", true)
}

// UpdateVersions runs an update on the `.agent-versions.yml` fetching
Expand Down Expand Up @@ -2514,15 +2514,15 @@ func (Integration) TestBeatServerless(ctx context.Context, beatname string) erro
if err != nil {
return fmt.Errorf("error setting binary name: %w", err)
}
return integRunner(ctx, false, "TestBeatsServerless")
return integRunner(ctx, false, "TestBeatsServerless", false)
}

func (Integration) TestForResourceLeaks(ctx context.Context) error {
err := os.Setenv("TEST_LONG_RUNNING", "true")
if err != nil {
return fmt.Errorf("error setting TEST_LONG_RUNNING: %w", err)
}
return integRunner(ctx, false, "TestLongRunningAgentForLeaks")
return integRunner(ctx, false, "TestLongRunningAgentForLeaks", false)
}

// TestOnRemote shouldn't be called locally (called on remote host to perform testing)
Expand Down Expand Up @@ -2649,7 +2649,7 @@ func (Integration) Buildkite() error {
return nil
}

func integRunner(ctx context.Context, matrix bool, singleTest string) error {
func integRunner(ctx context.Context, matrix bool, singleTest string, deleteInstanceAfterTest bool) error {
if _, ok := ctx.Deadline(); !ok {
// If the context doesn't have a timeout (usually via the mage -t option), give it one.
var cancel context.CancelFunc
Expand All @@ -2658,7 +2658,7 @@ func integRunner(ctx context.Context, matrix bool, singleTest string) error {
}

for {
failedCount, err := integRunnerOnce(ctx, matrix, singleTest)
failedCount, err := integRunnerOnce(ctx, matrix, singleTest, deleteInstanceAfterTest)
if err != nil {
return err
}
Expand All @@ -2677,14 +2677,14 @@ func integRunner(ctx context.Context, matrix bool, singleTest string) error {
}
}

func integRunnerOnce(ctx context.Context, matrix bool, singleTest string) (int, error) {
func integRunnerOnce(ctx context.Context, matrix bool, singleTest string, deleteInstanceAfterTest bool) (int, error) {
goTestFlags := os.Getenv("GOTEST_FLAGS")

batches, err := define.DetermineBatches("testing/integration", goTestFlags, "integration")
if err != nil {
return 0, fmt.Errorf("failed to determine batches: %w", err)
}
r, err := createTestRunner(matrix, singleTest, goTestFlags, batches...)
r, err := createTestRunner(matrix, singleTest, goTestFlags, deleteInstanceAfterTest, batches...)
if err != nil {
return 0, fmt.Errorf("error creating test runner: %w", err)
}
Expand Down Expand Up @@ -2746,7 +2746,7 @@ func getTestRunnerVersions() (string, string, error) {
return agentVersion, agentStackVersion, nil
}

func createTestRunner(matrix bool, singleTest string, goTestFlags string, batches ...define.Batch) (*runner.Runner, error) {
func createTestRunner(matrix bool, singleTest string, goTestFlags string, deleteInstanceAfterTest bool, batches ...define.Batch) (*runner.Runner, error) {
goVersion, err := mage.DefaultBeatBuildVariableSources.GetGoVersion()
if err != nil {
return nil, err
Expand Down Expand Up @@ -2872,23 +2872,24 @@ func createTestRunner(matrix bool, singleTest string, goTestFlags string, batche
_ = os.MkdirAll(diagDir, 0o755)

cfg := tcommon.Config{
AgentVersion: agentVersion,
StackVersion: agentStackVersion,
BuildDir: agentBuildDir,
GOVersion: goVersion,
RepoDir: repoDir,
DiagnosticsDir: diagDir,
StateDir: ".integration-cache",
Platforms: testPlatforms(),
Packages: testPackages(),
Groups: testGroups(),
Matrix: matrix,
SingleTest: singleTest,
VerboseMode: mg.Verbose(),
Timestamp: timestamp,
TestFlags: goTestFlags,
ExtraEnv: extraEnv,
BinaryName: binaryName,
AgentVersion: agentVersion,
StackVersion: agentStackVersion,
BuildDir: agentBuildDir,
GOVersion: goVersion,
RepoDir: repoDir,
DiagnosticsDir: diagDir,
StateDir: ".integration-cache",
Platforms: testPlatforms(),
Packages: testPackages(),
Groups: testGroups(),
Matrix: matrix,
SingleTest: singleTest,
VerboseMode: mg.Verbose(),
DeleteInstanceAfterTest: deleteInstanceAfterTest,
Timestamp: timestamp,
TestFlags: goTestFlags,
ExtraEnv: extraEnv,
BinaryName: binaryName,
}

r, err := runner.NewRunner(cfg, instanceProvisioner, stackProvisioner, batches...)
Expand Down
3 changes: 3 additions & 0 deletions pkg/testing/common/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ type Config struct {
// VerboseMode passed along a verbose mode flag to tests
VerboseMode bool

// DeleteInstanceAfterTest causes the runner to delete instances immediately after test completion.
DeleteInstanceAfterTest bool

// Timestamp enables timestamps on the console output.
Timestamp bool

Expand Down
7 changes: 7 additions & 0 deletions pkg/testing/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,13 @@ func (r *Runner) runK8sInstances(ctx context.Context, instances []StateInstance)
resultsMx.Lock()
results[batch.ID] = result
resultsMx.Unlock()
if r.cfg.DeleteInstanceAfterTest {
logger.Logf("Cleaning up instance after test: %s", instance.Name)
err = r.ip.Clean(ctx, r.cfg, []common.Instance{instance.Instance})
if err != nil {
logger.Logf("Failed to clean up instance %s: %v", instance.Name, err)
}
}
}
if err != nil {
return nil, err
Expand Down

0 comments on commit ec65a9f

Please sign in to comment.