From 2efc3075e63cd4e48ddff35c736efc7c45d114ca Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Sat, 27 Jul 2024 17:44:35 +0530 Subject: [PATCH 01/11] chore: fix monitoring test cases --- .../integration/monitoring_probe_preserve_text_cfg_test.go | 4 ++-- testing/integration/monitoring_probe_reload_test.go | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/testing/integration/monitoring_probe_preserve_text_cfg_test.go b/testing/integration/monitoring_probe_preserve_text_cfg_test.go index a7af3c597f0..97d0b49cd7a 100644 --- a/testing/integration/monitoring_probe_preserve_text_cfg_test.go +++ b/testing/integration/monitoring_probe_preserve_text_cfg_test.go @@ -54,6 +54,7 @@ inputs: - filesystem data_stream.dataset: system.filesystem agent.monitoring: + metrics_period: 1s http: enabled: true port: 6791 @@ -187,9 +188,8 @@ func (runner *MonitoringTextRunner) AllComponentsHealthy(ctx context.Context) { compDebugName := "" require.Eventually(runner.T(), func() bool { allHealthy := true - status, err := runner.agentFixture.ExecStatus(ctx) + status, _ := runner.agentFixture.ExecStatus(ctx) - require.NoError(runner.T(), err) for _, comp := range status.Components { runner.T().Logf("component state: %s", comp.Message) if comp.State != int(cproto.State_HEALTHY) { diff --git a/testing/integration/monitoring_probe_reload_test.go b/testing/integration/monitoring_probe_reload_test.go index bfb28f16c4f..3abaca68b5e 100644 --- a/testing/integration/monitoring_probe_reload_test.go +++ b/testing/integration/monitoring_probe_reload_test.go @@ -165,9 +165,8 @@ func (runner *MonitoringRunner) AllComponentsHealthy(ctx context.Context) { compDebugName := "" require.Eventually(runner.T(), func() bool { allHealthy := true - status, err := runner.agentFixture.ExecStatus(ctx) + status, _ := runner.agentFixture.ExecStatus(ctx) - require.NoError(runner.T(), err) for _, comp := range status.Components { runner.T().Logf("component state: %s", comp.Message) if comp.State != int(cproto.State_HEALTHY) { From cbd55cc9a6d33a016c6d460df3ecac50c7c4ce79 Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Mon, 29 Jul 2024 19:16:40 +0530 Subject: [PATCH 02/11] fix: update long running tests --- testing/integration/agent_long_running_leak_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/testing/integration/agent_long_running_leak_test.go b/testing/integration/agent_long_running_leak_test.go index 0d1a731d3f7..2ceb2b18909 100644 --- a/testing/integration/agent_long_running_leak_test.go +++ b/testing/integration/agent_long_running_leak_test.go @@ -209,14 +209,13 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) { compDebugName := "" require.Eventually(runner.T(), func() bool { allHealthy := true - status, err := runner.agentFixture.ExecStatus(ctx) + status, _ := runner.agentFixture.ExecStatus(ctx) apacheMatch := "logfile-apache" foundApache := false systemMatch := "system/metrics" foundSystem := false - require.NoError(runner.T(), err) for _, comp := range status.Components { // make sure the components include the expected integrations for _, v := range comp.Units { @@ -269,8 +268,8 @@ func (gm *goroutinesMonitor) Init(ctx context.Context, t *testing.T, fixture *at oldTop := paths.Top() paths.SetTop("/opt/Elastic/Agent") // fetch the unit ID of the component, use that to generate the path to the unix socket - status, err := fixture.ExecStatus(ctx) - require.NoError(t, err) + status, _ := fixture.ExecStatus(ctx) + for _, comp := range status.Components { unitId := comp.ID socketPath := utils.SocketURLWithFallback(unitId, paths.TempDir()) @@ -351,8 +350,8 @@ func (handleMon *handleMonitor) Init(ctx context.Context, t *testing.T, fixture // the `last 30s` metrics tend to report gauges, which we can't use for calculating a derivative. // so separately fetch the PIDs pidInStatusMessageRegex := regexp.MustCompile(`[\d]+`) - status, err := fixture.ExecStatus(ctx) - require.NoError(t, err) + status, _ := fixture.ExecStatus(ctx) + for _, comp := range status.Components { pidStr := pidInStatusMessageRegex.FindString(comp.Message) pid, err := strconv.ParseInt(pidStr, 10, 64) From 793589a4efbb02cb21b583d4fc5ece78a25e2caa Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Mon, 29 Jul 2024 22:08:29 +0530 Subject: [PATCH 03/11] fix: change ExecStatus and return nil --- pkg/testing/fixture.go | 12 +++--------- .../integration/agent_long_running_leak_test.go | 16 +++++++++++++--- .../monitoring_probe_preserve_text_cfg_test.go | 6 +++++- .../integration/monitoring_probe_reload_test.go | 6 +++++- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 75da9f9c94a..4c06dd30054 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -715,20 +715,14 @@ func (e *ExecErr) Unwrap() error { // determine if the returned AgentStatusOutput is empty or not. // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { - out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) + out, _ := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) + status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { return AgentStatusOutput{}, - fmt.Errorf("could not unmarshal agent status output: %w", - errors.Join(&ExecErr{ - err: err, - Output: out, - }, uerr)) + fmt.Errorf("could not unmarshal agent status output: %w", uerr) } - if err != nil { - return status, fmt.Errorf("error running command (output: %s): %w", string(out), err) - } return status, nil } diff --git a/testing/integration/agent_long_running_leak_test.go b/testing/integration/agent_long_running_leak_test.go index 2ceb2b18909..f3862665771 100644 --- a/testing/integration/agent_long_running_leak_test.go +++ b/testing/integration/agent_long_running_leak_test.go @@ -209,7 +209,11 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) { compDebugName := "" require.Eventually(runner.T(), func() bool { allHealthy := true - status, _ := runner.agentFixture.ExecStatus(ctx) + status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } apacheMatch := "logfile-apache" foundApache := false @@ -268,7 +272,10 @@ func (gm *goroutinesMonitor) Init(ctx context.Context, t *testing.T, fixture *at oldTop := paths.Top() paths.SetTop("/opt/Elastic/Agent") // fetch the unit ID of the component, use that to generate the path to the unix socket - status, _ := fixture.ExecStatus(ctx) + status, err := fixture.ExecStatus(ctx) + if err != nil { + t.Logf("agent status returned an error: %v", err) + } for _, comp := range status.Components { unitId := comp.ID @@ -350,7 +357,10 @@ func (handleMon *handleMonitor) Init(ctx context.Context, t *testing.T, fixture // the `last 30s` metrics tend to report gauges, which we can't use for calculating a derivative. // so separately fetch the PIDs pidInStatusMessageRegex := regexp.MustCompile(`[\d]+`) - status, _ := fixture.ExecStatus(ctx) + status, err := fixture.ExecStatus(ctx) + if err != nil { + t.Logf("agent status returned an error: %v", err) + } for _, comp := range status.Components { pidStr := pidInStatusMessageRegex.FindString(comp.Message) diff --git a/testing/integration/monitoring_probe_preserve_text_cfg_test.go b/testing/integration/monitoring_probe_preserve_text_cfg_test.go index 97d0b49cd7a..a4e76d6b075 100644 --- a/testing/integration/monitoring_probe_preserve_text_cfg_test.go +++ b/testing/integration/monitoring_probe_preserve_text_cfg_test.go @@ -188,7 +188,11 @@ func (runner *MonitoringTextRunner) AllComponentsHealthy(ctx context.Context) { compDebugName := "" require.Eventually(runner.T(), func() bool { allHealthy := true - status, _ := runner.agentFixture.ExecStatus(ctx) + status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } for _, comp := range status.Components { runner.T().Logf("component state: %s", comp.Message) diff --git a/testing/integration/monitoring_probe_reload_test.go b/testing/integration/monitoring_probe_reload_test.go index 3abaca68b5e..e456be40c6d 100644 --- a/testing/integration/monitoring_probe_reload_test.go +++ b/testing/integration/monitoring_probe_reload_test.go @@ -165,7 +165,11 @@ func (runner *MonitoringRunner) AllComponentsHealthy(ctx context.Context) { compDebugName := "" require.Eventually(runner.T(), func() bool { allHealthy := true - status, _ := runner.agentFixture.ExecStatus(ctx) + status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } for _, comp := range status.Components { runner.T().Logf("component state: %s", comp.Message) From 2abeaf0666c6a7383ac9f83c33b44882c5e0fdca Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Mon, 29 Jul 2024 23:04:10 +0530 Subject: [PATCH 04/11] fix: update docstring, non-empty --- pkg/testing/fixture.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 4c06dd30054..cc592167745 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -709,10 +709,9 @@ func (e *ExecErr) Unwrap() error { // ExecStatus executes the status subcommand on the prepared Elastic Agent binary. // It returns the parsed output and the error from the execution. Keep in mind // the agent exits with status 1 if it's unhealthy, but it still outputs the -// status successfully. Therefore, a non-empty AgentStatusOutput is valid -// regardless of the error. An empty AgentStatusOutput and non nil error -// means the output could not be parsed. Use AgentStatusOutput.IsZero() to -// determine if the returned AgentStatusOutput is empty or not. +// status successfully. An empty AgentStatusOutput and non nil error +// means the output could not be parsed. +// As long as we get some output, we don't return any error. // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { out, _ := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) @@ -721,6 +720,8 @@ func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (Ag if uerr := json.Unmarshal(out, &status); uerr != nil { return AgentStatusOutput{}, fmt.Errorf("could not unmarshal agent status output: %w", uerr) + } else if status.IsZero() { + return status, fmt.Errorf("agent status returned empty output") } return status, nil From 44bc4bc1f1a2d6b926b3fcb2d2a249d565dbc77f Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Mon, 29 Jul 2024 23:05:33 +0530 Subject: [PATCH 05/11] fix: reword --- pkg/testing/fixture.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index cc592167745..0ac01a8d241 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -721,7 +721,7 @@ func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (Ag return AgentStatusOutput{}, fmt.Errorf("could not unmarshal agent status output: %w", uerr) } else if status.IsZero() { - return status, fmt.Errorf("agent status returned empty output") + return status, fmt.Errorf("agent status output is empty") } return status, nil From 960b4c9f391dcd95d404fac563e628c7a4c86c14 Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Tue, 30 Jul 2024 01:52:11 +0530 Subject: [PATCH 06/11] fix: nit --- pkg/testing/fixture.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 0ac01a8d241..5fc58ccbe0b 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -714,14 +714,14 @@ func (e *ExecErr) Unwrap() error { // As long as we get some output, we don't return any error. // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { - out, _ := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) + out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { return AgentStatusOutput{}, fmt.Errorf("could not unmarshal agent status output: %w", uerr) } else if status.IsZero() { - return status, fmt.Errorf("agent status output is empty") + return status, fmt.Errorf("agent status output is empty: %w", err) } return status, nil From e0ad5bb65d751594342002838babd0f8ff33babd Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Tue, 30 Jul 2024 01:53:17 +0530 Subject: [PATCH 07/11] fix: empty line --- pkg/testing/fixture.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 5fc58ccbe0b..d9131e0f3ae 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -715,7 +715,6 @@ func (e *ExecErr) Unwrap() error { // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) - status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { return AgentStatusOutput{}, From 9e77ba4f48301578ea6771813a55fe99a4634dc9 Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Tue, 30 Jul 2024 02:23:22 +0530 Subject: [PATCH 08/11] fix: join errors. --- pkg/testing/fixture.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index d9131e0f3ae..f821955a8dd 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -718,7 +718,7 @@ func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (Ag status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { return AgentStatusOutput{}, - fmt.Errorf("could not unmarshal agent status output: %w", uerr) + fmt.Errorf("could not unmarshal agent status output: %w", errors.Join(uerr, err)) } else if status.IsZero() { return status, fmt.Errorf("agent status output is empty: %w", err) } From dc392b3c7fc71d564965d5ed7562cbc53a8a90a4 Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Tue, 30 Jul 2024 15:19:47 +0530 Subject: [PATCH 09/11] temproray logs to examine CI --- pkg/testing/fixture.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index f821955a8dd..17baf0a2599 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -715,13 +715,17 @@ func (e *ExecErr) Unwrap() error { // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) + f.t.Logf(">> Got output for status: %v :err: %v", out, err) status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { + f.t.Logf(">> Error %v", errors.Join(uerr, err)) return AgentStatusOutput{}, fmt.Errorf("could not unmarshal agent status output: %w", errors.Join(uerr, err)) } else if status.IsZero() { + f.t.Logf(">> Marshalled output: %v", status) return status, fmt.Errorf("agent status output is empty: %w", err) } + f.t.Logf(">> Marshalled output: %v", status) return status, nil } From 77ebb131d147c12b206c32c1000246a4f00e4833 Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Tue, 30 Jul 2024 16:45:11 +0530 Subject: [PATCH 10/11] Revert "temproray logs to examine CI" remove logs as the test failure has been identified This reverts commit dc392b3c7fc71d564965d5ed7562cbc53a8a90a4. --- pkg/testing/fixture.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 17baf0a2599..f821955a8dd 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -715,17 +715,13 @@ func (e *ExecErr) Unwrap() error { // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) - f.t.Logf(">> Got output for status: %v :err: %v", out, err) status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { - f.t.Logf(">> Error %v", errors.Join(uerr, err)) return AgentStatusOutput{}, fmt.Errorf("could not unmarshal agent status output: %w", errors.Join(uerr, err)) } else if status.IsZero() { - f.t.Logf(">> Marshalled output: %v", status) return status, fmt.Errorf("agent status output is empty: %w", err) } - f.t.Logf(">> Marshalled output: %v", status) return status, nil } From 7ddb06ed38222edb39cc4aefdc6fe7799dd2127e Mon Sep 17 00:00:00 2001 From: Vihas Makwana Date: Tue, 30 Jul 2024 16:46:28 +0530 Subject: [PATCH 11/11] fix: update condition --- pkg/testing/fixture.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index f821955a8dd..c7338cc63d1 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -1237,7 +1237,7 @@ type AgentStatusOutput struct { } func (aso *AgentStatusOutput) IsZero() bool { - return aso.Info.ID == "" + return aso.Info.ID == "" && aso.Message == "" && aso.Info.Version == "" } type AgentInspectOutput struct {