Skip to content

Commit

Permalink
Fix TestLogIngestionFleetManaged, TestDebLogIngestFleetManaged (#5375)
Browse files Browse the repository at this point in the history
* [integration tests] Keep work directory if test fails

This commit enables the work directory used by the integration tests
framework to be kept in the filesystem if the test fails. The full
path of the test directory is printed when the test fails.

* Update pkg/testing/fixture.go

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>

* Update pkg/testing/fixture.go

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>

* Remove log in error branch

* [Integration Tests] Generate namespace based on UUIDv4

The namespace generated by the integration tests framework was not
unique among different tests and test runs, so collisions would
sometimes occur, causing some tests to be flaky.

* Add debug logs

* run mage fmt

* Fix TestDebLogIngestFleetManaged

- Remove debug logs
- Make the deb respect the AGENT_KEEP_INSTALLED env var
- Add errors that only happen on deb to allow list

* Improve logs and test error message

---------

Co-authored-by: Paolo Chilà <paolo.chila@elastic.co>
Co-authored-by: Julien Lind <julien.lind@elastic.co>
(cherry picked from commit 6695324)
  • Loading branch information
belimawr authored and mergify[bot] committed Sep 3, 2024
1 parent ecab0c3 commit 00d5d84
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 20 deletions.
4 changes: 2 additions & 2 deletions internal/pkg/agent/application/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,14 +353,14 @@ func waitForWatcherWithTimeoutCreationFunc(ctx context.Context, log *logger.Logg
return fmt.Errorf("error starting update marker watcher: %w", err)
}

log.Info("waiting up to %s for upgrade watcher to set %s state in upgrade marker", waitTime, details.StateWatching)
log.Infof("waiting up to %s for upgrade watcher to set %s state in upgrade marker", waitTime, details.StateWatching)

for {
select {
case updMarker := <-markerWatcher.Watch():
if updMarker.Details != nil && updMarker.Details.State == details.StateWatching {
// watcher started and it is watching, all good
log.Info("upgrade watcher set %s state in upgrade marker: exiting wait loop", details.StateWatching)
log.Infof("upgrade watcher set %s state in upgrade marker: exiting wait loop", details.StateWatching)
return nil
}

Expand Down
19 changes: 7 additions & 12 deletions pkg/testing/define/define.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"sync"
"testing"

"github.com/gofrs/uuid/v5"

"github.com/elastic/elastic-agent-libs/kibana"
"github.com/elastic/go-elasticsearch/v8"
"github.com/elastic/go-sysinfo"
Expand Down Expand Up @@ -199,28 +201,21 @@ func getOSInfo() (*types.OSInfo, error) {
// getNamespace is a general namespace that the test can use that will ensure that it
// is unique and won't collide with other tests (even the same test from a different batch).
//
// this function uses a sha256 of the prefix, package and test name, to ensure that the
// This function uses a sha256 of an UUIDv4 to ensure that the
// length of the namespace is not over the 100 byte limit from Fleet
// see: https://www.elastic.co/guide/en/fleet/current/data-streams.html#data-streams-naming-scheme
func getNamespace(t *testing.T, local bool) (string, error) {
prefix := os.Getenv("TEST_DEFINE_PREFIX")
if prefix == "" {
if local {
prefix = "local"
}
if prefix == "" {
return "", errors.New("TEST_DEFINE_PREFIX must be defined by the test runner")
}
nsUUID, err := uuid.NewV4()
if err != nil {
return "", fmt.Errorf("cannot generate UUID V4: %w", err)
}
name := fmt.Sprintf("%s-%s", prefix, t.Name())
hasher := sha256.New()
hasher.Write([]byte(name))
hasher.Write([]byte(nsUUID.String()))

// Fleet API requires the namespace to be lowercased and not contain
// special characters.
namespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil)))
namespace = noSpecialCharsRegexp.ReplaceAllString(namespace, "")

return namespace, nil
}

Expand Down
27 changes: 26 additions & 1 deletion pkg/testing/fixture.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ func (f *Fixture) Prepare(ctx context.Context, components ...UsableComponent) er
if err != nil {
return err
}
workDir := f.t.TempDir()
workDir := createTempDir(f.t)
finalDir := filepath.Join(workDir, name)
err = ExtractArtifact(f.t, src, workDir)
if err != nil {
Expand Down Expand Up @@ -1196,6 +1196,31 @@ func performConfigure(ctx context.Context, c client.Client, cfg string, timeout
return nil
}

// createTempDir creates a temporary directory for the test t and returns
// its path. The directory name is derived from t.Name().
//
// A cleanup function is registered on t: if the test passed, the directory
// is removed; if the test failed, the directory is kept for further
// investigation and its path is logged.
//
// If the directory cannot be created, the test is aborted via t.Fatalf.
func createTempDir(t *testing.T) string {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	// os.MkdirTemp rejects patterns that contain a path separator. Subtest
	// names contain '/', and a backslash is also a separator on Windows,
	// so both are replaced.
	pattern := strings.NewReplacer("/", "-", `\`, "-").Replace(t.Name())

	tempDir, err := os.MkdirTemp("", pattern)
	if err != nil {
		t.Fatalf("failed to make temp directory: %s", err)
	}

	t.Cleanup(func() {
		if t.Failed() {
			t.Logf("Temporary directory %q preserved for investigation/debugging", tempDir)
			return
		}

		if err := os.RemoveAll(tempDir); err != nil {
			t.Errorf("could not remove temp dir '%s': %s", tempDir, err)
		}
	})

	return tempDir
}

type AgentStatusOutput struct {
Info struct {
ID string `json:"id"`
Expand Down
7 changes: 7 additions & 0 deletions pkg/testing/fixture_install.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts

f.t.Cleanup(func() {
f.t.Logf("[test %s] Inside fixture installDeb cleanup function", f.t.Name())

uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer uninstallCancel()
// stop elastic-agent, non fatal if error, might have been stopped before this.
Expand All @@ -424,6 +425,12 @@ func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts
if err != nil {
f.t.Logf("error systemctl stop elastic-agent: %s, output: %s", err, string(out))
}

if keepInstalledFlag() {
f.t.Logf("skipping uninstall; test failed and AGENT_KEEP_INSTALLED=true")
return
}

// apt-get purge elastic-agent
f.t.Logf("running 'sudo apt-get -y -q purge elastic-agent'")
out, err = exec.CommandContext(uninstallCtx, "sudo", "apt-get", "-y", "-q", "purge", "elastic-agent").CombinedOutput()
Expand Down
1 change: 1 addition & 0 deletions pkg/testing/tools/estools/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ func PerformQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{
es.Search.WithContext(ctx),
es.Search.WithSize(300),
)

if err != nil {
return Documents{}, fmt.Errorf("error performing ES search: %w", err)
}
Expand Down
9 changes: 5 additions & 4 deletions testing/integration/logs_ingestion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,15 @@ func testMonitoringLogsAreShipped(
return estools.CheckForErrorsInLogs(ctx, info.ESClient, info.Namespace, []string{
// acceptable error messages (include reason)
"Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated
"Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues
"Failed to apply initial policy from on disk configuration",
"Failed to connect to backoff(elasticsearch(http://127.0.0.1:9200)): Get \"http://127.0.0.1:9200\": dial tcp 127.0.0.1:9200: connect: connection refused", // Deb test
"Failed to download artifact",
"Failed to initialize artifact",
"Failed to apply initial policy from on disk configuration",
"elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart
"Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues
"add_cloud_metadata: received error failed fetching EC2 Identity Document", // okay for the cloud metadata to not work
"add_cloud_metadata: received error failed requesting openstack metadata", // okay for the cloud metadata to not work
"add_cloud_metadata: received error failed with http status code 404", // okay for the cloud metadata to not work
"add_cloud_metadata: received error failed fetching EC2 Identity Document", // okay for the cloud metadata to not work
"elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart
"failed to invoke rollback watcher: failed to start Upgrade Watcher", // on debian this happens probably need to fix.
"falling back to IMDSv1: operation error ec2imds: getToken", // okay for the cloud metadata to not work
})
Expand Down
2 changes: 1 addition & 1 deletion testing/integration/upgrade_rollback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ inputs:
state, err := client.State(ctx)
require.NoError(t, err)

require.NotNil(t, state.UpgradeDetails)
require.NotNil(t, state.UpgradeDetails, "upgrade details in the state cannot be nil")
require.Equal(t, details.StateRollback, details.State(state.UpgradeDetails.State))
}

Expand Down

0 comments on commit 00d5d84

Please sign in to comment.