From bb4d78992a0c7cb0c11815e9adbbc53feed56015 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 14 Aug 2024 15:35:06 -0700 Subject: [PATCH] Call fleet-server audit/unenroll endpoint on uninstall Uninstalling a fleet-managed elastic-agent instance will now do a best-effort attempt to notify fleet-server of the agent removal so the agent may not appear as offline. --- ...-audit-unenroll-endpoint-on-uninstall.yaml | 35 +++++++ internal/pkg/agent/install/uninstall.go | 79 ++++++++++++++++ internal/pkg/fleetapi/audit_unenroll_cmd.go | 92 +++++++++++++++++++ .../pkg/fleetapi/audit_unenroll_cmd_test.go | 50 ++++++++++ testing/integration/install_test.go | 56 +++++++++++ 5 files changed, 312 insertions(+) create mode 100644 changelog/fragments/1723675050-Call-fleet-server-audit-unenroll-endpoint-on-uninstall.yaml create mode 100644 internal/pkg/fleetapi/audit_unenroll_cmd.go create mode 100644 internal/pkg/fleetapi/audit_unenroll_cmd_test.go diff --git a/changelog/fragments/1723675050-Call-fleet-server-audit-unenroll-endpoint-on-uninstall.yaml b/changelog/fragments/1723675050-Call-fleet-server-audit-unenroll-endpoint-on-uninstall.yaml new file mode 100644 index 00000000000..575027c6187 --- /dev/null +++ b/changelog/fragments/1723675050-Call-fleet-server-audit-unenroll-endpoint-on-uninstall.yaml @@ -0,0 +1,35 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. 
+# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. +summary: Call fleet-server audit/unenroll endpoint on uninstall + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +description: | + Uninstalling a fleet-managed elastic-agent instance will now do a + best-effort attempt to notify fleet-server of the agent removal so the + agent may not appear as offline. + +# Affected component; a word indicating the component this changeset affects. +component: + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/elastic-agent/pull/5302 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. 
+issue: https://github.com/elastic/elastic-agent/issues/484 diff --git a/internal/pkg/agent/install/uninstall.go b/internal/pkg/agent/install/uninstall.go index c3deb43ca33..ce9e4a3fe2b 100644 --- a/internal/pkg/agent/install/uninstall.go +++ b/internal/pkg/agent/install/uninstall.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "io/fs" + "net/http" "os" "path/filepath" "runtime" @@ -19,8 +20,10 @@ import ( "github.com/schollz/progressbar/v3" "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/secret" + "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" aerrors "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/agent/transpiler" "github.com/elastic/elastic-agent/internal/pkg/agent/vars" @@ -28,6 +31,8 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/capabilities" "github.com/elastic/elastic-agent/internal/pkg/config" "github.com/elastic/elastic-agent/internal/pkg/config/operations" + "github.com/elastic/elastic-agent/internal/pkg/fleetapi" + fleetclient "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client" "github.com/elastic/elastic-agent/pkg/component" comprt "github.com/elastic/elastic-agent/pkg/component/runtime" "github.com/elastic/elastic-agent/pkg/core/logger" @@ -100,6 +105,27 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr } } + // will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install + notifyFleet := false + var ai *info.AgentInfo + var cfg *configuration.Configuration + c, err := operations.LoadFullAgentConfig(ctx, log, cfgFile, false, unprivileged) + if err != nil { + pt.Describe(fmt.Sprintf("unable to read agent config to determine if notifying fleet-server is needed: %v", err)) + } else if cfg, err = configuration.NewFromConfig(c); err != nil { + // c is only meaningful when loading succeeded, so the conversion is skipped after a load error and cfg stays nil. + pt.Describe(fmt.Sprintf("notify fleet-server: unable to transform *config.Config to *configuration.Configuration: %v", err)) + } 
+ + if cfg != nil && !configuration.IsStandalone(cfg.Fleet) { + ai, err = info.NewAgentInfo(ctx, false) + if err != nil { + pt.Describe(fmt.Sprintf("unable to read agent info, fleet-server will not be notified of uninstall: %v", err)) + } else { + notifyFleet = true + } + } + // remove existing directory pt.Describe("Removing install directory") err = RemovePath(topPath) @@ -112,9 +138,62 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr } pt.Describe("Removed install directory") + if notifyFleet { + notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) + } + return nil } +// notifyFleetAuditUninstall will attempt to notify fleet-server of the agent's uninstall. +// +// There are retries for the attempt after a 10s wait, but it is a best-effort approach. +func notifyFleetAuditUninstall(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo) { + pt.Describe("notify fleet-server of uninstall") + client, err := fleetclient.NewAuthWithConfig(log, cfg.Fleet.AccessAPIKey, cfg.Fleet.Client) + if err != nil { + pt.Describe(fmt.Sprintf("notify fleet-server: unable to create fleetapi client: %v", err)) + return + } + cmd := fleetapi.NewAuditUnenrollCmd(ai, client) + req := &fleetapi.AuditUnenrollRequest{ + Reason: fleetapi.ReasonUninstall, + Timestamp: time.Now().UTC(), + } + timer := time.NewTimer(0) + for i := 0; i < 10; i++ { + select { + case <-ctx.Done(): + return + case <-timer.C: + } + status, err := cmd.Execute(ctx, req) + if err != nil { + var reqErr *fleetapi.ReqError + // Do not retry if it was a context error, or an error with the request. 
+ if errors.Is(err, context.Canceled) || errors.As(err, &reqErr) { + pt.Describe(fmt.Sprintf("notify fleet-server encountered unretryable error: %v", err)) + return + } + pt.Describe("notify fleet-server network error, retry in 10s.") + timer.Reset(time.Second * 10) + continue + } + switch status { + case http.StatusOK: + pt.Describe("notify fleet-server success") + return + case http.StatusBadRequest, http.StatusUnauthorized, http.StatusConflict: + pt.Describe(fmt.Sprintf("notify fleet-server failed with status code %d. no retries.", status)) + return + default: + pt.Describe(fmt.Sprintf("notify fleet-server failed with status code %d. retry in 10s", status)) + timer.Reset(time.Second * 10) + } + } + pt.Describe("notify fleet-server failed.") +} + // EnsureStoppedService ensures that the installed service is stopped. func EnsureStoppedService(topPath string, pt *progressbar.ProgressBar) (service.Status, error) { status, _ := StatusService(topPath) diff --git a/internal/pkg/fleetapi/audit_unenroll_cmd.go b/internal/pkg/fleetapi/audit_unenroll_cmd.go new file mode 100644 index 00000000000..a670b220817 --- /dev/null +++ b/internal/pkg/fleetapi/audit_unenroll_cmd.go @@ -0,0 +1,92 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package fleetapi + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + "github.com/elastic/elastic-agent/internal/pkg/agent/errors" + "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client" +) + +// ReqError is an error wrapper to wrap errors with a request. +// These can include validation or marshalling errors that should not be retried. 
+type ReqError struct { + err error +} + +func (e *ReqError) Error() string { + return e.err.Error() +} + +func (e *ReqError) Unwrap() error { + return e.err +} + +const auditUnenrollPath = "/api/fleet/agents/%s/audit/unenroll" + +type Reason string + +const ( + ReasonUninstall Reason = "uninstall" +) + +type AuditUnenrollRequest struct { + Reason Reason `json:"reason"` + Timestamp time.Time `json:"timestamp"` +} + +// Validate will ensure the timestamp is set and the reason is an allowed value. +func (e *AuditUnenrollRequest) Validate() error { + if e.Timestamp.IsZero() { + return &ReqError{fmt.Errorf("request timestamp not set")} + } + switch e.Reason { + case ReasonUninstall: + default: + return &ReqError{fmt.Errorf("unsupported reason: %s", e.Reason)} + } + return nil +} + +type AuditUnenrollCmd struct { + client client.Sender + info agentInfo +} + +func NewAuditUnenrollCmd(info agentInfo, client client.Sender) *AuditUnenrollCmd { + return &AuditUnenrollCmd{ + client: client, + info: info, + } +} + +// Execute sends the request to fleet-server and returns the status code response. +// +// the caller must determine if the call succeeded or if it should be retried. 
+func (e *AuditUnenrollCmd) Execute(ctx context.Context, r *AuditUnenrollRequest) (int, error) { + if err := r.Validate(); err != nil { + return 0, err + } + p, err := json.Marshal(r) + if err != nil { + return 0, &ReqError{err} + } + path := fmt.Sprintf(auditUnenrollPath, e.info.AgentID()) + resp, err := e.client.Send(ctx, http.MethodPost, path, nil, nil, bytes.NewBuffer(p)) + if err != nil { + return 0, errors.New(err, + "fail to notify audit/unenroll on fleet-server", + errors.TypeNetwork, + errors.M(errors.MetaKeyURI, path)) + } + resp.Body.Close() + return resp.StatusCode, nil +} diff --git a/internal/pkg/fleetapi/audit_unenroll_cmd_test.go b/internal/pkg/fleetapi/audit_unenroll_cmd_test.go new file mode 100644 index 00000000000..353d8a9749e --- /dev/null +++ b/internal/pkg/fleetapi/audit_unenroll_cmd_test.go @@ -0,0 +1,50 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package fleetapi + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "testing" + "time" + + "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client" + "github.com/stretchr/testify/require" +) + +func Test_AuditUnenrollCmd_Execute(t *testing.T) { + const withAPIKey = "secret" + agentInfo := &agentinfo{} + + t.Run("test audit/unenroll roundtrip", withServerWithAuthClient( + func(t *testing.T) *http.ServeMux { + mux := http.NewServeMux() + path := fmt.Sprintf(auditUnenrollPath, agentInfo.AgentID()) + mux.HandleFunc(path, authHandler(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + + decoder := json.NewDecoder(r.Body) + defer r.Body.Close() + request := &AuditUnenrollRequest{} + err := decoder.Decode(&request) + require.NoError(t, err) + require.Equal(t, ReasonUninstall, request.Reason) + }, withAPIKey)) + return mux + }, withAPIKey, + func(t *testing.T, client client.Sender) { + cmd := NewAuditUnenrollCmd(agentInfo, client) + request := &AuditUnenrollRequest{ + Reason: ReasonUninstall, + Timestamp: time.Now(), + } + status, err := cmd.Execute(context.Background(), request) + require.NoError(t, err) + require.Equal(t, http.StatusOK, status) + }, + )) +} diff --git a/testing/integration/install_test.go b/testing/integration/install_test.go index f2159752943..4bd18161927 100644 --- a/testing/integration/install_test.go +++ b/testing/integration/install_test.go @@ -8,8 +8,11 @@ package integration import ( "context" + "encoding/json" "fmt" + "io" "math/rand" + "net/http" "os" "path/filepath" "runtime" @@ -282,6 +285,59 @@ func testSecondAgentCanInstall(ctx context.Context, fixture *atesting.Fixture, b } } +// TestInstallUninstallAudit will test to make sure that a fleet-managed agent can use the audit/unenroll endpoint when uninstalling itself. +func TestInstallUninstallAudit(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Default, + Stack: &define.Stack{}, // needs a fleet-server. 
+ Sudo: true, + Local: false, + }) + + ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute)) + defer cancel() + + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + err = fixture.Prepare(ctx) + require.NoError(t, err) + // Run `elastic-agent install`. We use `--force` to prevent interactive + // execution. + opts := &atesting.InstallOpts{Force: true} + out, err := fixture.Install(ctx, opts) + if err != nil { + t.Logf("install output: %s", out) + require.NoError(t, err) + } + + // Check that Agent was installed in default base path + require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, &installtest.CheckOpts{Privileged: opts.Privileged})) + + agentID, err := getAgentID(ctx, fixture) + require.NoError(t, err, "error getting the agent ID") + + out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true}) + if err != nil { + t.Logf("uninstall output: %s", out) + require.NoError(t, err) + } + + response, err := info.kibanaClient.SendWithContext(ctx, http.MethodGet, "/api/fleet/agents/"+agentID, nil, nil, nil) + require.NoError(t, err) + defer response.Body.Close() + p, err := io.ReadAll(response.Body) + require.NoError(t, err) + var res struct { + Item struct { + AuditUnenrollReason string `json:"audit_unenroll_reason"` + } `json:"item"` + } + err = json.Unmarshal(p, &res) + require.NoError(t, err) + require.Equal(t, "uninstall", res.Item.AuditUnenrollReason) +} + // TestRepeatedInstallUninstall will install then uninstall the agent // repeatedly. This test exists because of a number of race // conditions that have occurred in the uninstall process. Current