Skip to content

Commit

Permalink
Call fleet-server audit/unenroll endpoint on uninstall
Browse files Browse the repository at this point in the history
Uninstalling a fleet-managed elastic-agent instance will now do a
best-effort attempt to notify fleet-server of the agent removal so the
agent does not appear as offline.
  • Loading branch information
michel-laterman committed Aug 14, 2024
1 parent ef69b58 commit bb4d789
Show file tree
Hide file tree
Showing 5 changed files with 312 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: feature

# Change summary; a 80ish characters long description of the change.
summary: Call fleet-server audit/unenroll endpoint on uninstall

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
  Uninstalling a fleet-managed elastic-agent instance will now do a
  best-effort attempt to notify fleet-server of the agent removal so the
  agent does not appear as offline.
# Affected component; a word indicating the component this changeset affects.
component:

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/5302

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/484
79 changes: 79 additions & 0 deletions internal/pkg/agent/install/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io/fs"
"net/http"
"os"
"path/filepath"
"runtime"
Expand All @@ -19,15 +20,19 @@ import (
"github.com/schollz/progressbar/v3"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
aerrors "github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/agent/transpiler"
"github.com/elastic/elastic-agent/internal/pkg/agent/vars"
"github.com/elastic/elastic-agent/internal/pkg/agent/vault"
"github.com/elastic/elastic-agent/internal/pkg/capabilities"
"github.com/elastic/elastic-agent/internal/pkg/config"
"github.com/elastic/elastic-agent/internal/pkg/config/operations"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
fleetclient "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
"github.com/elastic/elastic-agent/pkg/component"
comprt "github.com/elastic/elastic-agent/pkg/component/runtime"
"github.com/elastic/elastic-agent/pkg/core/logger"
Expand Down Expand Up @@ -100,6 +105,27 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
}
}

// will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install
notifyFleet := false
var ai *info.AgentInfo
c, err := operations.LoadFullAgentConfig(ctx, log, cfgFile, false, unprivileged)
if err != nil {
pt.Describe(fmt.Sprintf("unable to read agent config to deterimine if notifiying fleet-server is needed: %v", err))
}
cfg, err := configuration.NewFromConfig(c)
if err != nil {
pt.Describe(fmt.Sprintf("notify fleet-server: unable to transform *config.Config to *configuration.Configuration: %v", err))
}

if cfg != nil && !configuration.IsStandalone(cfg.Fleet) {
ai, err = info.NewAgentInfo(ctx, false)
if err != nil {
pt.Describe(fmt.Sprintf("unable to read ageint info, fleet-server will not be notified of uninstall: %v", err))
} else {
notifyFleet = true
}
}

// remove existing directory
pt.Describe("Removing install directory")
err = RemovePath(topPath)
Expand All @@ -112,9 +138,62 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
}
pt.Describe("Removed install directory")

if notifyFleet {
notifyFleetAuditUninstall(ctx, log, pt, cfg, ai)
}

return nil
}

// notifyFleetAuditUninstall makes a best-effort attempt to tell fleet-server
// that this agent has been uninstalled.
//
// It builds an authenticated fleet client from cfg.Fleet and sends an
// audit/unenroll request with reason fleetapi.ReasonUninstall for the agent
// described by ai. Up to 10 attempts are made, with a 10s wait before each
// retry. The following are never retried: a cancelled context, an error with
// the request itself (*fleetapi.ReqError), and the 400, 401 and 409 status
// codes. Any other error or status code is retried until the attempts run out.
//
// Nothing is returned; progress and failures are only reported through pt.
func notifyFleetAuditUninstall(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo) {
	const (
		maxAttempts = 10
		retryWait   = 10 * time.Second
	)

	pt.Describe("notify fleet-server of uninstall")
	client, err := fleetclient.NewAuthWithConfig(log, cfg.Fleet.AccessAPIKey, cfg.Fleet.Client)
	if err != nil {
		pt.Describe(fmt.Sprintf("notify fleet-server: unable to create fleetapi client: %v", err))
		return
	}
	cmd := fleetapi.NewAuditUnenrollCmd(ai, client)
	req := &fleetapi.AuditUnenrollRequest{
		Reason:    fleetapi.ReasonUninstall,
		Timestamp: time.Now().UTC(),
	}

	// A zero-duration timer lets the first attempt run immediately while going
	// through the same cancellable select as the delayed retries.
	timer := time.NewTimer(0)
	// Release the timer on every exit path, including returns taken while a
	// retry wait is armed.
	defer timer.Stop()

	for attempt := 1; attempt <= maxAttempts; attempt++ {
		select {
		case <-ctx.Done():
			return
		case <-timer.C:
		}

		status, err := cmd.Execute(ctx, req)
		if err != nil {
			var reqErr *fleetapi.ReqError
			// Do not retry if it was a context error, or an error with the request.
			if errors.Is(err, context.Canceled) || errors.As(err, &reqErr) {
				pt.Describe(fmt.Sprintf("notify fleet-server encountered unretryable error: %v", err))
				return
			}
			// Do not announce (or arm) a retry that will never happen.
			if attempt == maxAttempts {
				break
			}
			pt.Describe("notify fleet-server network error, retry in 10s.")
			timer.Reset(retryWait)
			continue
		}

		switch status {
		case http.StatusOK:
			pt.Describe("notify fleet-server success")
			return
		case http.StatusBadRequest, http.StatusUnauthorized, http.StatusConflict:
			pt.Describe(fmt.Sprintf("notify fleet-server failed with status code %d. no retries.", status))
			return
		}

		// Any other status code is treated as retryable.
		if attempt == maxAttempts {
			break
		}
		pt.Describe(fmt.Sprintf("notify fleet-server failed with status code %d. retry in 10s", status))
		timer.Reset(retryWait)
	}
	pt.Describe("notify fleet-server failed.")
}

// EnsureStoppedService ensures that the installed service is stopped.
func EnsureStoppedService(topPath string, pt *progressbar.ProgressBar) (service.Status, error) {
status, _ := StatusService(topPath)
Expand Down
92 changes: 92 additions & 0 deletions internal/pkg/fleetapi/audit_unenroll_cmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package fleetapi

import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"time"

"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
)

// ReqError wraps an error that stems from the request itself, such as a
// validation or marshalling failure. Callers use it to recognise failures
// that should not be retried.
type ReqError struct {
	err error
}

// Error reports the message of the wrapped error.
func (r *ReqError) Error() string {
	msg := r.err.Error()
	return msg
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can
// inspect it.
func (r *ReqError) Unwrap() error {
	inner := r.err
	return inner
}

// auditUnenrollPath is the format string for the audit/unenroll endpoint;
// its single %s verb is filled with the agent ID.
const auditUnenrollPath = "/api/fleet/agents/%s/audit/unenroll"

// Reason is the value sent in the "reason" field of an audit/unenroll request.
type Reason string

const (
// ReasonUninstall is the only reason accepted by AuditUnenrollRequest.Validate.
ReasonUninstall Reason = "uninstall"
)

// AuditUnenrollRequest is the JSON body of an audit/unenroll request.
type AuditUnenrollRequest struct {
	// Reason must be one of the supported Reason values.
	Reason Reason `json:"reason"`
	// Timestamp must be set (non-zero) for the request to be valid.
	Timestamp time.Time `json:"timestamp"`
}

// Validate will ensure the request is non-nil, the timestamp is set and the
// reason is an allowed value.
//
// Every failure is returned as a *ReqError so that callers can tell the
// request itself is at fault and must not be retried. A nil receiver is
// reported the same way instead of panicking.
func (e *AuditUnenrollRequest) Validate() error {
	if e == nil {
		return &ReqError{fmt.Errorf("request is nil")}
	}
	if e.Timestamp.IsZero() {
		return &ReqError{fmt.Errorf("request timestamp not set")}
	}
	switch e.Reason {
	case ReasonUninstall:
		return nil
	default:
		return &ReqError{fmt.Errorf("unsupported reason: %s", e.Reason)}
	}
}

// AuditUnenrollCmd sends audit/unenroll requests through a client.Sender on
// behalf of the agent described by info.
type AuditUnenrollCmd struct {
	client client.Sender
	info   agentInfo
}

// NewAuditUnenrollCmd returns an AuditUnenrollCmd that uses client to send
// requests and info to supply the agent ID.
func NewAuditUnenrollCmd(info agentInfo, client client.Sender) *AuditUnenrollCmd {
	cmd := new(AuditUnenrollCmd)
	cmd.info = info
	cmd.client = client
	return cmd
}

// Execute sends the request to fleet-server and returns the status code of
// the response.
//
// The request is validated and marshalled to JSON first; a failure in either
// step is returned as a *ReqError. The body is then POSTed to the
// audit/unenroll path of the agent identified by e.info.AgentID(). A failure
// to send is returned as a network-typed error annotated with the request
// path. The returned status is 0 whenever an error is returned.
//
// No status code is treated as an error here: the caller must determine if
// the call succeeded or if it should be retried.
func (e *AuditUnenrollCmd) Execute(ctx context.Context, r *AuditUnenrollRequest) (int, error) {
	err := r.Validate()
	if err != nil {
		return 0, err
	}

	payload, err := json.Marshal(r)
	if err != nil {
		return 0, &ReqError{err}
	}

	endpoint := fmt.Sprintf(auditUnenrollPath, e.info.AgentID())
	body := bytes.NewBuffer(payload)
	resp, sendErr := e.client.Send(ctx, http.MethodPost, endpoint, nil, nil, body)
	if sendErr != nil {
		wrapped := errors.New(sendErr,
			"fail to notify audit/unenroll on fleet-server",
			errors.TypeNetwork,
			errors.M(errors.MetaKeyURI, endpoint))
		return 0, wrapped
	}
	// Only the status code is of interest; the body is closed unread.
	defer resp.Body.Close()

	return resp.StatusCode, nil
}
50 changes: 50 additions & 0 deletions internal/pkg/fleetapi/audit_unenroll_cmd_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package fleetapi

import (
"context"
"encoding/json"
"fmt"
"net/http"
"testing"
"time"

"github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
"github.com/stretchr/testify/require"
)

// Test_AuditUnenrollCmd_Execute checks a full audit/unenroll round trip: the
// command must POST a body carrying the uninstall reason to the agent's
// audit/unenroll path, and must hand back the status code the server replied
// with.
func Test_AuditUnenrollCmd_Execute(t *testing.T) {
	const withAPIKey = "secret"
	agentInfo := &agentinfo{}

	t.Run("test audit/unenroll roundtrip", withServerWithAuthClient(
		func(t *testing.T) *http.ServeMux {
			mux := http.NewServeMux()
			path := fmt.Sprintf(auditUnenrollPath, agentInfo.AgentID())
			mux.HandleFunc(path, authHandler(func(w http.ResponseWriter, r *http.Request) {
				defer r.Body.Close()

				// The handler runs on the HTTP server's goroutine, where
				// FailNow (and therefore require.*) must not be called.
				// Failures are recorded with t.Errorf and surfaced to the
				// client as a 400 so the status assertion below fails too.
				var request AuditUnenrollRequest
				if err := json.NewDecoder(r.Body).Decode(&request); err != nil {
					t.Errorf("unable to decode audit/unenroll request body: %v", err)
					w.WriteHeader(http.StatusBadRequest)
					return
				}
				if request.Reason != ReasonUninstall {
					t.Errorf("unexpected reason: got %q, want %q", request.Reason, ReasonUninstall)
					w.WriteHeader(http.StatusBadRequest)
					return
				}
				w.WriteHeader(http.StatusOK)
			}, withAPIKey))
			return mux
		}, withAPIKey,
		func(t *testing.T, client client.Sender) {
			cmd := NewAuditUnenrollCmd(agentInfo, client)
			request := &AuditUnenrollRequest{
				Reason:    ReasonUninstall,
				Timestamp: time.Now(),
			}
			status, err := cmd.Execute(context.Background(), request)
			require.NoError(t, err)
			require.Equal(t, http.StatusOK, status)
		},
	))
}
56 changes: 56 additions & 0 deletions testing/integration/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ package integration

import (
"context"
"encoding/json"
"fmt"
"io"
"math/rand"
"net/http"
"os"
"path/filepath"
"runtime"
Expand Down Expand Up @@ -282,6 +285,59 @@ func testSecondAgentCanInstall(ctx context.Context, fixture *atesting.Fixture, b
}
}

// TestInstallUninstallAudit will test to make sure that a fleet-managed agent
// can use the audit/unenroll endpoint when uninstalling itself.
//
// It installs the agent, records its ID, uninstalls it, and then asks Kibana
// for the agent document, expecting audit_unenroll_reason to be "uninstall".
//
// NOTE(review): the install options used here carry no enrollment settings;
// confirm that the installed agent is actually enrolled in Fleet, otherwise
// the uninstall will not notify fleet-server.
func TestInstallUninstallAudit(t *testing.T) {
	info := define.Require(t, define.Requirements{
		Group: Default,
		Stack: &define.Stack{}, // needs a fleet-server.
		Sudo:  true,
		Local: false,
	})

	ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(10*time.Minute))
	defer cancel()

	fixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
	require.NoError(t, err)

	err = fixture.Prepare(ctx)
	require.NoError(t, err)
	// Run `elastic-agent install`. We use `--force` to prevent interactive
	// execution.
	opts := &atesting.InstallOpts{Force: true}
	out, err := fixture.Install(ctx, opts)
	if err != nil {
		t.Logf("install output: %s", out)
		require.NoError(t, err)
	}

	// Check that Agent was installed in default base path
	require.NoError(t, installtest.CheckSuccess(ctx, fixture, opts.BasePath, &installtest.CheckOpts{Privileged: opts.Privileged}))

	agentID, err := getAgentID(ctx, fixture)
	require.NoError(t, err, "error getting the agent ID")

	out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true})
	if err != nil {
		t.Logf("uninstall output: %s", out)
		// NoErrorf requires a format string; NoError is the matching call here.
		require.NoError(t, err)
	}

	// Fields of define's info type are only reachable from this package when
	// exported.
	// NOTE(review): confirm the field is named KibanaClient.
	response, err := info.KibanaClient.SendWithContext(ctx, http.MethodGet, "/api/fleet/agents/"+agentID, nil, nil, nil)
	require.NoError(t, err)
	defer response.Body.Close()
	p, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	// Fail with the raw body rather than with a confusing empty-field mismatch
	// when Kibana does not return the agent document.
	require.Equalf(t, http.StatusOK, response.StatusCode, "unexpected response from Kibana: %s", p)

	var res struct {
		Item struct {
			AuditUnenrollReason string `json:"audit_unenroll_reason"`
		} `json:"item"`
	}
	err = json.Unmarshal(p, &res)
	require.NoError(t, err)
	require.Equal(t, "uninstall", res.Item.AuditUnenrollReason)
}

// TestRepeatedInstallUninstall will install then uninstall the agent
// repeatedly. This test exists because of a number of race
// conditions that have occurred in the uninstall process. Current
Expand Down

0 comments on commit bb4d789

Please sign in to comment.