Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

PMM-10078 Extract portal client, add dev env variables for portal address overwriting #1136

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ services:
- AWS_SECRET_KEY=${AWS_SECRET_KEY}
- ENABLE_ALERTING=1
- ENABLE_BACKUP_MANAGEMENT=1
# - PERCONA_TEST_SAAS_HOST=check.localhost
# - PERCONA_TEST_PLATFORM_ADDRESS=https://check.localhost
# - PERCONA_TEST_PLATFORM_INSECURE=1
# - PERCONA_TEST_PLATFORM_PUBLIC_KEY=<public key>
Copy link
Contributor

@ritbl ritbl Jun 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it is not clear if it is a PATH to a key, how about renaming to something like PERCONA_TEST_PLATFORM_PUBLIC_KEY_PATH

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a path, it's key itself

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you show example of how it is supposed to be used? the key is multiline. the user should cat it can send it?
I think, it can be a potential source of issues. ie when user set value in terminal (escaping and new line issues).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's minisign public key, it's one line, here is example:

devChecksPublicKey = "RWTg+ZmCCjt7O8eWeAmTLAqW+1ozUbpRSKSwNTmO+exlS5KEIPYWuYdX"

This variable should not be used by anyone outside Percona. We print warnings in logs each time PERCONA_TEST_* variable used. It's completely for dev/qa needs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Imagine user creates .env, and sets env var this way:
- PERCONA_TEST_PLATFORM_PUBLIC_KEY=${PERCONA_TEST_PLATFORM_PUBLIC_KEY}

how .env should look like?

image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is how it will look like for our dev environment:

PERCONA_TEST_PLATFORM_PUBLIC_KEY=RWTg+ZmCCjt7O8eWeAmTLAqW+1ozUbpRSKSwNTmO+exlS5KEIPYWuYdX

Copy link
Contributor

@ritbl ritbl Jun 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's minisign public key, it's one line, here is example:

oh, it is very confusing, because it can be though as https cert
can you pls rename so that it will clear?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PUBLIC_KEY part is confusing, can we rename it according to it's function? ie PERCONA_TEST_PLATFORM_ENCRYPTION_KEY

Copy link
Contributor Author

@artemgavrilov artemgavrilov Jun 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact it's not a new variable, it was PERCONA_TEST_CHECKS_PUBLIC_KEY for a long time. I renamed it to PERCONA_TEST_PLATFORM_PUBLIC_KEY because now it used not only for checks system but also for alerting.

It's public key for signatures verification, not for encryption/decryption. Maybe PERCONA_TEST_PLATFORM_VERIFICATION_KEY will work better. BTW I will create a poll

# - PERCONA_TEST_TELEMETRY_INTERVAL=10s
# - PERCONA_TEST_TELEMETRY_RETRY_BACKOFF=10s
# - PMM_DEBUG=1
Expand Down
16 changes: 12 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ import (
"github.com/percona/pmm-managed/utils/clean"
"github.com/percona/pmm-managed/utils/interceptors"
"github.com/percona/pmm-managed/utils/logger"
platformClient "github.com/percona/pmm-managed/utils/platform"
)

const (
Expand Down Expand Up @@ -130,6 +131,7 @@ func addLogsHandler(mux *http.ServeMux, logs *supervisord.Logs) {
type gRPCServerDeps struct {
db *reform.DB
vmdb *victoriametrics.Service
portalClient *platformClient.Client
server *server.Server
agentsRegistry *agents.Registry
handler *agents.Handler
Expand Down Expand Up @@ -224,7 +226,7 @@ func runGRPCServer(ctx context.Context, deps *gRPCServerDeps) {
dbaasv1beta1.RegisterLogsAPIServer(gRPCServer, managementdbaas.NewLogsService(deps.db, deps.dbaasClient))
dbaasv1beta1.RegisterComponentsServer(gRPCServer, managementdbaas.NewComponentsService(deps.db, deps.dbaasClient, deps.versionServiceClient))

platformService, err := platform.New(deps.db, deps.supervisord, deps.checksService, deps.grafanaClient, deps.config.Services.Platform)
platformService, err := platform.New(deps.portalClient, deps.db, deps.supervisord, deps.checksService, deps.grafanaClient)
if err == nil {
platformpb.RegisterPlatformServer(gRPCServer, platformService)
} else {
Expand Down Expand Up @@ -689,7 +691,12 @@ func main() {
logs := supervisord.NewLogs(version.FullInfo(), pmmUpdateCheck)
supervisord := supervisord.New(*supervisordConfigDirF, pmmUpdateCheck, vmParams)

telemetry, err := telemetry.NewService(db, version.Version, cfg.Config.Services.Telemetry)
portalClient, err := platformClient.NewClient(db)
if err != nil {
l.Fatalf("Could not create Percona Portal client: %s", err)
}

telemetry, err := telemetry.NewService(db, portalClient, version.Version, cfg.Config.Services.Telemetry)
if err != nil {
l.Fatalf("Could not create telemetry service: %s", err)
}
Expand All @@ -704,15 +711,15 @@ func main() {

actionsService := agents.NewActionsService(agentsRegistry)

checksService, err := checks.New(actionsService, alertManager, db, *victoriaMetricsURLF)
checksService, err := checks.New(db, portalClient, actionsService, alertManager, *victoriaMetricsURLF)
if err != nil {
l.Fatalf("Could not create checks service: %s", err)
}

prom.MustRegister(checksService)

// Integrated alerts services
templatesService, err := ia.NewTemplatesService(db)
templatesService, err := ia.NewTemplatesService(db, portalClient)
if err != nil {
l.Fatalf("Could not create templates service: %s", err)
}
Expand Down Expand Up @@ -905,6 +912,7 @@ func main() {
&gRPCServerDeps{
db: db,
vmdb: vmdb,
portalClient: portalClient,
server: server,
agentsRegistry: agentsRegistry,
handler: agentsHandler,
Expand Down
61 changes: 9 additions & 52 deletions services/checks/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"os/exec"
"strconv"
Expand All @@ -33,7 +31,6 @@ import (
"text/template"
"time"

api "github.com/percona-platform/saas/gen/check/retrieval"
"github.com/percona-platform/saas/pkg/check"
"github.com/percona-platform/saas/pkg/common"
"github.com/percona/pmm/api/agentpb"
Expand All @@ -50,9 +47,7 @@ import (

"github.com/percona/pmm-managed/models"
"github.com/percona/pmm-managed/services"
"github.com/percona/pmm-managed/utils/envvars"
"github.com/percona/pmm-managed/utils/saasreq"
"github.com/percona/pmm-managed/utils/signatures"
"github.com/percona/pmm-managed/utils/platform"
)

const (
Expand All @@ -64,7 +59,7 @@ const (
envDisableStartDelay = "PERCONA_TEST_CHECKS_DISABLE_START_DELAY"

checkExecutionTimeout = 5 * time.Minute // limits execution time for every single check
platformRequestTimeout = 2 * time.Minute // time limit to get checks list from the platform
portalRequestTimeout = 2 * time.Minute // time limit to get checks list from the portal
resultAwaitTimeout = 20 * time.Second // should be greater than agents.defaultQueryActionTimeout
scriptExecutionTimeout = 5 * time.Second // time limit for running pmm-managed-starlark
resultCheckInterval = time.Second
Expand All @@ -91,15 +86,14 @@ var (

// Service is responsible for interactions with Percona Check service.
type Service struct {
portalClient *platform.Client
agentsRegistry agentsRegistry
alertmanagerService alertmanagerService
db *reform.DB
alertsRegistry *registry
vmClient v1.API

l *logrus.Entry
host string
publicKeys []string
startDelay time.Duration
resendInterval time.Duration
localChecksFile string // For testing
Expand All @@ -117,7 +111,7 @@ type Service struct {
}

// New returns Service with given PMM version.
func New(agentsRegistry agentsRegistry, alertmanagerService alertmanagerService, db *reform.DB, VMAddress string) (*Service, error) {
func New(db *reform.DB, portalClient *platform.Client, agentsRegistry agentsRegistry, alertmanagerService alertmanagerService, VMAddress string) (*Service, error) {
l := logrus.WithField("component", "checks")

resendInterval := defaultResendInterval
Expand All @@ -126,25 +120,20 @@ func New(agentsRegistry agentsRegistry, alertmanagerService alertmanagerService,
resendInterval = d
}

host, err := envvars.GetSAASHost()
if err != nil {
return nil, err
}

vmClient, err := metrics.NewClient(metrics.Config{Address: VMAddress})
if err != nil {
return nil, err
}

s := &Service{
db: db,
agentsRegistry: agentsRegistry,
alertmanagerService: alertmanagerService,
db: db,
alertsRegistry: newRegistry(resolveTimeoutFactor * resendInterval),
vmClient: v1.NewAPI(vmClient),

l: l,
host: host,
portalClient: portalClient,
startDelay: defaultStartDelay,
resendInterval: resendInterval,
localChecksFile: os.Getenv(envCheckFile),
Expand All @@ -164,10 +153,6 @@ func New(agentsRegistry agentsRegistry, alertmanagerService alertmanagerService,
}, []string{"service_type", "check_type"}),
}

if k := envvars.GetPublicKeys(); k != nil {
l.Warnf("Public keys changed to %q.", k)
s.publicKeys = k
}
if d, _ := strconv.ParseBool(os.Getenv(envDisableStartDelay)); d {
l.Warn("Start delay disabled.")
s.startDelay = 0
Expand Down Expand Up @@ -1422,40 +1407,12 @@ func (s *Service) downloadChecks(ctx context.Context) ([]check.Check, error) {
return nil, nil
}

s.l.Infof("Downloading checks from %s ...", s.host)

nCtx, cancel := context.WithTimeout(ctx, platformRequestTimeout)
nCtx, cancel := context.WithTimeout(ctx, portalRequestTimeout)
defer cancel()

var accessToken string
if ssoDetails, err := models.GetPerconaSSODetails(nCtx, s.db.Querier); err == nil {
accessToken = ssoDetails.AccessToken.AccessToken
}

endpoint := fmt.Sprintf("https://%s/v1/check/GetAllChecks", s.host)
bodyBytes, err := saasreq.MakeRequest(nCtx, http.MethodPost, endpoint, accessToken, nil,
&saasreq.SaasRequestOptions{})
checks, err := s.portalClient.GetChecks(nCtx)
if err != nil {
return nil, errors.Wrap(err, "failed to dial")
}

var resp *api.GetAllChecksResponse
if err := json.Unmarshal(bodyBytes, &resp); err != nil {
return nil, err
}

if err = signatures.Verify(s.l, resp.File, resp.Signatures, s.publicKeys); err != nil {
return nil, err
}

// be liberal about files from SaaS for smooth transition to future versions
params := &check.ParseParams{
DisallowUnknownFields: false,
DisallowInvalidChecks: false,
}
checks, err := check.Parse(strings.NewReader(resp.File), params)
if err != nil {
return nil, err
return nil, errors.WithStack(err)
}

return checks, nil
Expand Down
Loading