Skip to content

Commit

Permalink
PMM-10078 Extract portal client, add dev env variables for portal add…
Browse files Browse the repository at this point in the history
…ress overwriting (#958)

* PMM-10078 Extract portal client, add dev env variables for portal address overwriting

* PMM-10078 Refactoring

* PMM-10078 Refactoring

* PMM-10078 Refactoring

* Fix DBAAS dependency version

* PMM-10078 Use warns instead of errors for removed test env variables

* PMM-10078 Fix tests

* PMM-10078 Add test env variables to CONTRIBUTING.md
  • Loading branch information
artemgavrilov authored and BupycHuk committed Jul 20, 2022
1 parent 40b4664 commit bc7ea8f
Show file tree
Hide file tree
Showing 21 changed files with 665 additions and 760 deletions.
4 changes: 3 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ services:
- AWS_SECRET_KEY=${AWS_SECRET_KEY}
- ENABLE_ALERTING=1
- ENABLE_BACKUP_MANAGEMENT=1
# - PERCONA_TEST_SAAS_HOST=check.localhost
# - PERCONA_TEST_PLATFORM_ADDRESS=https://check.localhost
# - PERCONA_TEST_PLATFORM_INSECURE=1
# - PERCONA_TEST_PLATFORM_PUBLIC_KEY=<public key>
# - PERCONA_TEST_TELEMETRY_INTERVAL=10s
# - PERCONA_TEST_TELEMETRY_RETRY_BACKOFF=10s
# - PMM_DEBUG=1
Expand Down
20 changes: 20 additions & 0 deletions managed/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,26 @@ go test -timeout=30s -p 1 ./...

# Advanced Setup

## Available test environment variables:
| Variable | Description | Default |
|-----------------------------------------|------------------------------------------------------------------------------------------------|------------------------------------------|
| PERCONA_TEST_PMM_CLICKHOUSE_ADDR | Sets ClickHouse address | 127.0.0.1:9000 |
| PERCONA_TEST_PMM_CLICKHOUSE_DATABASE | Sets ClickHouse database | pmm |
| PERCONA_TEST_PMM_CLICKHOUSE_POOL_SIZE | Sets ClickHouse connection pool size | none |
| PERCONA_TEST_PMM_CLICKHOUSE_BLOCK_SIZE | Sets ClickHouse block size | none |
| PERCONA_TEST_STARLARK_ALLOW_RECURSION | Allows recursive functions in checks scripts | false |
| PERCONA_TEST_NICER_API | Enables nicer API with default/zero values in response. | false |
| PERCONA_TEST_VERSION_SERVICE_URL | Sets versions service URL | https://check.percona.com/versions/v1 |
| PERCONA_TEST_CHECKS_FILE | Specifies path to local checks file and disables downloading check files from Percona Platform | none |
| PERCONA_TEST_CHECKS_RESEND_INTERVAL | Sets how often check alerts are resent to Alertmanager | 2 seconds |
| PERCONA_TEST_CHECKS_DISABLE_START_DELAY | Disables checks service startup delay | false |
| PERCONA_TEST_TELEMETRY_INTERVAL | ## TODO | |
| PERCONA_TEST_TELEMETRY_RETRY_BACKOFF | ## TODO | |
| PERCONA_TEST_DBAAS_KUBECONFIG | ## TODO | |
| PERCONA_TEST_PLATFORM_ADDRESS | Sets Percona Platform address | https://check.percona.com |
| PERCONA_TEST_PLATFORM_INSECURE | Allows insecure TLS connections to Percona Platform | false |
| PERCONA_TEST_PLATFORM_PUBLIC_KEY | Sets Percona Platform public key (Minisign) | set of keys embedded into managed binary |

## Add instances for monitoring
`make env-up` just starts the PMM server but it doesn't setup anything to be monitored. We can use [pmm-admin](https://github.com/percona/pmm-admin) and [pmm-agent](https://github.com/percona/pmm-agent) to add instances to be monitored to pmm-managed.

Expand Down
22 changes: 18 additions & 4 deletions managed/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ import (
"github.com/percona/pmm/managed/services/victoriametrics"
"github.com/percona/pmm/managed/services/vmalert"
"github.com/percona/pmm/managed/utils/clean"
"github.com/percona/pmm/managed/utils/envvars"
"github.com/percona/pmm/managed/utils/interceptors"
"github.com/percona/pmm/managed/utils/logger"
platformClient "github.com/percona/pmm/managed/utils/platform"
pmmerrors "github.com/percona/pmm/utils/errors"
"github.com/percona/pmm/utils/sqlmetrics"
"github.com/percona/pmm/version"
Expand Down Expand Up @@ -132,6 +134,7 @@ func addLogsHandler(mux *http.ServeMux, logs *supervisord.Logs) {
type gRPCServerDeps struct {
db *reform.DB
vmdb *victoriametrics.Service
platformClient *platformClient.Client
server *server.Server
agentsRegistry *agents.Registry
handler *agents.Handler
Expand Down Expand Up @@ -228,7 +231,7 @@ func runGRPCServer(ctx context.Context, deps *gRPCServerDeps) {
dbaasv1beta1.RegisterLogsAPIServer(gRPCServer, managementdbaas.NewLogsService(deps.db, deps.dbaasClient))
dbaasv1beta1.RegisterComponentsServer(gRPCServer, managementdbaas.NewComponentsService(deps.db, deps.dbaasClient, deps.versionServiceClient))

platformService, err := platform.New(deps.db, deps.supervisord, deps.checksService, deps.grafanaClient, deps.config.Services.Platform)
platformService, err := platform.New(deps.platformClient, deps.db, deps.supervisord, deps.checksService, deps.grafanaClient)
if err == nil {
platformpb.RegisterPlatformServer(gRPCServer, platformService)
} else {
Expand Down Expand Up @@ -699,7 +702,17 @@ func main() {
logs := supervisord.NewLogs(version.FullInfo(), pmmUpdateCheck)
supervisord := supervisord.New(*supervisordConfigDirF, pmmUpdateCheck, vmParams)

telemetry, err := telemetry.NewService(db, version.Version, cfg.Config.Services.Telemetry)
platformAddress, err := envvars.GetPlatformAddress()
if err != nil {
l.Fatal(err)
}

platformClient, err := platformClient.NewClient(db, platformAddress)
if err != nil {
l.Fatalf("Could not create Percona Portal client: %s", err)
}

telemetry, err := telemetry.NewService(db, platformClient, version.Version, cfg.Config.Services.Telemetry)
if err != nil {
l.Fatalf("Could not create telemetry service: %s", err)
}
Expand All @@ -714,15 +727,15 @@ func main() {

actionsService := agents.NewActionsService(qanClient, agentsRegistry)

checksService, err := checks.New(actionsService, alertManager, db, *victoriaMetricsURLF)
checksService, err := checks.New(db, platformClient, actionsService, alertManager, *victoriaMetricsURLF)
if err != nil {
l.Fatalf("Could not create checks service: %s", err)
}

prom.MustRegister(checksService)

// Integrated alerts services
templatesService, err := ia.NewTemplatesService(db)
templatesService, err := ia.NewTemplatesService(db, platformClient)
if err != nil {
l.Fatalf("Could not create templates service: %s", err)
}
Expand Down Expand Up @@ -918,6 +931,7 @@ func main() {
&gRPCServerDeps{
db: db,
vmdb: vmdb,
platformClient: platformClient,
server: server,
agentsRegistry: agentsRegistry,
handler: agentsHandler,
Expand Down
70 changes: 26 additions & 44 deletions managed/services/checks/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"os/exec"
"strconv"
Expand All @@ -33,7 +31,6 @@ import (
"text/template"
"time"

api "github.com/percona-platform/saas/gen/check/retrieval"
"github.com/percona-platform/saas/pkg/check"
"github.com/percona-platform/saas/pkg/common"
"github.com/pkg/errors"
Expand All @@ -49,7 +46,7 @@ import (
"github.com/percona/pmm/managed/models"
"github.com/percona/pmm/managed/services"
"github.com/percona/pmm/managed/utils/envvars"
"github.com/percona/pmm/managed/utils/saasreq"
"github.com/percona/pmm/managed/utils/platform"
"github.com/percona/pmm/managed/utils/signatures"
"github.com/percona/pmm/utils/pdeathsig"
"github.com/percona/pmm/version"
Expand All @@ -64,7 +61,7 @@ const (
envDisableStartDelay = "PERCONA_TEST_CHECKS_DISABLE_START_DELAY"

checkExecutionTimeout = 5 * time.Minute // limits execution time for every single check
platformRequestTimeout = 2 * time.Minute // time limit to get checks list from the platform
platformRequestTimeout = 2 * time.Minute // time limit to get checks list from the portal
resultAwaitTimeout = 20 * time.Second // should be greater than agents.defaultQueryActionTimeout
scriptExecutionTimeout = 5 * time.Second // time limit for running pmm-managed-starlark
resultCheckInterval = time.Second
Expand All @@ -91,18 +88,18 @@ var (

// Service is responsible for interactions with Percona Check service.
type Service struct {
platformClient *platform.Client
agentsRegistry agentsRegistry
alertmanagerService alertmanagerService
db *reform.DB
alertsRegistry *registry
vmClient v1.API

l *logrus.Entry
host string
publicKeys []string
startDelay time.Duration
resendInterval time.Duration
localChecksFile string // For testing
l *logrus.Entry
startDelay time.Duration
resendInterval time.Duration
platformPublicKeys []string
localChecksFile string // For testing

cm sync.Mutex
checks map[string]check.Check
Expand All @@ -117,7 +114,7 @@ type Service struct {
}

// New returns Service with given PMM version.
func New(agentsRegistry agentsRegistry, alertmanagerService alertmanagerService, db *reform.DB, VMAddress string) (*Service, error) {
func New(db *reform.DB, platformClient *platform.Client, agentsRegistry agentsRegistry, alertmanagerService alertmanagerService, VMAddress string) (*Service, error) {
l := logrus.WithField("component", "checks")

resendInterval := defaultResendInterval
Expand All @@ -126,28 +123,30 @@ func New(agentsRegistry agentsRegistry, alertmanagerService alertmanagerService,
resendInterval = d
}

host, err := envvars.GetSAASHost()
vmClient, err := metrics.NewClient(metrics.Config{Address: VMAddress})
if err != nil {
return nil, err
}

vmClient, err := metrics.NewClient(metrics.Config{Address: VMAddress})
if err != nil {
return nil, err
var platformPublicKeys []string
if k := envvars.GetPlatformPublicKeys(); k != nil {
l.Warnf("Percona Platform public keys changed to %q.", k)
platformPublicKeys = k
}

s := &Service{
db: db,
agentsRegistry: agentsRegistry,
alertmanagerService: alertmanagerService,
db: db,
alertsRegistry: newRegistry(resolveTimeoutFactor * resendInterval),
vmClient: v1.NewAPI(vmClient),

l: l,
host: host,
startDelay: defaultStartDelay,
resendInterval: resendInterval,
localChecksFile: os.Getenv(envCheckFile),
l: l,
platformClient: platformClient,
startDelay: defaultStartDelay,
resendInterval: resendInterval,
platformPublicKeys: platformPublicKeys,
localChecksFile: os.Getenv(envCheckFile),

mScriptsExecuted: prom.NewCounterVec(prom.CounterOpts{
Namespace: prometheusNamespace,
Expand All @@ -164,10 +163,6 @@ func New(agentsRegistry agentsRegistry, alertmanagerService alertmanagerService,
}, []string{"service_type", "check_type"}),
}

if k := envvars.GetPublicKeys(); k != nil {
l.Warnf("Public keys changed to %q.", k)
s.publicKeys = k
}
if d, _ := strconv.ParseBool(os.Getenv(envDisableStartDelay)); d {
l.Warn("Start delay disabled.")
s.startDelay = 0
Expand Down Expand Up @@ -1422,29 +1417,15 @@ func (s *Service) downloadChecks(ctx context.Context) ([]check.Check, error) {
return nil, nil
}

s.l.Infof("Downloading checks from %s ...", s.host)

nCtx, cancel := context.WithTimeout(ctx, platformRequestTimeout)
defer cancel()

var accessToken string
if ssoDetails, err := models.GetPerconaSSODetails(nCtx, s.db.Querier); err == nil {
accessToken = ssoDetails.AccessToken.AccessToken
}

endpoint := fmt.Sprintf("https://%s/v1/check/GetAllChecks", s.host)
bodyBytes, err := saasreq.MakeRequest(nCtx, http.MethodPost, endpoint, accessToken, nil,
&saasreq.SaasRequestOptions{})
resp, err := s.platformClient.GetChecks(nCtx)
if err != nil {
return nil, errors.Wrap(err, "failed to dial")
}

var resp *api.GetAllChecksResponse
if err := json.Unmarshal(bodyBytes, &resp); err != nil {
return nil, err
return nil, errors.WithStack(err)
}

if err = signatures.Verify(s.l, resp.File, resp.Signatures, s.publicKeys); err != nil {
if err = signatures.Verify(s.l, resp.File, resp.Signatures, s.platformPublicKeys); err != nil {
return nil, err
}

Expand All @@ -1453,9 +1434,10 @@ func (s *Service) downloadChecks(ctx context.Context) ([]check.Check, error) {
DisallowUnknownFields: false,
DisallowInvalidChecks: false,
}

checks, err := check.Parse(strings.NewReader(resp.File), params)
if err != nil {
return nil, err
return nil, errors.WithStack(err)
}

return checks, nil
Expand Down
Loading

0 comments on commit bc7ea8f

Please sign in to comment.