From 413e5dd8eb93a9ed8106bc329282e25d99464894 Mon Sep 17 00:00:00 2001 From: Marco Braga Date: Mon, 25 Sep 2023 13:29:27 -0300 Subject: [PATCH] add etcd checks ; cleanup --- data/templates/report/report.html | 2 +- hack/verify-codegen.sh | 16 ---- internal/opct/archive/runtime.go | 16 ++-- internal/opct/report/checks.go | 105 +++++++++++++++++++++------ internal/opct/report/report.go | 8 +- internal/opct/summary/result.go | 5 +- internal/openshift/ci/sippy/sippy.go | 4 + 7 files changed, 107 insertions(+), 49 deletions(-) delete mode 100644 hack/verify-codegen.sh diff --git a/data/templates/report/report.html b/data/templates/report/report.html index f9c0d660..a9134f37 100644 --- a/data/templates/report/report.html +++ b/data/templates/report/report.html @@ -317,7 +317,6 @@ }, getTotalCounter(errors=[]) { for (let e in errors) { - console.log(e) if (e == "total") { return errors[e] } @@ -358,6 +357,7 @@ dataP.push(this.getPluginStat("10-openshift-kube-conformance")[0]) dataP.push(this.getPluginStat("20-openshift-conformance-validated")[0]) dataP.push(this.getPluginStat("99-openshift-artifacts-collector")[0]) + dataP.push({"Plugin Name": "Execution Time", "Time": this.report.summary.runtime.executionTime}) const pluginHeaders = new Map(Object.entries(dataP[0])); let tbPluginsRes = { data: dataP, diff --git a/hack/verify-codegen.sh b/hack/verify-codegen.sh deleted file mode 100644 index bafc9041..00000000 --- a/hack/verify-codegen.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -if [ "$IS_CONTAINER" != "" ]; then - go install github.com/go-bindata/go-bindata/go-bindata@latest - set -xe - ./hack/update-generated-bindata.sh - set +ex - git diff --exit-code -else - podman run --rm \ - --env IS_CONTAINER=TRUE \ - --volume "${PWD}:/go/src/github.com/redhat-openshift-ecosystem/provider-certification-tool:z" \ - --workdir /go/src/github.com/redhat-openshift-ecosystem/provider-certification-tool \ - docker.io/golang:1.19 \ - ./hack/verify-codegen.sh "${@}" -fi diff --git a/internal/opct/archive/runtime.go b/internal/opct/archive/runtime.go index ab918627..a18e26d4 100644 --- a/internal/opct/archive/runtime.go +++ b/internal/opct/archive/runtime.go @@ -1,10 +1,16 @@ package archive type RuntimeInfoItem struct { - Name string `json:"name"` - Value string `json:"value"` + // Name holds the name of the item/attribute. + Name string `json:"name"` + // Value holds the value of the item/attribute. + Value string `json:"value"` + // Config is a optional field containing extra config information from the item. Config string `json:"config,omitempty"` - Time string `json:"time,omitempty"` - Total string `json:"total,omitempty"` - Delta string `json:"delta,omitempty"` + // Time is a optional field with the timestamp in the datetime format. + Time string `json:"time,omitempty"` + // Total is a optional field with the calculation of total time the item take. + Total string `json:"total,omitempty"` + // Detal is a optional field with the calculation of difference between two timers. + Delta string `json:"delta,omitempty"` } diff --git a/internal/opct/report/checks.go b/internal/opct/report/checks.go index f238b437..dd18fa26 100644 --- a/internal/opct/report/checks.go +++ b/internal/opct/report/checks.go @@ -13,6 +13,8 @@ package report import ( "fmt" "os" + "strconv" + "strings" "github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/opct/plugin" log "github.com/sirupsen/logrus" @@ -31,8 +33,9 @@ type CheckSummary struct { func NewCheckSummary(re *Report) *CheckSummary { baseURL := defaultBaseURL - // mkdocs serve - // export OPCT_DEV_BASE_URL_DOC="http://127.0.0.1:8000/provider-certification-tool" + // Developer environment: + // $ mkdocs serve + // $ export OPCT_DEV_BASE_URL_DOC="http://127.0.0.1:8000/provider-certification-tool" localDevBaseURL := os.Getenv("OPCT_DEV_BASE_URL_DOC") if localDevBaseURL != "" { baseURL = localDevBaseURL @@ -164,7 +167,6 @@ func NewCheckSummary(re *Report) *CheckSummary { if _, ok := re.Provider.Plugins[plugin.PluginNameKubernetesConformance]; !ok { return CheckResultFail } - fmt.Println(len(re.Provider.Plugins[plugin.PluginNameKubernetesConformance].TestsFailedPrio)) if len(re.Provider.Plugins[plugin.PluginNameKubernetesConformance].TestsFailedPrio) > 0 { return CheckResultFail } @@ -177,8 +179,8 @@ func NewCheckSummary(re *Report) *CheckSummary { // if _, ok := re.Provider.Plugins[PluginNameOpenShiftConformance]; !ok { // return CheckResultFail // } - // // "Acceptable" are relative, we are trying to observe the baselines and set - // // a "good" value considering a healthy cluster. + // // "Acceptance" are relative, the baselines is observed to set + // // an "accepted" value considering a healthy cluster in known provider/installation. // plugin := re.Provider.Plugins[PluginNameOpenShiftConformance] // if re.Provider.ClusterHealth.PodHealthTotal != 0 { // return CheckResultFail @@ -193,8 +195,8 @@ func NewCheckSummary(re *Report) *CheckSummary { if _, ok := re.Provider.Plugins[plugin.PluginNameOpenShiftConformance]; !ok { return CheckResultFail } - // "Acceptable" are relative, we are trying to observe the baselines and set - // a "good" value considering a healthy cluster. + // "Acceptance" are relative, the baselines is observed to set + // an "accepted" value considering a healthy cluster in known provider/installation. plugin := re.Provider.Plugins[plugin.PluginNameOpenShiftConformance] perc := (float64(plugin.Stat.Failed) / float64(plugin.Stat.Total)) * 100 if perc > 1.5 { @@ -210,8 +212,8 @@ func NewCheckSummary(re *Report) *CheckSummary { if _, ok := re.Provider.Plugins[plugin.PluginNameOpenShiftConformance]; !ok { return CheckResultFail } - // "Acceptable" are relative, we are trying to observe the baselines and set - // a "good" value considering a healthy cluster. + // "Acceptance" are relative, the baselines is observed to set + // an "accepted" value considering a healthy cluster in known provider/installation. plugin := re.Provider.Plugins[plugin.PluginNameOpenShiftConformance] perc := (float64(plugin.Stat.FilterFailedPrio) / float64(plugin.Stat.Total)) * 100 if perc > 0.5 { @@ -231,8 +233,8 @@ func NewCheckSummary(re *Report) *CheckSummary { if _, ok := cnt["total"]; !ok { return CheckResultFail } - // "Acceptable" are relative, we are trying to observe the baselines and set - // a "good" value considering a healthy cluster. + // "Acceptance" are relative, the baselines is observed to set + // an "accepted" value considering a healthy cluster in known provider/installation. total := cnt["total"] if total > 150 { return CheckResultFail @@ -256,8 +258,8 @@ func NewCheckSummary(re *Report) *CheckSummary { log.Debugf("Check Failed: OPCT-007: ErrorCounters[\"total\"]") return CheckResultFail } - // "Acceptable" are relative, we are trying to observe the baselines and set - // a "good" value considering a healthy cluster. + // "Acceptance" are relative, the baselines is observed to set + // an "accepted" value considering a healthy cluster in known provider/installation. total := re.Provider.MustGatherInfo.ErrorCounters["total"] if total > 30000 { log.Debugf("Check Failed: OPCT-007: want[<=25000] got[%d]", total) @@ -303,19 +305,80 @@ func NewCheckSummary(re *Report) *CheckSummary { Name: "[TODO] etcd fio must accept the tests (TODO)", Test: AcceptanceCheckFail, }) - checkSum.Checks = append(checkSum.Checks, &Check{ - Name: "[TODO] etcd slow requests: average must be under 500ms", - Test: ExampleAcceptanceCheckPass, - }) checkSum.Checks = append(checkSum.Checks, &Check{ Name: "[TODO] etcd slow requests: p99 must be lower than 900ms", Test: AcceptanceCheckFail, }) - checkSum.Checks = append(checkSum.Checks, &Check{ - Name: "[TODO] etcd slow requests: must nto have requests taking more than 1s", - Test: ExampleAcceptanceCheckPass, - }) */ + checkSum.Checks = append(checkSum.Checks, &Check{ + ID: "OPCT-010", + Name: "etcd logs: slow requests: average should be under 500ms", + Test: func() CheckResult { + prefix := "Check OPCT-010 Failed" + if re.Provider.MustGatherInfo == nil { + log.Debugf("%s: unable to read must-gather information.", prefix) + return CheckResultFail + } + if re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"] == nil { + log.Debugf("%s: unable to read statistics from parsed etcd logs.", prefix) + return CheckResultFail + } + if re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"].StatMean == "" { + log.Debugf("%s: unable to get p50/mean statistics from parsed data: %v", prefix, re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"]) + return CheckResultFail + } + values := strings.Split(re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"].StatMean, " ") + if values[0] == "" { + log.Debugf("%s: unable to get parse p50/mean: %v", prefix, values) + return CheckResultFail + } + value, err := strconv.ParseFloat(values[0], 64) + if err != nil { + log.Debugf("%s: unable to convert p50/mean to float: %v", prefix, err) + return CheckResultFail + } + if value >= 500 { + log.Debugf("%s acceptance criteria: want=[%v] got=[%v]", prefix, "<500", value) + return CheckResultFail + } + return CheckResultPass + }, + }) + checkSum.Checks = append(checkSum.Checks, &Check{ + ID: "OPCT-011", + Name: "etcd logs: slow requests: maximum should be under 1500ms", + Test: func() CheckResult { + prefix := "Check OPCT-011 Failed" + if re.Provider.MustGatherInfo == nil { + log.Debugf("%s: unable to read must-gather information.", prefix) + return CheckResultFail + } + if re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"] == nil { + log.Debugf("%s: unable to read statistics from parsed etcd logs.", prefix) + return CheckResultFail + } + if re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"].StatMax == "" { + log.Debugf("%s: unable to get max statistics from parsed data: %v", prefix, re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"]) + return CheckResultFail + } + values := strings.Split(re.Provider.MustGatherInfo.ErrorEtcdLogs.FilterRequestSlowAll["all"].StatMax, " ") + if values[0] == "" { + log.Debugf("%s: unable to get parse max: %v", prefix, values) + return CheckResultFail + } + value, err := strconv.ParseFloat(values[0], 64) + if err != nil { + log.Debugf("%s: unable to convert max to float: %v", prefix, err) + return CheckResultFail + } + if value >= 1000 { + log.Debugf("%s acceptance criteria: want=[%v] got=[%v]", prefix, "<1000", value) + return CheckResultFail + } + return CheckResultPass + }, + }) + // TODO(network): podConnectivityChecks must not have outages // Create docs reference when ID is set for c := range checkSum.Checks { diff --git a/internal/opct/report/report.go b/internal/opct/report/report.go index 98f3cce3..1bb08b0c 100644 --- a/internal/opct/report/report.go +++ b/internal/opct/report/report.go @@ -59,8 +59,9 @@ type ReportSummary struct { } type ReportSummaryRuntime struct { - Timers metrics.Timers `json:"timers,omitempty"` - Plugins map[string]string `json:"plugins,omitempty"` + Timers metrics.Timers `json:"timers,omitempty"` + Plugins map[string]string `json:"plugins,omitempty"` + ExecutionTime string `json:"executionTime,omitempty"` } type ReportSummaryTests struct { @@ -324,6 +325,9 @@ func (re *Report) populateSource(rs *summary.ResultSummary) error { arr := strings.Split(e.Name, "plugin finished ") re.Summary.Runtime.Plugins[arr[len(arr)-1]] = e.Delta } + if strings.HasPrefix(e.Name, "server finished") { + re.Summary.Runtime.ExecutionTime = e.Total + } } } if rs.Sonobuoy != nil && rs.Sonobuoy.MetaConfig != nil { diff --git a/internal/opct/summary/result.go b/internal/opct/summary/result.go index a77ee1d9..87cf85fe 100644 --- a/internal/opct/summary/result.go +++ b/internal/opct/summary/result.go @@ -84,9 +84,6 @@ func (rs *ResultSummary) Populate() error { switch pluginName { case plugin.PluginNameKubernetesConformance, plugin.PluginNameOpenShiftConformance: rs.isConformance = true - // default: - // log.Debugf("Ignoring Plugin %s", plugin) - // continue } log.Debugf("Processing results/Populating/Processing Plugin/%s", pluginName) err := rs.processPlugin(pluginName) @@ -95,7 +92,7 @@ func (rs *ResultSummary) Populate() error { } } - // TODO: review the fd usage for tarbal and file + log.Info("Processing results...") cleanup, err = rs.openReader() defer cleanup() if err != nil { diff --git a/internal/openshift/ci/sippy/sippy.go b/internal/openshift/ci/sippy/sippy.go index dcb2137e..e81276f7 100644 --- a/internal/openshift/ci/sippy/sippy.go +++ b/internal/openshift/ci/sippy/sippy.go @@ -123,6 +123,10 @@ func (a *SippyAPI) QueryTests(in *SippyTestsRequestInput) (*SippyTestsRequestOut } + if res.StatusCode < 200 || res.StatusCode > 299 { + return nil, fmt.Errorf("invalid status code: %d", res.StatusCode) + } + sippyResponse := SippyTestsRequestOutput{} if err := json.Unmarshal([]byte(body), &sippyResponse); err != nil { return nil, fmt.Errorf("couldn't unmarshal response body: %+v \nBody: %s", string(body), err)