feat: opct adm parsers commands
mtulio committed Mar 8, 2024
1 parent 2f5467e commit 5d31b55
Showing 6 changed files with 425 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cmd/root.go
@@ -10,6 +10,7 @@ import (
"github.com/spf13/viper"
"github.com/vmware-tanzu/sonobuoy/cmd/sonobuoy/app"

"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/cmd/adm"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/cmd/get"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/destroy"
"github.com/redhat-openshift-ecosystem/provider-certification-tool/pkg/report"
@@ -74,6 +75,7 @@ func init() {
rootCmd.AddCommand(version.NewCmdVersion())
rootCmd.AddCommand(report.NewCmdReport())
rootCmd.AddCommand(get.NewCmdGet())
rootCmd.AddCommand(adm.NewCmdAdm())

// Link in child commands direct from Sonobuoy
rootCmd.AddCommand(app.NewSonobuoyCommand())
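
The `adm` command group itself is defined in one of the six changed files not shown on this page (presumably `pkg/cmd/adm/adm.go`). A minimal sketch of the expected wiring, assuming standard cobra conventions and the `parseEtcdLogsCmd` defined later in this commit:

```go
package adm

import "github.com/spf13/cobra"

// NewCmdAdm returns the `opct adm` command group. Hypothetical sketch:
// the actual file is among the changed files not shown in this excerpt.
func NewCmdAdm() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "adm",
		Short: "Administrative commands.",
	}
	// Register the subcommands introduced by this commit.
	cmd.AddCommand(parseEtcdLogsCmd)
	return cmd
}
```
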
46 changes: 46 additions & 0 deletions docs/opct/adm/parse-etcd-logs.md
@@ -0,0 +1,46 @@
# opct adm parse-etcd-logs

Extract information from etcd pod logs collected by must-gather.

## Usage

- Added to OPCT in release: v0.5.0-alpha.3
- Command: `opct adm parse-etcd-logs [options] [args]`

Options:

- `--aggregator`: choose aggregator (all, day, hour, minute). Default: hour
- `--skip-error-counter`: skip the error counter calculation for a faster report. Default: false

Args:

- `path/to/must-gather/directory` (optional)

## Examples

- Read from stdin:

```bash
export MUST_GATHER_PATH=./must-gather
mkdir -p ${MUST_GATHER_PATH}
tar xfz must-gather.tar.gz -C ${MUST_GATHER_PATH}
cat ${MUST_GATHER_PATH}/*/*/namespaces/openshift-etcd/pods/*/etcd/etcd/logs/*.log |\
opct adm parse-etcd-logs
```

- Parse a directory with must-gather:

```bash
opct adm parse-etcd-logs ${MUST_GATHER_PATH}
```

- Aggregate by day:

```bash
opct adm parse-etcd-logs --aggregator day ${MUST_GATHER_PATH}
```

- Ignore error counters:

```bash
opct adm parse-etcd-logs --skip-error-counter=true ${MUST_GATHER_PATH}
```
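
For reference, the report printed by `parse-etcd-logs` is a set of tab-aligned tables (see the implementation in `pkg/cmd/adm/parseetcdlogs.go` later in this commit). A sketch of the layout; the filter name, group IDs, and values are illustrative only:

```text
= Filter Name: apply-took-too-long =
== Group by: hour ==

=== Summary ===
ID              COUNT   >=500ms >=1s    Max(ms)
2024-03-08T10   5120    37      4       1503

=== Buckets (ms) ===
(per-group request counts split into 200ms..900ms and >=1s ranges)

=== Timers ===
ID              COUNT   MIN     MEDIAN  MAX     P99     P99.9   P90     StdDev
2024-03-08T10   5120    101ms   210ms   1.5s    980ms   1.2s    480ms   85ms

=== Log error counters ===
(shown unless --skip-error-counter is set)
```
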
82 changes: 82 additions & 0 deletions docs/opct/adm/parse-metrics.md
@@ -0,0 +1,82 @@
# opct adm parse-metrics

Process Prometheus metrics and plot HTML charts.

## Usage

- Added to OPCT in release: v0.5.0-alpha.3
- Feature status: beta
- Command: `opct adm parse-metrics [options]`

Options:

- `--input`: Input metrics file. Example: metrics.tar.xz
- `--output`: Output directory. Example: /tmp/metrics

## Metrics collector

The metrics can be collected in two different ways:

- OPCT archive (version v0.5.0-alpha.3+)
- must-gather-monitoring utility


## Examples

### Plot the metrics charts collected by OPCT

1. Extract the must-gather-monitoring archive from the OPCT archive

```bash
tar xfz archive.tar.gz plugins/99-openshift-artifacts-collector/results/global/artifacts_must-gather-metrics.tar.xz
```

2. Process the metrics to generate charts

```bash
./opct adm parse-metrics \
--input plugins/99-openshift-artifacts-collector/results/global/artifacts_must-gather-metrics.tar.xz \
--output ./metrics
```

3. Open the metrics directory in your file browser to explore the charts.

- `metrics.html`: charts plotted with [go-echarts](https://github.com/go-echarts)
- `index.html`: charts plotted with [Plotly](https://plotly.com/javascript/)

To explore the full JavaScript features, use an HTTP file server to view the charts:

```bash
cd ./metrics && python -m http.server 9090
```

### Plot the metrics charts collected by must-gather-monitoring

1. Run must-gather-monitoring to collect the metrics

```bash
oc adm must-gather --image=quay.io/opct/must-gather-monitoring:v0.1.0 &&\
tar xfJ must-gather-metrics.tar.xz
```

2. Process the metrics to generate charts

```bash
./opct adm parse-metrics \
--input must-gather-metrics.tar.xz \
--output ./metrics
```

### Plot the metrics natively from OPCT report

The `opct report` command generates charts automatically when
metrics are available and the HTML report is enabled.

- Generate the report:
```bash
./opct report archive.tar.gz --save-to ./report
```

- Open the HTML report in your browser at http://localhost:9090/metrics
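
If nothing is serving that port, the same file-server approach shown earlier works here as well (a sketch assuming the `./report` path from the previous step):

```bash
cd ./report && python -m http.server 9090
```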

Read more about `opct report` in the [documentation](../report.md).
185 changes: 185 additions & 0 deletions pkg/cmd/adm/parseetcdlogs.go
@@ -0,0 +1,185 @@
package adm

import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"text/tabwriter"

log "github.com/sirupsen/logrus"

"github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/opct/archive"
mg "github.com/redhat-openshift-ecosystem/provider-certification-tool/internal/openshift/mustgather"
"github.com/spf13/cobra"
)

type parseEtcdLogsInput struct {
aggregator string
skipErrorCounters bool
}

var parseEtcdLogsArgs parseEtcdLogsInput
var parseEtcdLogsCmd = &cobra.Command{
Use: "parse-etcd-logs",
Example: "opct adm parse-etcd-logs --aggregator hour",
Short: "Parse ETCD logs.",
Run: parseEtcdLogsRun,
}

func init() {
parseEtcdLogsCmd.Flags().StringVar(&parseEtcdLogsArgs.aggregator, "aggregator", "hour", "Aggregator. Valid: all, day, hour, minute. Default: hour")
parseEtcdLogsCmd.Flags().BoolVar(&parseEtcdLogsArgs.skipErrorCounters, "skip-error-counter", false, "Skip calculation of error counters for a faster report. Default: false")
}

// printTable writes each row to stdout as tab-aligned columns.
func printTable(table [][]string) {
writer := tabwriter.NewWriter(os.Stdout, 0, 4, 0, '\t', 0)
for _, row := range table {
for _, col := range row {
fmt.Fprintf(writer, "%s\t", col)
}
fmt.Fprintf(writer, "\n")
}
writer.Flush()
}

// check panics on unrecoverable errors while reading log files.
func check(e error) {
if e != nil {
panic(e)
}
}

// parseEtcdLogsRun aggregates etcd "apply request took too long" entries and
// error counters from a must-gather directory (args[0]) or from stdin.
func parseEtcdLogsRun(cmd *cobra.Command, args []string) {

errCounters := &archive.ErrorCounter{}
filterATTL := mg.NewFilterApplyTookTooLong(parseEtcdLogsArgs.aggregator)

// when must-gather directory is provided as argument
if len(args) > 0 {
log.Printf("Processing logs from directory %s...\n", args[0])
reEtcdLog := regexp.MustCompile(`(\/namespaces\/openshift-etcd\/pods\/.*\/etcd\/etcd\/logs\/.*\.log)`)
err := filepath.Walk(args[0],
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !reEtcdLog.MatchString(path) {
return nil
}

log.Debugf("Processing etcd log file: %s", path)
dat, err := os.ReadFile(path)
check(err)

for _, line := range strings.Split(string(dat), "\n") {
filterATTL.ProcessLine(line)
if !parseEtcdLogsArgs.skipErrorCounters {
lineErrCounter := archive.NewErrorCounter(&line, mg.EtcdLogErrorPatterns)
errCounters = archive.MergeErrorCounters(errCounters, &lineErrCounter)
}
}
log.Debugf("etcd log processed: %s", path)
return nil
})
if err != nil {
log.Errorf("One or more errors when reading from directory: %v", err)
os.Exit(1)
}

} else {
log.Println("Processing logs from stdin...")
s := bufio.NewScanner(os.Stdin)
for s.Scan() {
line := s.Text()
filterATTL.ProcessLine(line)
if !parseEtcdLogsArgs.skipErrorCounters {
lineErrCounter := archive.NewErrorCounter(&line, mg.EtcdLogErrorPatterns)
errCounters = archive.MergeErrorCounters(errCounters, &lineErrCounter)
}
}
}

stat := filterATTL.GetStat(0)

fmt.Printf("= Filter Name: %s =\n", filterATTL.Name)
fmt.Printf("== Group by: %s ==\n", filterATTL.GroupBy)

fmtCol := func(col string) string {
return fmt.Sprintf("%-13s", col)
}

tbSummary := [][]string{{fmtCol("ID"), fmtCol("COUNT"), fmtCol(">=500ms"), fmtCol(">=1s"), fmtCol("Max(ms)")}}

tbBuckets := [][]string{{fmtCol("ID"), fmtCol("COUNT"),
fmtCol(mg.BucketRangeName200Ms),
fmtCol(mg.BucketRangeName300Ms),
fmtCol(mg.BucketRangeName400Ms),
fmtCol(mg.BucketRangeName500Ms),
fmtCol(mg.BucketRangeName600Ms),
fmtCol(mg.BucketRangeName700Ms),
fmtCol(mg.BucketRangeName800Ms),
fmtCol(mg.BucketRangeName900Ms),
fmtCol(">=1s")}}

tbTimers := [][]string{{fmtCol("ID"), fmtCol("COUNT"), fmtCol("MIN"), fmtCol("MEDIAN"),
fmtCol("MAX"), fmtCol("P99"), fmtCol("P99.9"), fmtCol("P90"), fmtCol("StdDev")}}

groups := make([]string, 0, len(stat))
for k := range stat {
groups = append(groups, k)
}
sort.Strings(groups)
for _, gk := range groups {

tbSummary = append(tbSummary, []string{fmtCol(gk),
fmtCol(fmt.Sprintf("%d", stat[gk].RequestCount)),
fmtCol(stat[gk].Higher500ms),
fmtCol(stat[gk].Buckets[mg.BucketRangeName1000Inf]),
fmtCol(stat[gk].StatMax)})

tbBuckets = append(tbBuckets, []string{fmtCol(gk),
fmtCol(fmt.Sprintf("%d", stat[gk].RequestCount)),
fmtCol(stat[gk].Buckets[mg.BucketRangeName200Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName300Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName400Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName500Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName600Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName700Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName800Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName900Ms]),
fmtCol(stat[gk].Buckets[mg.BucketRangeName1000Inf])})

tbTimers = append(tbTimers, []string{fmtCol(gk), fmtCol(stat[gk].StatCount),
fmtCol(stat[gk].StatMin), fmtCol(stat[gk].StatMedian),
fmtCol(stat[gk].StatMax), fmtCol(stat[gk].StatPerc99),
fmtCol(stat[gk].StatPerc999), fmtCol(stat[gk].StatPerc90),
fmtCol(stat[gk].StatStddev),
})
}

fmt.Printf("\n=== Summary ===\n")
printTable(tbSummary)

fmt.Printf("\n=== Buckets (ms) ===\n")
printTable(tbBuckets)

fmt.Printf("\n=== Timers ===\n")
printTable(tbTimers)

if !parseEtcdLogsArgs.skipErrorCounters {
fmt.Printf("\n=== Log error counters ===\n")
tbErrors := [][]string{{
fmt.Sprintf("%-60s", "ERROR PATTERN"),
fmt.Sprintf("%-s", "COUNTER"),
}}
// Sort patterns so the report output is deterministic.
keys := make([]string, 0, len(*errCounters))
for k := range *errCounters {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
tbErrors = append(tbErrors, []string{
fmt.Sprintf("%-60s", k),
fmt.Sprintf("%d", (*errCounters)[k])})
}
printTable(tbErrors)
}
}