Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix tunnel metrics #32

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
61 changes: 26 additions & 35 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,55 +4,46 @@ go 1.17

require github.com/lorenzosaino/go-sysctl v0.2.0

require (
github.com/BurntSushi/toml v0.4.1 // indirect
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
golang.org/x/mod v0.5.0 // indirect
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a // indirect
golang.org/x/tools v0.1.6 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
honnef.co/go/tools v0.2.1 // indirect
)

require (
github.com/golang/protobuf v1.5.2
go.uber.org/zap v1.21.0
go.uber.org/zap v1.24.0
google.golang.org/grpc v1.45.0
google.golang.org/protobuf v1.28.0
google.golang.org/protobuf v1.28.1
)

require (
github.com/pkg/errors v0.9.1
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
)
require github.com/pkg/errors v0.9.1

require (
github.com/go-ping/ping v0.0.0-20211130115550-779d1e919534
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/text v0.3.7 // indirect
google.golang.org/genproto v0.0.0-20200825200019-8632dd797987 // indirect
)
require github.com/go-ping/ping v0.0.0-20211130115550-779d1e919534

require (
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/google/uuid v1.2.0 // indirect
github.com/vishvananda/netlink v1.1.0
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
)

require (
github.com/prometheus/client_golang v1.12.1
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect
)
require github.com/prometheus/client_golang v1.14.0

require github.com/kubeslice/kubeslice-monitoring v0.1.10

require (
github.com/BurntSushi/toml v0.4.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/google/go-cmp v0.5.8 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/stretchr/testify v1.8.0 // indirect
github.com/google/uuid v1.2.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/goleak v1.1.12 // indirect
go.uber.org/multierr v1.6.0 // indirect
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/net v0.5.0 // indirect
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect
golang.org/x/sys v0.4.0 // indirect
golang.org/x/text v0.6.0 // indirect
golang.org/x/tools v0.1.12 // indirect
google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2 // indirect
honnef.co/go/tools v0.2.1 // indirect
)
475 changes: 455 additions & 20 deletions go.sum

Large diffs are not rendered by default.

36 changes: 0 additions & 36 deletions pkg/metrics/metrics.go

This file was deleted.

81 changes: 30 additions & 51 deletions pkg/metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,16 @@
package metrics

import (
"fmt"
"math/rand"
"net/http"
"os"
"time"

"github.com/kubeslice/gateway-sidecar/pkg/logger"
"github.com/kubeslice/kubeslice-monitoring/pkg/metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

//create latency metrics which has to be populated when we receive latency from tunnel
// create latency metrics which has to be populated when we receive latency from tunnel
var (
sourceClusterId = os.Getenv("CLUSTER_ID")
remoteClusterId = os.Getenv("REMOTE_CLUSTER_ID")
Expand All @@ -38,70 +36,51 @@ var (
sliceName = os.Getenv("SLICE_NAME")
namespace = "kubeslice_system"
constlabels = prometheus.Labels{
"slice_name": sliceName,
"slice": sliceName,
"source_slice_cluster_id": sourceClusterId,
"remote_slice_cluster_id": remoteClusterId,
"source_gateway_id": sourceGatewayId,
"remote_gateway_id": remoteGatewayId,
}
LatencyMetrics = getGaugeMetrics("slicegw_latency", "latency Metrics From Slice Gateway")
RxRateMetrics = getGaugeMetrics("rx_rate", "Rx rate from Slice Gateway.")
TxRateMetrics = getGaugeMetrics("tx_rate", "Tx rate from Slice Gateway.")
TunnelUP *prometheus.GaugeVec
LatencyMetrics *prometheus.GaugeVec
RxRateMetrics *prometheus.GaugeVec
TxRateMetrics *prometheus.GaugeVec
log *logger.Logger = logger.NewLogger()
)

//common method get gauge metrics alongwith labels
func getGaugeMetrics(name string, help string) prometheus.Gauge {
return prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: namespace,
Name: name,
Help: help,
ConstLabels: constlabels,
})
}

//method to register metrics to prometheus
// method to register metrics to prometheus
func StartMetricsCollector(metricCollectorPort string) {
metricCollectorPort = ":" + metricCollectorPort
log.Infof("Starting metric collector @ %s", metricCollectorPort)
rand.Seed(time.Now().Unix())
histogramVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "prom_request_time",
Help: "Time it has taken to retrieve the metrics",
}, []string{"time"})

prometheus.Register(histogramVec)
http.Handle("/metrics", promhttp.Handler())

mf, err := metrics.NewMetricsFactory(prometheus.DefaultRegisterer, metrics.MetricsFactoryOptions{
ReportingController: "gateway-sidecar",
})

prometheus.MustRegister(LatencyMetrics)
prometheus.MustRegister(RxRateMetrics)
prometheus.MustRegister(TxRateMetrics)
TunnelUP = mf.NewGauge("slicegateway_tunnel_up", "Slicegateway VPN tunnel status",
[]string{"slice", "source_gateway_id", "source_slice_cluster_id", "remote_gateway_id", "remote_slice_cluster_id"},
).MustCurryWith(constlabels)
LatencyMetrics = mf.NewGauge("slicegateway_tunnel_latency", "Latency between slice gateways in milliseconds",
[]string{"slice", "source_gateway_id", "source_slice_cluster_id", "remote_gateway_id", "remote_slice_cluster_id"},
).MustCurryWith(constlabels)
TxRateMetrics = mf.NewGauge("slicegateway_tunnel_txrate", "Transfer rate between slice gateways in bits per second",
[]string{"slice", "source_gateway_id", "source_slice_cluster_id", "remote_gateway_id", "remote_slice_cluster_id"},
).MustCurryWith(constlabels)
RxRateMetrics = mf.NewGauge("slicegateway_tunnel_rxrate", "Receive rate between slice gateways in bits per second",
[]string{"slice", "source_gateway_id", "source_slice_cluster_id", "remote_gateway_id", "remote_slice_cluster_id"},
).MustCurryWith(constlabels)

http.Handle("/metrics", newHandlerWithHistogram(promhttp.Handler(), histogramVec))
if err != nil {
log.Error("unable to initializ metrics factory")
os.Exit(1)
}

err := http.ListenAndServe(metricCollectorPort, nil)
err = http.ListenAndServe(metricCollectorPort, nil)
if err != nil {
log.Errorf("Failed to start metric collector @ %s", metricCollectorPort)
}
log.Info("Started Prometheus server at", metricCollectorPort)
}

//send http request
func newHandlerWithHistogram(handler http.Handler, histogram *prometheus.HistogramVec) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
start := time.Now()
status := http.StatusOK

defer func() {
histogram.WithLabelValues(fmt.Sprintf("%d", status)).Observe(time.Since(start).Seconds())
}()

if req.Method == http.MethodGet {
handler.ServeHTTP(w, req)
return
}
status = http.StatusBadRequest

w.WriteHeader(status)
})
}
1 change: 0 additions & 1 deletion pkg/sidecar/sidecarpb/gw_sidecar.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions pkg/sidecar/sidecarpb/gw_sidecar_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,16 @@ func getGwPodStatus() (*GwPodStatus, error) {
if statusMonitor != nil {
// Get the monitor status checks
checks := statusMonitor.Checks()
log.Info("checks","checks",checks)
log.Info("checks", "checks", checks)
for _, v := range checks {
stats, err := v.Status()
log.Info("stats","stats ",stats)
log.Info("stats", "stats ", stats)
if err != nil {
// this means that tunnel is not established
tunnelStatus.Status = TunnelStatusType_GW_TUNNEL_STATE_DOWN
podStatus.TunnelStatus = &tunnelStatus
log.Infof("pod status : %v", podStatus)
return podStatus,nil
return podStatus, nil
}
tunnelStatus = TunnelInterfaceStatus{
NetInterface: stats.NetInterface,
Expand Down
26 changes: 19 additions & 7 deletions pkg/status/tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,25 +76,35 @@ func (t *TunnelChecker) Execute(interface{}) (err error) {
if err != nil {
t.tunStatus = nil
t.log.Errorf("Unable to find the tun interface")
metrics.TunnelUP.WithLabelValues().Set(0)
metrics.LatencyMetrics.WithLabelValues().Set(0)
metrics.RxRateMetrics.WithLabelValues().Set(0)
metrics.TxRateMetrics.WithLabelValues().Set(0)
return err
}
if len(ifaceInfos) > 1 || len(ifaceInfos) == 0 {
t.tunStatus = nil
t.log.Errorf("Invalid tunnel interface")
metrics.TunnelUP.WithLabelValues().Set(0)
metrics.LatencyMetrics.WithLabelValues().Set(0)
metrics.RxRateMetrics.WithLabelValues().Set(0)
metrics.TxRateMetrics.WithLabelValues().Set(0)
return errors.New("Invalid tunnel interface")
}
if t.tunStatus == nil {
t.tunStatus = new(TunnelInterfaceStatus)
t.startTime = getCurTimeMs()
}
//add metrics which can be shown on prometheus
metrics.RecordLatencyMetric(float64(t.tunStatus.Latency))
metrics.RecordRxRateMetric(float64(t.tunStatus.RxRate))
metrics.RecordTxRateMetric(float64(t.tunStatus.TxRate))

t.tunStatus.NetInterface = ifaceInfos[0].Name
t.tunStatus.LocalIP = ifaceInfos[0].IP
t.updateNetworkStatus(ifaceInfos[0].Name)

//add metrics which can be shown on prometheus
metrics.LatencyMetrics.WithLabelValues().Set(float64(t.tunStatus.Latency))
metrics.RxRateMetrics.WithLabelValues().Set(float64(t.tunStatus.RxRate))
metrics.TxRateMetrics.WithLabelValues().Set(float64(t.tunStatus.TxRate))
metrics.TunnelUP.WithLabelValues().Set(1.0)
return nil
}

Expand Down Expand Up @@ -206,6 +216,7 @@ func (t *TunnelChecker) updateNetworkStatus(ifaceName string) error {
txCmd := fmt.Sprintf("cat /sys/class/net/%s/statistics/tx_bytes", ifaceName)
rxCmd := fmt.Sprintf("cat /sys/class/net/%s/statistics/rx_bytes", ifaceName)
var txBytes, rxBytes uint64 = 0, 0
t.startTime = getCurTimeMs()
cmdOut, err := cmd.Run(txCmd)
if err != nil {
errStr := fmt.Sprintf("Command: %v execution failed with err: %v and stderr : %v", txCmd, err, cmdOut)
Expand Down Expand Up @@ -239,13 +250,14 @@ func (t *TunnelChecker) updateNetworkStatus(ifaceName string) error {
return nil
}

t.tunStatus.TxRate = uint64(((txBytes - t.txBytes) / uint64(timeDelta)) * 8)
t.tunStatus.RxRate = uint64(((rxBytes - t.rxBytes) / uint64(timeDelta)) * 8)
// Multiplied by 8 to convert bytes to bits, Multiplied by 1000 to convert bits per milliseconds to bits per second
// Using float64 during calculation to avoid losing precision during division
t.tunStatus.TxRate = uint64(8 * float64(txBytes-t.txBytes) * 1000 / float64(timeDelta))
t.tunStatus.RxRate = uint64(8 * float64(rxBytes-t.rxBytes) * 1000 / float64(timeDelta))
t.log.Infof("TxRate: %v RxRate: %v", t.tunStatus.TxRate, t.tunStatus.RxRate)
t.log.Infof("Latency :%v\t Packet Loss:%v\t PeerIP:%v", t.tunStatus.Latency, t.tunStatus.PacketLoss, t.tunStatus.PeerIP)
t.txBytes = txBytes
t.rxBytes = rxBytes
t.startTime = getCurTimeMs()
return nil
}

Expand Down
Loading