Skip to content

Commit

Permalink
PWX-38589: Fix the CPU spike and excessive logging in telemetry pods
Browse files: browse the repository at this point in the history
  • Loading branch information
Max Morton committed Aug 19, 2024
1 parent 303cc37 commit 534f548
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 10 deletions.
2 changes: 1 addition & 1 deletion deploy/ccm/ccm.properties
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"/var/cores/px_info.log",
"/var/cores/px_patch_fs.log"
],
"phonehome_hour_range": 8760,
"phonehome_hour_range": 744,
"phonehome_sent": "/var/logs/phonehome.sent",
"always_scan_range_days": 7,
"max_retry_per_hour": 5,
Expand Down
17 changes: 17 additions & 0 deletions drivers/storage/portworx/component/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ const (
daemonsetFileNameTelemetryPhonehome = "phonehome-cluster.yaml"

configParameterApplianceID = "APPLIANCE_ID"
configParameterApplianceName = "APPLIANCE_NAME"
configParameterComponentSN = "COMPONENT_SN"
configParameterProductVersion = "PRODUCT_VERSION"
configParameterRegisterProxyURL = "REGISTER_PROXY_URL"
Expand Down Expand Up @@ -588,6 +589,7 @@ func (t *telemetry) createCCMGoConfigMapRegisterProxy(
cloudSupportPort, tcpProxyPort, envoyRedirectPort := getCCMCloudSupportPorts(cluster, defaultRegisterPort)
replaceMap := map[string]string{
configParameterApplianceID: cluster.Status.ClusterUID,
configParameterApplianceName: cluster.Status.ClusterName,
configParameterComponentSN: cluster.Name,
configParameterProductVersion: pxutil.GetPortworxVersion(cluster).String(),
configParameterRegisterProxyURL: getArcusRegisterProxyURL(cluster),
Expand Down Expand Up @@ -644,6 +646,7 @@ func (t *telemetry) createCCMGoConfigMapTelemetryPhonehomeProxy(
cloudSupportPort, tcpProxyPort, envoyRedirectPort := getCCMCloudSupportPorts(cluster, defaultPhonehomePort)
replaceMap := map[string]string{
configParameterApplianceID: cluster.Status.ClusterUID,
configParameterApplianceName: cluster.Status.ClusterName,
configParameterProductVersion: pxutil.GetPortworxVersion(cluster).String(),
configParameterRestProxyURL: getArcusRestProxyURL(cluster),
configParameterRestCloudSupportPort: fmt.Sprint(cloudSupportPort),
Expand Down Expand Up @@ -701,6 +704,7 @@ func (t *telemetry) createCCMGoConfigMapCollectorProxyV2(
cloudSupportPort, tcpProxyPort, envoyRedirectPort := getCCMCloudSupportPorts(cluster, defaultCollectorPort)
replaceMap := map[string]string{
configParameterApplianceID: cluster.Status.ClusterUID,
configParameterApplianceName: cluster.Status.ClusterName,
configParameterProductVersion: pxutil.GetPortworxVersion(cluster).String(),
configParameterRestProxyURL: getArcusRestProxyURL(cluster),
configParameterRestCloudSupportPort: fmt.Sprint(cloudSupportPort),
Expand Down Expand Up @@ -830,6 +834,10 @@ func (t *telemetry) createDeploymentTelemetryRegistration(
Name: configParameterApplianceID,
Value: cluster.Status.ClusterUID,
})
container.Env = append(container.Env, v1.EnvVar{
Name: configParameterApplianceName,
Value: cluster.Status.ClusterName,
})
} else if container.Name == containerNameTelemetryProxy {
container.Image = proxyImage
}
Expand Down Expand Up @@ -893,6 +901,15 @@ func (t *telemetry) createDaemonSetTelemetryPhonehome(
container := &daemonset.Spec.Template.Spec.Containers[i]
if container.Name == containerNameLogUploader {
container.Image = logUploaderImage
// add APPLIANCE_ID and APPLIANCE_NAME env vars
container.Env = append(container.Env, v1.EnvVar{
Name: configParameterApplianceID,
Value: cluster.Status.ClusterUID,
})
container.Env = append(container.Env, v1.EnvVar{
Name: configParameterApplianceName,
Value: cluster.Status.ClusterName,
})
for j := 0; j < len(container.Ports); j++ {
port := &container.Ports[j]
if port.Name == portNameLogUploaderContainer {
Expand Down
20 changes: 12 additions & 8 deletions drivers/storage/portworx/components_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15189,7 +15189,8 @@ func TestTelemetryCCMGoEnableAndDisable(t *testing.T) {
},
},
Status: corev1.StorageClusterStatus{
ClusterUID: "test-clusteruid",
ClusterUID: "test-clusteruid",
ClusterName: "test-clustername",
},
}

Expand Down Expand Up @@ -15447,7 +15448,8 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
},
},
Status: corev1.StorageClusterStatus{
ClusterUID: "test-clusteruid",
ClusterName: "test-clustername",
ClusterUID: "test-clusteruid",
},
}

Expand All @@ -15466,7 +15468,7 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
deployment := &appsv1.Deployment{}
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 2)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)

// Compatible PX & Incompatible Telemetry Images
cluster.Spec.Image = "portworx/image:3.2.0"
Expand All @@ -15483,7 +15485,7 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
// Validate deployments
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 2)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)

// Incompatible PX & compatible Telemetry Images
cluster.Spec.Image = "portworx/image:3.0.0"
Expand All @@ -15500,7 +15502,7 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
// Validate deployments
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 2)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)

// Compatible PX & Telemetry Images
cluster.Spec.Image = "portworx/image:3.2.0"
Expand All @@ -15518,13 +15520,15 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
// Validate deployments
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 4)
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[0].Name, "CONFIG")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[0].Value, "config/config_properties_px.yaml")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[1].Name, "APPLIANCE_ID")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[1].Value, "test-clusteruid")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Name, "REFRESH_TOKEN")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Value, "")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Name, "APPLIANCE_NAME")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Value, "test-clustername")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[3].Name, "REFRESH_TOKEN")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[3].Value, "")

// Port shift on OCP
cluster.Annotations[pxutil.AnnotationIsOpenshift] = "true"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ data:
"/var/cores/px_info.log",
"/var/cores/px_patch_fs.log"
],
"phonehome_hour_range": 8760,
"phonehome_hour_range": 744,
"phonehome_sent": "/var/logs/phonehome.sent",
"always_scan_range_days": 7,
"max_retry_per_hour": 5,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: APPLIANCE_ID
value: test-clusteruid
- name: APPLIANCE_NAME
value: test-clustername
image: docker.io/purestorage/log-upload:1.2.3
imagePullPolicy: Always
name: log-upload-service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: APPLIANCE_ID
value: test-clusteruid
- name: APPLIANCE_NAME
value: test-clustername
image: docker.io/purestorage/log-upload:1.2.3
imagePullPolicy: Always
name: log-upload-service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ spec:
value: config/config_properties_px.yaml
- name: APPLIANCE_ID
value: test-clusteruid
- name: APPLIANCE_NAME
value: test-clustername
image: docker.io/portworx/px-telemetry:4.3.2
imagePullPolicy: Always
name: registration
Expand Down

0 comments on commit 534f548

Please sign in to comment.