Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PWX-38589: Fix the CPU spike and excessive logging in telemetry pods #1652

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deploy/ccm/ccm.properties
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"/var/cores/px_info.log",
"/var/cores/px_patch_fs.log"
],
"phonehome_hour_range": 8760,
"phonehome_hour_range": 744,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jrivera-px / @zoxpx is there some way to flag to the larger org that this is changing?
I have a hard time believing that extracting the last year of logs (8760 hours) was crucial to anyone's workflow, but it's probably best to double-check before we cut the range to 744 hours (31 days).

"phonehome_sent": "/var/logs/phonehome.sent",
"always_scan_range_days": 7,
"max_retry_per_hour": 5,
Expand Down
17 changes: 17 additions & 0 deletions drivers/storage/portworx/component/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ const (
daemonsetFileNameTelemetryPhonehome = "phonehome-cluster.yaml"

configParameterApplianceID = "APPLIANCE_ID"
configParameterApplianceName = "APPLIANCE_NAME"
configParameterComponentSN = "COMPONENT_SN"
configParameterProductVersion = "PRODUCT_VERSION"
configParameterRegisterProxyURL = "REGISTER_PROXY_URL"
Expand Down Expand Up @@ -588,6 +589,7 @@ func (t *telemetry) createCCMGoConfigMapRegisterProxy(
cloudSupportPort, tcpProxyPort, envoyRedirectPort := getCCMCloudSupportPorts(cluster, defaultRegisterPort)
replaceMap := map[string]string{
configParameterApplianceID: cluster.Status.ClusterUID,
configParameterApplianceName: cluster.Status.ClusterName,
configParameterComponentSN: cluster.Name,
configParameterProductVersion: pxutil.GetPortworxVersion(cluster).String(),
configParameterRegisterProxyURL: getArcusRegisterProxyURL(cluster),
Expand Down Expand Up @@ -644,6 +646,7 @@ func (t *telemetry) createCCMGoConfigMapTelemetryPhonehomeProxy(
cloudSupportPort, tcpProxyPort, envoyRedirectPort := getCCMCloudSupportPorts(cluster, defaultPhonehomePort)
replaceMap := map[string]string{
configParameterApplianceID: cluster.Status.ClusterUID,
configParameterApplianceName: cluster.Status.ClusterName,
configParameterProductVersion: pxutil.GetPortworxVersion(cluster).String(),
configParameterRestProxyURL: getArcusRestProxyURL(cluster),
configParameterRestCloudSupportPort: fmt.Sprint(cloudSupportPort),
Expand Down Expand Up @@ -701,6 +704,7 @@ func (t *telemetry) createCCMGoConfigMapCollectorProxyV2(
cloudSupportPort, tcpProxyPort, envoyRedirectPort := getCCMCloudSupportPorts(cluster, defaultCollectorPort)
replaceMap := map[string]string{
configParameterApplianceID: cluster.Status.ClusterUID,
configParameterApplianceName: cluster.Status.ClusterName,
configParameterProductVersion: pxutil.GetPortworxVersion(cluster).String(),
configParameterRestProxyURL: getArcusRestProxyURL(cluster),
configParameterRestCloudSupportPort: fmt.Sprint(cloudSupportPort),
Expand Down Expand Up @@ -830,6 +834,10 @@ func (t *telemetry) createDeploymentTelemetryRegistration(
Name: configParameterApplianceID,
Value: cluster.Status.ClusterUID,
})
container.Env = append(container.Env, v1.EnvVar{
Name: configParameterApplianceName,
Value: cluster.Status.ClusterName,
})
} else if container.Name == containerNameTelemetryProxy {
container.Image = proxyImage
}
Expand Down Expand Up @@ -893,6 +901,15 @@ func (t *telemetry) createDaemonSetTelemetryPhonehome(
container := &daemonset.Spec.Template.Spec.Containers[i]
if container.Name == containerNameLogUploader {
container.Image = logUploaderImage
// add APPLIANCE_ID and APPLIANCE_NAME env vars
container.Env = append(container.Env, v1.EnvVar{
Name: configParameterApplianceID,
Value: cluster.Status.ClusterUID,
})
container.Env = append(container.Env, v1.EnvVar{
Name: configParameterApplianceName,
Value: cluster.Status.ClusterName,
})
for j := 0; j < len(container.Ports); j++ {
port := &container.Ports[j]
if port.Name == portNameLogUploaderContainer {
Expand Down
23 changes: 14 additions & 9 deletions drivers/storage/portworx/components_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15189,7 +15189,8 @@ func TestTelemetryCCMGoEnableAndDisable(t *testing.T) {
},
},
Status: corev1.StorageClusterStatus{
ClusterUID: "test-clusteruid",
ClusterUID: "test-clusteruid",
ClusterName: "test-clustername",
},
}

Expand Down Expand Up @@ -15447,7 +15448,8 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
},
},
Status: corev1.StorageClusterStatus{
ClusterUID: "test-clusteruid",
ClusterName: "test-clustername",
ClusterUID: "test-clusteruid",
},
}

Expand All @@ -15466,7 +15468,7 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
deployment := &appsv1.Deployment{}
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 2)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)

// Compatible PX & Incompatible Telemetry Images
cluster.Spec.Image = "portworx/image:3.2.0"
Expand All @@ -15483,7 +15485,7 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
// Validate deployments
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 2)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)

// Incompatible PX & compatible Telemetry Images
cluster.Spec.Image = "portworx/image:3.0.0"
Expand All @@ -15500,7 +15502,7 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
// Validate deployments
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 2)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)

// Compatible PX & Telemetry Images
cluster.Spec.Image = "portworx/image:3.2.0"
Expand All @@ -15518,13 +15520,15 @@ func TestTelemetryContainerOrchestratorEnable(t *testing.T) {
// Validate deployments
err = testutil.Get(k8sClient, deployment, component.DeploymentNameTelemetryRegistration, cluster.Namespace)
require.NoError(t, err)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 3)
require.Len(t, deployment.Spec.Template.Spec.Containers[0].Env, 4)
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[0].Name, "CONFIG")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[0].Value, "config/config_properties_px.yaml")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[1].Name, "APPLIANCE_ID")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[1].Value, "test-clusteruid")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Name, "REFRESH_TOKEN")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Value, "")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Name, "APPLIANCE_NAME")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[2].Value, "test-clustername")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[3].Name, "REFRESH_TOKEN")
require.Equal(t, deployment.Spec.Template.Spec.Containers[0].Env[3].Value, "")

// Port shift on OCP
cluster.Annotations[pxutil.AnnotationIsOpenshift] = "true"
Expand Down Expand Up @@ -16636,7 +16640,8 @@ func TestTelemetryCCMGoRestartPhonehome(t *testing.T) {
StartPort: &startPort,
},
Status: corev1.StorageClusterStatus{
ClusterUID: "test-clusteruid",
ClusterUID: "test-clusteruid",
ClusterName: "test-clustername",
},
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ data:
"/var/cores/px_info.log",
"/var/cores/px_patch_fs.log"
],
"phonehome_hour_range": 8760,
"phonehome_hour_range": 744,
"phonehome_sent": "/var/logs/phonehome.sent",
"always_scan_range_days": 7,
"max_retry_per_hour": 5,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: APPLIANCE_ID
value: test-clusteruid
- name: APPLIANCE_NAME
value: test-clustername
image: docker.io/purestorage/log-upload:1.2.3
imagePullPolicy: Always
name: log-upload-service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: APPLIANCE_ID
value: test-clusteruid
- name: APPLIANCE_NAME
value: test-clustername
image: docker.io/purestorage/log-upload:1.2.3
imagePullPolicy: Always
name: log-upload-service
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ spec:
value: config/config_properties_px.yaml
- name: APPLIANCE_ID
value: test-clusteruid
- name: APPLIANCE_NAME
value: test-clustername
image: docker.io/portworx/px-telemetry:4.3.2
imagePullPolicy: Always
name: registration
Expand Down
Loading