diff --git a/chart/README.md b/chart/README.md
index 89885d22c9..7a6e94cbae 100644
--- a/chart/README.md
+++ b/chart/README.md
@@ -289,12 +289,13 @@ For more details like types or options, you can refer to **Settings Reference**
 | defaultSettings.engineReplicaTimeout | In seconds. The setting specifies the timeout between the engine and replica(s), and the value should be between 8 and 30 seconds. The default value is 8 seconds. |
 | defaultSettings.failedBackupTTL | In minutes. This setting determines how long Longhorn will keep the backup resource that was failed. Set to 0 to disable the auto-deletion. |
 | defaultSettings.fastReplicaRebuildEnabled | This feature supports the fast replica rebuilding. It relies on the checksum of snapshot disk files, so setting the snapshot-data-integrity to **enable** or **fast-check** is a prerequisite. |
-| defaultSettings.guaranteedInstanceManagerCPU | This integer value indicates how many percentages of the total allocatable CPU on each node will be reserved for each instance manager Pod. You can leave it with the default value, which is 12%. |
+| defaultSettings.guaranteedInstanceManagerCPU | This integer value indicates how many percentages of the total allocatable CPU on each node will be reserved for each instance manager Pod for v1 data engine. You can leave it with the default value, which is 12%. |
 | defaultSettings.kubernetesClusterAutoscalerEnabled | Enabling this setting will notify Longhorn that the cluster is using Kubernetes Cluster Autoscaler. |
 | defaultSettings.logLevel | The log level Panic, Fatal, Error, Warn, Info, Debug, Trace used in longhorn manager. Default to Info. |
 | defaultSettings.nodeDownPodDeletionPolicy | Defines the Longhorn action when a Volume is stuck with a StatefulSet/Deployment Pod on a node that is down. |
 | defaultSettings.nodeDrainPolicy | Define the policy to use when a node with the last healthy replica of a volume is drained. |
 | defaultSettings.offlineReplicaRebuilding | This setting allows users to enable the offline replica rebuilding for volumes using v2 data engine. |
+| defaultSettings.v2DataEngineGuaranteedInstanceManagerCPU | This integer value indicates how many millicpus of CPU on each node will be reserved for each instance manager Pod for v2 data engine. You can leave it with the default value, which is 1250 millicpus. |
 | defaultSettings.orphanAutoDeletion | This setting allows Longhorn to delete the orphan resource and its corresponding orphaned data automatically like stale replicas. Orphan resources on down or unknown nodes will not be cleaned up automatically. |
 | defaultSettings.priorityClass | priorityClass for Longhorn system-managed components This setting can help prevent Longhorn components from being evicted under Node Pressure. Notice that this will be applied to Longhorn user-deployed components by default if there are no priority class values set yet, such as `longhornManager.priorityClass`. |
 | defaultSettings.recurringFailedJobsHistoryLimit | This setting specifies how many failed backup or snapshot job histories should be retained. History will not be retained if the value is 0. |
diff --git a/chart/questions.yaml b/chart/questions.yaml
index d556163313..61330a621f 100644
--- a/chart/questions.yaml
+++ b/chart/questions.yaml
@@ -541,8 +541,8 @@ questions:
   min: 0
   default: 300
 - variable: defaultSettings.guaranteedInstanceManagerCPU
-  label: Guaranteed Instance Manager CPU
-  description: "This integer value indicates how many percentages of the total allocatable CPU on each node will be reserved for each instance manager Pod. You can leave it with the default value, which is 12%."
+  label: Guaranteed Instance Manager CPU for V1 Data Engine
+  description: "This integer value indicates how many percentages of the total allocatable CPU on each node will be reserved for each instance manager Pod for v1 data engine. You can leave it with the default value, which is 12%."
   group: "Longhorn Default Settings"
   type: int
   min: 0
@@ -861,3 +861,11 @@ questions:
   - "rke1"
   - "rke2"
   - "k3s"
+- variable: defaultSettings.v2DataEngineGuaranteedInstanceManagerCPU
+  label: Guaranteed Instance Manager CPU for V2 Data Engine
+  description: "This integer value indicates how many millicpus on each node will be reserved for each instance manager Pod for v2 data engine. By default, the SPDK target daemon within an instance manager Pod utilizes 1 CPU core. Ensuring a minimum CPU usage is essential for sustaining engine and replica stability, especially during periods of high node workload."
+  group: "Longhorn Default Settings"
+  type: int
+  min: 0
+  max: 2000
+  default: 1250
\ No newline at end of file
diff --git a/chart/templates/default-setting.yaml b/chart/templates/default-setting.yaml
index 44996b4df2..674a21d251 100644
--- a/chart/templates/default-setting.yaml
+++ b/chart/templates/default-setting.yaml
@@ -215,3 +215,6 @@ data:
     {{- if not (kindIs "invalid" .Values.defaultSettings.disableSnapshotPurge) }}
    disable-snapshot-purge: {{ .Values.defaultSettings.disableSnapshotPurge }}
     {{- end }}
+    {{- if not (kindIs "invalid" .Values.defaultSettings.v2DataEngineGuaranteedInstanceManagerCPU) }}
+    v2-data-engine-guaranteed-instance-manager-cpu: {{ .Values.defaultSettings.v2DataEngineGuaranteedInstanceManagerCPU }}
+    {{- end }}
\ No newline at end of file
diff --git a/chart/values.yaml b/chart/values.yaml
index 683ae168a1..617126d10d 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -290,7 +290,7 @@ defaultSettings:
   # -- This interval in seconds determines how long Longhorn will wait before re-downloading the backing image file
   # when all disk files of this backing image become failed or unknown.
   backingImageRecoveryWaitInterval: ~
-  # -- This integer value indicates how many percentages of the total allocatable CPU on each node will be reserved for each instance manager Pod.
+  # -- This integer value indicates how many percent of the total allocatable CPU on each node will be reserved for each instance manager Pod for v1 data engine.
   # You can leave it with the default value, which is 12%.
   guaranteedInstanceManagerCPU: ~
   # -- Enabling this setting will notify Longhorn that the cluster is using Kubernetes Cluster Autoscaler.
@@ -333,6 +333,10 @@ defaultSettings:
   v2DataEngine: ~
   # -- This setting allows users to enable the offline replica rebuilding for volumes using v2 data engine.
   offlineReplicaRebuilding: ~
+  # -- This integer value indicates how many millicpus of CPU on each node will be reserved for each instance manager Pod for v2 data engine.
+  # You can leave it with the default value, which is 1250 millicpus.
+  v2DataEngineGuaranteedInstanceManagerCPU: ~
+  # -- Enabling this setting will notify Longhorn that the cluster is using Kubernetes Cluster Autoscaler.
   # -- Allow Scheduling Empty Node Selector Volumes To Any Node
   allowEmptyNodeSelectorVolume: ~
   # -- Allow Scheduling Empty Disk Selector Volumes To Any Disk
diff --git a/deploy/upgrade_responder_server/chart-values.yaml b/deploy/upgrade_responder_server/chart-values.yaml
index 33e283fd9a..6f9f68e389 100644
--- a/deploy/upgrade_responder_server/chart-values.yaml
+++ b/deploy/upgrade_responder_server/chart-values.yaml
@@ -280,6 +280,9 @@ configMap:
       "longhornSettingGuaranteedInstanceManagerCpu": {
         "dataType": "float"
       },
+      "longhornSettingV2DataEngineGuaranteedInstanceManagerCpu": {
+        "dataType": "float"
+      },
       "longhornSettingRecurringFailedJobsHistoryLimit": {
         "dataType": "float"
       },
diff --git a/dev/upgrade-responder/install.sh b/dev/upgrade-responder/install.sh
index 1a6e0f1e4e..f9d3170ebe 100755
--- a/dev/upgrade-responder/install.sh
+++ b/dev/upgrade-responder/install.sh
@@ -297,6 +297,9 @@ configMap:
      "longhornSettingGuaranteedInstanceManagerCpu": {
         "dataType": "float"
       },
+      "longhornSettingV2DataEngineGuaranteedInstanceManagerCpu": {
+        "dataType": "float"
+      },
      "longhornSettingRecurringFailedJobsHistoryLimit": {
         "dataType": "float"
       },
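
For context, a minimal sketch of how a user-supplied Helm values override might set the new field alongside the existing v1 setting. The file name and the 1500 figure are illustrative; the key names, units, the 0-2000 bound, and the defaults (12% for v1, 1250 millicpus for v2) come from the chart changes above.

```yaml
# my-values.yaml - hypothetical override file passed via `helm install/upgrade -f`
defaultSettings:
  # v1 data engine: integer percentage of each node's allocatable CPU reserved
  # per instance manager Pod (chart default is 12).
  guaranteedInstanceManagerCPU: 12
  # v2 data engine: millicpus reserved per instance manager Pod
  # (questions.yaml bounds this to 0-2000 and defaults it to 1250).
  v2DataEngineGuaranteedInstanceManagerCPU: 1500
```

Note the difference in units: with the v1 setting, the reservation scales with node size (for example, 12% of a node with 8000m allocatable CPU is roughly 960m), whereas the v2 setting reserves a flat amount of millicpus per node regardless of node size.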
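The default-setting.yaml template change gates the new key behind a `kindIs "invalid"` nil check, so the key is only rendered into the default-setting data when the chart value is actually set. A rough sketch of the rendered fragment under the override above, with the surrounding ConfigMap fields omitted:

```yaml
# Rendered fragment (sketch): emitted only because the value is non-nil.
v2-data-engine-guaranteed-instance-manager-cpu: 1500
# With the chart default of `~` (nil), the {{- if not (kindIs "invalid" ...) }}
# guard skips the line entirely, and Longhorn presumably falls back to its
# built-in default of 1250 millicpus.
```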