Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(nodeadm): enable CDI for 1.32+ #2173

Merged
merged 1 commit into from
Mar 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions nodeadm/cmd/nodeadm/init/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,13 @@ func (c *initCmd) Run(log *zap.Logger, opts *cli.GlobalOptions) error {
// Various initializations and verifications of the NodeConfig and
// perform in-place updates when allowed by the user
func enrichConfig(log *zap.Logger, cfg *api.NodeConfig) error {
log.Info("Fetching kubelet version..")
kubeletVersion, err := kubelet.GetKubeletVersion()
if err != nil {
return err
}
cfg.Status.KubeletVersion = kubeletVersion
log.Info("Fetched kubelet version", zap.String("version", kubeletVersion))
Comment on lines +152 to +158
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were calling kubelet.GetKubeletVersion() in several places, and they all had access to the NodeConfig, so I just cached it in the NodeConfigStatus.

log.Info("Fetching instance details..")
awsConfig, err := config.LoadDefaultConfig(context.TODO(),
config.WithClientLogMode(aws.LogRetries),
Expand Down
2 changes: 2 additions & 0 deletions nodeadm/internal/api/merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ discard_unpacked_layers = true
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{.SandboxImage}}"
enable_cdi = true
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
Expand Down Expand Up @@ -195,6 +196,7 @@ discard_unpacked_layers = false
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{.SandboxImage}}"
enable_cdi = true
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
Expand Down
9 changes: 5 additions & 4 deletions nodeadm/internal/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ type NodeConfigSpec struct {
}

type NodeConfigStatus struct {
Instance InstanceDetails `json:"instance,omitempty"`
Defaults DefaultOptions `json:"default,omitempty"`
Instance InstanceDetails `json:"instance,omitempty"`
Defaults DefaultOptions `json:"default,omitempty"`
KubeletVersion string `json:"kubeletVersion,omitempty"`
}

type InstanceDetails struct {
Expand Down Expand Up @@ -102,9 +103,9 @@ type LocalStorageOptions struct {
type LocalStorageStrategy string

const (
LocalStorageRAID0 LocalStorageStrategy = "RAID0"
LocalStorageRAID0 LocalStorageStrategy = "RAID0"
LocalStorageRAID10 LocalStorageStrategy = "RAID10"
LocalStorageMount LocalStorageStrategy = "Mount"
LocalStorageMount LocalStorageStrategy = "Mount"
)

type Feature string
Expand Down
3 changes: 3 additions & 0 deletions nodeadm/internal/containerd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/awslabs/amazon-eks-ami/nodeadm/internal/util"
"github.com/pelletier/go-toml/v2"
"go.uber.org/zap"
"golang.org/x/mod/semver"
)

const ContainerRuntimeEndpoint = "unix:///run/containerd/containerd.sock"
Expand All @@ -25,6 +26,7 @@ var (
)

type containerdTemplateVars struct {
EnableCDI bool
SandboxImage string
RuntimeName string
RuntimeBinaryName string
Expand Down Expand Up @@ -65,6 +67,7 @@ func generateContainerdConfig(cfg *api.NodeConfig) ([]byte, error) {
SandboxImage: cfg.Status.Defaults.SandboxImage,
RuntimeBinaryName: instanceOptions.RuntimeBinaryName,
RuntimeName: instanceOptions.RuntimeName,
EnableCDI: semver.Compare(cfg.Status.KubeletVersion, "v1.32.0") >= 0,
}
var buf bytes.Buffer
if err := containerdConfigTemplate.Execute(&buf, configVars); err != nil {
Expand Down
1 change: 1 addition & 0 deletions nodeadm/internal/containerd/config.template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ discard_unpacked_layers = true

[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{.SandboxImage}}"
enable_cdi = {{.EnableCDI}}

[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
Expand Down
35 changes: 12 additions & 23 deletions nodeadm/internal/kubelet/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,9 @@ const (
)

func (k *kubelet) writeKubeletConfig(cfg *api.NodeConfig) error {
kubeletVersion, err := GetKubeletVersion()
if err != nil {
return err
}
// tracking: https://github.com/kubernetes/enhancements/issues/3983
// for enabling drop-in configuration
if semver.Compare(kubeletVersion, "v1.29.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
return k.writeKubeletConfigToFile(cfg)
} else {
return k.writeKubeletConfigToDir(cfg)
Expand Down Expand Up @@ -211,9 +207,9 @@ func (ksc *kubeletConfig) withNodeIp(cfg *api.NodeConfig, flags map[string]strin
return nil
}

func (ksc *kubeletConfig) withVersionToggles(kubeletVersion string, flags map[string]string) {
func (ksc *kubeletConfig) withVersionToggles(cfg *api.NodeConfig, flags map[string]string) {
// TODO: remove when 1.26 is EOL
if semver.Compare(kubeletVersion, "v1.27.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
// --container-runtime flag is gone in 1.27+
flags["container-runtime"] = "remote"
// --container-runtime-endpoint moved to kubelet config start from 1.27
Expand All @@ -223,20 +219,20 @@ func (ksc *kubeletConfig) withVersionToggles(kubeletVersion string, flags map[st

// TODO: Remove this during 1.27 EOL
// Enable Feature Gate for KubeletCredentialProviders in versions less than 1.28 since this feature flag was removed in 1.28.
if semver.Compare(kubeletVersion, "v1.28.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.28.0") < 0 {
ksc.FeatureGates["KubeletCredentialProviders"] = true
}

// for K8s versions that suport API Priority & Fairness, increase our API server QPS
// in 1.27, the default is already increased to 50/100, so use the higher defaults
if semver.Compare(kubeletVersion, "v1.22.0") >= 0 && semver.Compare(kubeletVersion, "v1.27.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.22.0") >= 0 && semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
ksc.KubeAPIQPS = ptr.Int(10)
ksc.KubeAPIBurst = ptr.Int(20)
}
}

func (ksc *kubeletConfig) withCloudProvider(kubeletVersion string, cfg *api.NodeConfig, flags map[string]string) {
if semver.Compare(kubeletVersion, "v1.26.0") >= 0 {
func (ksc *kubeletConfig) withCloudProvider(cfg *api.NodeConfig, flags map[string]string) {
if semver.Compare(cfg.Status.KubeletVersion, "v1.26.0") >= 0 {
// ref: https://github.com/kubernetes/kubernetes/pull/121367
flags["cloud-provider"] = "external"
// provider ID needs to be specified when the cloud provider is external
Expand Down Expand Up @@ -280,24 +276,17 @@ func (ksc *kubeletConfig) withDefaultReservedResources(cfg *api.NodeConfig) {
//
// TODO: revisit once the minimum supportted version catches up or the container
// runtime is moved to containerd 2.0
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, kubeletVersion string, flags map[string]string) error {
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, flags map[string]string) error {
// the flag is a noop on 1.29+, since the behavior was changed to use the
// CRI image pinning behavior and no longer considers the flag value.
// see: https://github.com/kubernetes/kubernetes/pull/118544
if semver.Compare(kubeletVersion, "v1.29.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
flags["pod-infra-container-image"] = cfg.Status.Defaults.SandboxImage
}
return nil
}

func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, error) {
// Get the kubelet/kubernetes version to help conditionally enable features
kubeletVersion, err := GetKubeletVersion()
if err != nil {
return nil, err
}
zap.L().Info("Detected kubelet version", zap.String("version", kubeletVersion))

kubeletConfig := defaultKubeletSubConfig()

if err := kubeletConfig.withFallbackClusterDns(&cfg.Spec.Cluster); err != nil {
Expand All @@ -309,12 +298,12 @@ func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, er
if err := kubeletConfig.withNodeIp(cfg, k.flags); err != nil {
return nil, err
}
if err := kubeletConfig.withPodInfraContainerImage(cfg, kubeletVersion, k.flags); err != nil {
if err := kubeletConfig.withPodInfraContainerImage(cfg, k.flags); err != nil {
return nil, err
}

kubeletConfig.withVersionToggles(kubeletVersion, k.flags)
kubeletConfig.withCloudProvider(kubeletVersion, cfg, k.flags)
kubeletConfig.withVersionToggles(cfg, k.flags)
kubeletConfig.withCloudProvider(cfg, k.flags)
kubeletConfig.withDefaultReservedResources(cfg)

return &kubeletConfig, nil
Expand Down
24 changes: 20 additions & 4 deletions nodeadm/internal/kubelet/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ func TestKubeletCredentialProvidersFeatureFlag(t *testing.T) {

for _, test := range tests {
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withVersionToggles(test.kubeletVersion, make(map[string]string))
nodeConfig := api.NodeConfig{
Status: api.NodeConfigStatus{
KubeletVersion: test.kubeletVersion,
},
}
kubetConfig.withVersionToggles(&nodeConfig, make(map[string]string))
kubeletCredentialProviders, present := kubetConfig.FeatureGates["KubeletCredentialProviders"]
if test.expectedValue == nil && present {
t.Errorf("KubeletCredentialProviders shouldn't be set for versions %s", test.kubeletVersion)
Expand All @@ -44,7 +49,12 @@ func TestContainerRuntime(t *testing.T) {
for _, test := range tests {
kubeletAruments := make(map[string]string)
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withVersionToggles(test.kubeletVersion, kubeletAruments)
nodeConfig := api.NodeConfig{
Status: api.NodeConfigStatus{
KubeletVersion: test.kubeletVersion,
},
}
kubetConfig.withVersionToggles(&nodeConfig, kubeletAruments)
containerRuntime, present := kubeletAruments["container-runtime"]
if test.expectedContainerRuntime == nil {
if present {
Expand Down Expand Up @@ -78,7 +88,12 @@ func TestKubeAPILimits(t *testing.T) {

for _, test := range tests {
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withVersionToggles(test.kubeletVersion, make(map[string]string))
nodeConfig := api.NodeConfig{
Status: api.NodeConfigStatus{
KubeletVersion: test.kubeletVersion,
},
}
kubetConfig.withVersionToggles(&nodeConfig, make(map[string]string))
assert.Equal(t, test.expectedKubeAPIQS, kubetConfig.KubeAPIQPS)
assert.Equal(t, test.expectedKubeAPIBurst, kubetConfig.KubeAPIBurst)
}
Expand Down Expand Up @@ -108,7 +123,8 @@ func TestProviderID(t *testing.T) {
for _, test := range tests {
kubeletAruments := make(map[string]string)
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withCloudProvider(test.kubeletVersion, &nodeConfig, kubeletAruments)
nodeConfig.Status.KubeletVersion = test.kubeletVersion
kubetConfig.withCloudProvider(&nodeConfig, kubeletAruments)
assert.Equal(t, test.expectedCloudProvider, kubeletAruments["cloud-provider"])
if kubeletAruments["cloud-provider"] == "external" {
assert.Equal(t, *kubetConfig.ProviderID, providerId)
Expand Down
6 changes: 1 addition & 5 deletions nodeadm/internal/kubelet/image-credential-provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,7 @@ func generateImageCredentialProviderConfig(cfg *api.NodeConfig, ecrCredentialPro
templateVars := imageCredentialProviderTemplateVars{
EcrProviderName: filepath.Base(ecrCredentialProviderBinPath),
}
kubeletVersion, err := GetKubeletVersion()
if err != nil {
return nil, err
}
if semver.Compare(kubeletVersion, "v1.27.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
templateVars.ConfigApiVersion = "kubelet.config.k8s.io/v1alpha1"
templateVars.ProviderApiVersion = "credentialprovider.kubelet.k8s.io/v1alpha1"
} else {
Expand Down
4 changes: 4 additions & 0 deletions nodeadm/internal/kubelet/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"os"
"os/exec"
"regexp"

"go.uber.org/zap"
)

func GetKubeletVersion() (string, error) {
Expand All @@ -20,10 +22,12 @@ const kubeletVersionFile = "/etc/eks/kubelet-version.txt"

func GetKubeletVersionRaw() ([]byte, error) {
if _, err := os.Stat(kubeletVersionFile); errors.Is(err, os.ErrNotExist) {
zap.L().Info("Reading kubelet version from executable")
return exec.Command("kubelet", "--version").Output()
} else if err != nil {
return nil, err
}
zap.L().Info("Reading kubelet version from file", zap.String("path", kubeletVersionFile))
return os.ReadFile(kubeletVersionFile)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
root = '/var/lib/containerd'
state = '/run/containerd'
version = 2

[grpc]
address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = false
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
bin_dir = '/opt/cni/bin'
conf_dir = '/etc/cni/net.d'

[plugins.'io.containerd.grpc.v1.cri'.containerd]
default_runtime_name = 'runc'
discard_unpacked_layers = false

[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes]
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc]
base_runtime_spec = '/etc/containerd/base-runtime-spec.json'
runtime_type = 'io.containerd.runc.v2'

[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc.options]
BinaryName = '/usr/sbin/runc'
SystemdCgroup = true

[plugins.'io.containerd.grpc.v1.cri'.registry]
config_path = '/etc/containerd/certs.d:/etc/docker/certs.d'
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = true
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
Expand Down
6 changes: 5 additions & 1 deletion nodeadm/test/e2e/cases/containerd-config/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ set -o pipefail
source /helpers.sh

mock::aws
mock::kubelet 1.27.0
wait::dbus-ready

mock::kubelet 1.31.0
nodeadm init --skip run --config-source file://config.yaml
assert::files-equal /etc/containerd/config.toml expected-containerd-config-pre-1.32.toml

# enable_cdi defaults to true in 1.32+
mock::kubelet 1.32.0
nodeadm init --skip run --config-source file://config.yaml
assert::files-equal /etc/containerd/config.toml expected-containerd-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = false
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = false
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
Expand Down
Loading