Skip to content

Commit

Permalink
feat(nodeadm): enable CDI for 1.32+
Browse files Browse the repository at this point in the history
  • Loading branch information
cartermckinnon committed Mar 5, 2025
1 parent 3d8088a commit 21d670a
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 37 deletions.
7 changes: 7 additions & 0 deletions nodeadm/cmd/nodeadm/init/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,13 @@ func (c *initCmd) Run(log *zap.Logger, opts *cli.GlobalOptions) error {
// Various initializations and verifications of the NodeConfig and
// perform in-place updates when allowed by the user
func enrichConfig(log *zap.Logger, cfg *api.NodeConfig) error {
log.Info("Fetching kubelet version..")
kubeletVersion, err := kubelet.GetKubeletVersion()
if err != nil {
return err
}
cfg.Status.KubeletVersion = kubeletVersion
log.Info("Fetched kubelet version", zap.String("version", kubeletVersion))
log.Info("Fetching instance details..")
awsConfig, err := config.LoadDefaultConfig(context.TODO(),
config.WithClientLogMode(aws.LogRetries),
Expand Down
2 changes: 2 additions & 0 deletions nodeadm/internal/api/merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ discard_unpacked_layers = true
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{.SandboxImage}}"
enable_cdi = true
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
Expand Down Expand Up @@ -195,6 +196,7 @@ discard_unpacked_layers = false
[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{.SandboxImage}}"
enable_cdi = true
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
Expand Down
9 changes: 5 additions & 4 deletions nodeadm/internal/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ type NodeConfigSpec struct {
}

type NodeConfigStatus struct {
Instance InstanceDetails `json:"instance,omitempty"`
Defaults DefaultOptions `json:"default,omitempty"`
Instance InstanceDetails `json:"instance,omitempty"`
Defaults DefaultOptions `json:"default,omitempty"`
KubeletVersion string `json:"kubeletVersion,omitempty"`
}

type InstanceDetails struct {
Expand Down Expand Up @@ -102,9 +103,9 @@ type LocalStorageOptions struct {
type LocalStorageStrategy string

const (
LocalStorageRAID0 LocalStorageStrategy = "RAID0"
LocalStorageRAID0 LocalStorageStrategy = "RAID0"
LocalStorageRAID10 LocalStorageStrategy = "RAID10"
LocalStorageMount LocalStorageStrategy = "Mount"
LocalStorageMount LocalStorageStrategy = "Mount"
)

type Feature string
Expand Down
3 changes: 3 additions & 0 deletions nodeadm/internal/containerd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/awslabs/amazon-eks-ami/nodeadm/internal/util"
"github.com/pelletier/go-toml/v2"
"go.uber.org/zap"
"golang.org/x/mod/semver"
)

const ContainerRuntimeEndpoint = "unix:///run/containerd/containerd.sock"
Expand All @@ -25,6 +26,7 @@ var (
)

type containerdTemplateVars struct {
EnableCDI bool
SandboxImage string
RuntimeName string
RuntimeBinaryName string
Expand Down Expand Up @@ -65,6 +67,7 @@ func generateContainerdConfig(cfg *api.NodeConfig) ([]byte, error) {
SandboxImage: cfg.Status.Defaults.SandboxImage,
RuntimeBinaryName: instanceOptions.RuntimeBinaryName,
RuntimeName: instanceOptions.RuntimeName,
EnableCDI: semver.Compare(cfg.Status.KubeletVersion, "v1.32.0") >= 0,
}
var buf bytes.Buffer
if err := containerdConfigTemplate.Execute(&buf, configVars); err != nil {
Expand Down
1 change: 1 addition & 0 deletions nodeadm/internal/containerd/config.template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ discard_unpacked_layers = true

[plugins."io.containerd.grpc.v1.cri"]
sandbox_image = "{{.SandboxImage}}"
enable_cdi = {{.EnableCDI}}

[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "/etc/containerd/certs.d:/etc/docker/certs.d"
Expand Down
35 changes: 12 additions & 23 deletions nodeadm/internal/kubelet/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,9 @@ const (
)

func (k *kubelet) writeKubeletConfig(cfg *api.NodeConfig) error {
kubeletVersion, err := GetKubeletVersion()
if err != nil {
return err
}
// tracking: https://github.com/kubernetes/enhancements/issues/3983
// for enabling drop-in configuration
if semver.Compare(kubeletVersion, "v1.29.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
return k.writeKubeletConfigToFile(cfg)
} else {
return k.writeKubeletConfigToDir(cfg)
Expand Down Expand Up @@ -211,9 +207,9 @@ func (ksc *kubeletConfig) withNodeIp(cfg *api.NodeConfig, flags map[string]strin
return nil
}

func (ksc *kubeletConfig) withVersionToggles(kubeletVersion string, flags map[string]string) {
func (ksc *kubeletConfig) withVersionToggles(cfg *api.NodeConfig, flags map[string]string) {
// TODO: remove when 1.26 is EOL
if semver.Compare(kubeletVersion, "v1.27.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
// --container-runtime flag is gone in 1.27+
flags["container-runtime"] = "remote"
// --container-runtime-endpoint moved to kubelet config start from 1.27
Expand All @@ -223,20 +219,20 @@ func (ksc *kubeletConfig) withVersionToggles(kubeletVersion string, flags map[st

// TODO: Remove this during 1.27 EOL
// Enable Feature Gate for KubeletCredentialProviders in versions less than 1.28 since this feature flag was removed in 1.28.
if semver.Compare(kubeletVersion, "v1.28.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.28.0") < 0 {
ksc.FeatureGates["KubeletCredentialProviders"] = true
}

// for K8s versions that suport API Priority & Fairness, increase our API server QPS
// in 1.27, the default is already increased to 50/100, so use the higher defaults
if semver.Compare(kubeletVersion, "v1.22.0") >= 0 && semver.Compare(kubeletVersion, "v1.27.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.22.0") >= 0 && semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
ksc.KubeAPIQPS = ptr.Int(10)
ksc.KubeAPIBurst = ptr.Int(20)
}
}

func (ksc *kubeletConfig) withCloudProvider(kubeletVersion string, cfg *api.NodeConfig, flags map[string]string) {
if semver.Compare(kubeletVersion, "v1.26.0") >= 0 {
func (ksc *kubeletConfig) withCloudProvider(cfg *api.NodeConfig, flags map[string]string) {
if semver.Compare(cfg.Status.KubeletVersion, "v1.26.0") >= 0 {
// ref: https://github.com/kubernetes/kubernetes/pull/121367
flags["cloud-provider"] = "external"
// provider ID needs to be specified when the cloud provider is external
Expand Down Expand Up @@ -280,24 +276,17 @@ func (ksc *kubeletConfig) withDefaultReservedResources(cfg *api.NodeConfig) {
//
// TODO: revisit once the minimum supportted version catches up or the container
// runtime is moved to containerd 2.0
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, kubeletVersion string, flags map[string]string) error {
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, flags map[string]string) error {
// the flag is a noop on 1.29+, since the behavior was changed to use the
// CRI image pinning behavior and no longer considers the flag value.
// see: https://github.com/kubernetes/kubernetes/pull/118544
if semver.Compare(kubeletVersion, "v1.29.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
flags["pod-infra-container-image"] = cfg.Status.Defaults.SandboxImage
}
return nil
}

func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, error) {
// Get the kubelet/kubernetes version to help conditionally enable features
kubeletVersion, err := GetKubeletVersion()
if err != nil {
return nil, err
}
zap.L().Info("Detected kubelet version", zap.String("version", kubeletVersion))

kubeletConfig := defaultKubeletSubConfig()

if err := kubeletConfig.withFallbackClusterDns(&cfg.Spec.Cluster); err != nil {
Expand All @@ -309,12 +298,12 @@ func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, er
if err := kubeletConfig.withNodeIp(cfg, k.flags); err != nil {
return nil, err
}
if err := kubeletConfig.withPodInfraContainerImage(cfg, kubeletVersion, k.flags); err != nil {
if err := kubeletConfig.withPodInfraContainerImage(cfg, k.flags); err != nil {
return nil, err
}

kubeletConfig.withVersionToggles(kubeletVersion, k.flags)
kubeletConfig.withCloudProvider(kubeletVersion, cfg, k.flags)
kubeletConfig.withVersionToggles(cfg, k.flags)
kubeletConfig.withCloudProvider(cfg, k.flags)
kubeletConfig.withDefaultReservedResources(cfg)

return &kubeletConfig, nil
Expand Down
24 changes: 20 additions & 4 deletions nodeadm/internal/kubelet/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ func TestKubeletCredentialProvidersFeatureFlag(t *testing.T) {

for _, test := range tests {
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withVersionToggles(test.kubeletVersion, make(map[string]string))
nodeConfig := api.NodeConfig{
Status: api.NodeConfigStatus{
KubeletVersion: test.kubeletVersion,
},
}
kubetConfig.withVersionToggles(&nodeConfig, make(map[string]string))
kubeletCredentialProviders, present := kubetConfig.FeatureGates["KubeletCredentialProviders"]
if test.expectedValue == nil && present {
t.Errorf("KubeletCredentialProviders shouldn't be set for versions %s", test.kubeletVersion)
Expand All @@ -44,7 +49,12 @@ func TestContainerRuntime(t *testing.T) {
for _, test := range tests {
kubeletAruments := make(map[string]string)
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withVersionToggles(test.kubeletVersion, kubeletAruments)
nodeConfig := api.NodeConfig{
Status: api.NodeConfigStatus{
KubeletVersion: test.kubeletVersion,
},
}
kubetConfig.withVersionToggles(&nodeConfig, kubeletAruments)
containerRuntime, present := kubeletAruments["container-runtime"]
if test.expectedContainerRuntime == nil {
if present {
Expand Down Expand Up @@ -78,7 +88,12 @@ func TestKubeAPILimits(t *testing.T) {

for _, test := range tests {
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withVersionToggles(test.kubeletVersion, make(map[string]string))
nodeConfig := api.NodeConfig{
Status: api.NodeConfigStatus{
KubeletVersion: test.kubeletVersion,
},
}
kubetConfig.withVersionToggles(&nodeConfig, make(map[string]string))
assert.Equal(t, test.expectedKubeAPIQS, kubetConfig.KubeAPIQPS)
assert.Equal(t, test.expectedKubeAPIBurst, kubetConfig.KubeAPIBurst)
}
Expand Down Expand Up @@ -108,7 +123,8 @@ func TestProviderID(t *testing.T) {
for _, test := range tests {
kubeletAruments := make(map[string]string)
kubetConfig := defaultKubeletSubConfig()
kubetConfig.withCloudProvider(test.kubeletVersion, &nodeConfig, kubeletAruments)
nodeConfig.Status.KubeletVersion = test.kubeletVersion
kubetConfig.withCloudProvider(&nodeConfig, kubeletAruments)
assert.Equal(t, test.expectedCloudProvider, kubeletAruments["cloud-provider"])
if kubeletAruments["cloud-provider"] == "external" {
assert.Equal(t, *kubetConfig.ProviderID, providerId)
Expand Down
6 changes: 1 addition & 5 deletions nodeadm/internal/kubelet/image-credential-provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,7 @@ func generateImageCredentialProviderConfig(cfg *api.NodeConfig, ecrCredentialPro
templateVars := imageCredentialProviderTemplateVars{
EcrProviderName: filepath.Base(ecrCredentialProviderBinPath),
}
kubeletVersion, err := GetKubeletVersion()
if err != nil {
return nil, err
}
if semver.Compare(kubeletVersion, "v1.27.0") < 0 {
if semver.Compare(cfg.Status.KubeletVersion, "v1.27.0") < 0 {
templateVars.ConfigApiVersion = "kubelet.config.k8s.io/v1alpha1"
templateVars.ProviderApiVersion = "credentialprovider.kubelet.k8s.io/v1alpha1"
} else {
Expand Down
4 changes: 4 additions & 0 deletions nodeadm/internal/kubelet/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"os"
"os/exec"
"regexp"

"go.uber.org/zap"
)

func GetKubeletVersion() (string, error) {
Expand All @@ -20,10 +22,12 @@ const kubeletVersionFile = "/etc/eks/kubelet-version.txt"

func GetKubeletVersionRaw() ([]byte, error) {
if _, err := os.Stat(kubeletVersionFile); errors.Is(err, os.ErrNotExist) {
zap.L().Info("Reading kubelet version from executable")
return exec.Command("kubelet", "--version").Output()
} else if err != nil {
return nil, err
}
zap.L().Info("Reading kubelet version from file", zap.String("path", kubeletVersionFile))
return os.ReadFile(kubeletVersionFile)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
root = '/var/lib/containerd'
state = '/run/containerd'
version = 2

[grpc]
address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = false
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
bin_dir = '/opt/cni/bin'
conf_dir = '/etc/cni/net.d'

[plugins.'io.containerd.grpc.v1.cri'.containerd]
default_runtime_name = 'runc'
discard_unpacked_layers = false

[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes]
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc]
base_runtime_spec = '/etc/containerd/base-runtime-spec.json'
runtime_type = 'io.containerd.runc.v2'

[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc.options]
BinaryName = '/usr/sbin/runc'
SystemdCgroup = true

[plugins.'io.containerd.grpc.v1.cri'.registry]
config_path = '/etc/containerd/certs.d:/etc/docker/certs.d'
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = true
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
Expand Down
6 changes: 5 additions & 1 deletion nodeadm/test/e2e/cases/containerd-config/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ set -o pipefail
source /helpers.sh

mock::aws
mock::kubelet 1.27.0
wait::dbus-ready

mock::kubelet 1.31.0
nodeadm init --skip run --config-source file://config.yaml
assert::files-equal /etc/containerd/config.toml expected-containerd-config-pre-1.32.toml

# enable_cdi defaults to true in 1.32+
mock::kubelet 1.32.0
nodeadm init --skip run --config-source file://config.yaml
assert::files-equal /etc/containerd/config.toml expected-containerd-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = false
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ address = '/run/foo/foo.sock'

[plugins]
[plugins.'io.containerd.grpc.v1.cri']
enable_cdi = false
sandbox_image = 'localhost/kubernetes/pause'

[plugins.'io.containerd.grpc.v1.cri'.cni]
Expand Down

0 comments on commit 21d670a

Please sign in to comment.