gke: allow customization of autoscaling strategy (#21)
* gke: allow customization of autoscaling strategy
* feat: allow customizing autoscaling profile
* disable resource limits - unknown behavior
* add support for node scale request

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
vsoch authored Apr 14, 2024
1 parent 160cf28 commit b9caea8
Showing 3 changed files with 66 additions and 23 deletions.
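The bullets above summarize the feature; for orientation, here is a minimal usage sketch under stated assumptions. The scaler class name and its project/name arguments are assumptions — the diff below only confirms scaling_profile, create_cluster, and update_cluster:

    from kubescaler.scaler.google import GKECluster  # class name assumed

    # scaling_profile maps onto container_v1.ClusterAutoscaling.AutoscalingProfile:
    #   0 = PROFILE_UNSPECIFIED (the new default), 1 = OPTIMIZE_UTILIZATION, 2 = BALANCED
    cluster = GKECluster(
        project="my-project",       # assumed parameter
        name="kubescaler-cluster",  # assumed parameter
        scaling_profile=1,          # new in this commit
    )
    cluster.create_cluster()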
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
The versions coincide with releases on pip. Only major versions will be released as tags on Github.

## [0.0.x](https://github.com/converged-computing/kubescaler/tree/main) (0.0.x)
- allow customization of autoscaling (0.0.2)
- ensure we do not add size for node scaling up/down times (0.0.19)
- do not use the waiter for nodegroup_active it does not work! (0.0.18)
- support for Google Cloud instance group creation, etc.
86 changes: 64 additions & 22 deletions kubescaler/scaler/google.py
@@ -40,6 +40,7 @@ def __init__(
max_memory=32,
# Initial labels for the default cluster
labels=None,
scaling_profile=0,
**kwargs,
):
"""
@@ -56,6 +57,7 @@
self.tags = self.tags or ["kubescaler-cluster"]
self.default_pool = default_pool_name
self.configuration = None
self.scaling_profile = scaling_profile
self.labels = labels
self.zone = zone
self.max_vcpu = max_vcpu
@@ -307,38 +309,54 @@ def delete_nodegroup(self, name=None):
self.client.delete_node_pool(request=request)
return self.wait_for_status(2)

def get_cluster(self, node_pools=None):
def get_cluster(self, node_pools=None, scaling_profile=None):
"""
Get the cluster proto with our defaults
"""
if scaling_profile is None:
scaling_profile = self.scaling_profile
if scaling_profile not in [0, 1, 2]:
raise ValueError("Scaling profile must be one of 0,1,2")

# autoprovisioning node defaults. Note that upgrade settings
# default to a surge strategy, max surge 1 and nodes unavailable 2
# I tried setting auto_upgrade and auto_repair to False but that
# must be the default, they don't show up

# Design our initial cluster!
# Autoscaling - try optimizing
# PROFILE_UNSPECIFIED = 0
# OPTIMIZE_UTILIZATION = 1
# BALANCED = 2
autoscaling_profile = container_v1.ClusterAutoscaling.AutoscalingProfile(1)
autoscaling_profile = container_v1.ClusterAutoscaling.AutoscalingProfile(
scaling_profile
)

# These are required, you get an error without them.
# These are only intended if you want GKE to make new node pools for you
# I highly do not recommend this, I've never had this result in desired
# behavior.
# https://cloud.google.com/compute/docs/compute-optimized-machines
resource_limits = [
container_v1.ResourceLimit(
resource_type="cpu",
minimum=0,
maximum=self.max_vcpu * self.node_count,
),
container_v1.ResourceLimit(
resource_type="memory",
minimum=0,
maximum=self.max_memory * self.node_count,
),
]

# Note that I removed resource_limits, no limits!
# resource_limits = [
# container_v1.ResourceLimit(
# resource_type="cpu",
# minimum=0,
# maximum=self.max_vcpu * self.node_count,
# ),
# container_v1.ResourceLimit(
# resource_type="memory",
# minimum=0,
# maximum=self.max_memory * self.node_count,
# ),
# ]

# When autoprovisioning is enabled the cluster explodes into much
# larger sizes than you want.
cluster_autoscaling = container_v1.ClusterAutoscaling(
enable_node_autoprovisioning=True,
autoprovisioning_locations=[self.zone],
enable_node_autoprovisioning=False,
autoscaling_profile=autoscaling_profile,
resource_limits=resource_limits,
# These two fields are only for node autoprovisioning
# autoprovisioning_locations=[self.location],
# resource_limits=resource_limits,
)

# vertical_pod_autoscaling (google.cloud.container_v1.types.VerticalPodAutoscaling):
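Before the next hunk, a minimal standalone sketch of what the new autoscaling setup constructs, using only types that appear in this diff (the value 2 selects BALANCED; this illustration is not part of the commit):

    from google.cloud import container_v1

    # Build the profile enum from an integer, as get_cluster now does
    profile = container_v1.ClusterAutoscaling.AutoscalingProfile(2)  # BALANCED
    autoscaling = container_v1.ClusterAutoscaling(
        enable_node_autoprovisioning=False,  # autoprovisioning off, per this commit
        autoscaling_profile=profile,
    )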
@@ -365,6 +383,30 @@ def get_cluster(self, node_pools=None):
print(cluster)
return cluster

@timed
def update_cluster(self, size, max_nodes, min_nodes):
"""
Update a cluster. Currently we support the max and min size
"""
autoscaling = container_v1.NodePoolAutoscaling(
enabled=True,
total_max_node_count=max_nodes,
total_min_node_count=min_nodes,
)
request = container_v1.SetNodePoolAutoscalingRequest(
autoscaling=autoscaling,
name=f"projects/{self.project}/locations/{self.location}/clusters/{self.name}/nodePools/{self.default_pool}",
)
print("\n🥣️ cluster node pool update request")
print(request)

response = self.client.set_node_pool_autoscaling(request=request)
print(response)

# Status 2 is running (1 is provisioning)
print(f"⏱️ Waiting for {self.cluster_name} to be ready...")
return self.wait_for_status(2)

@timed
def create_cluster(self):
"""
@@ -382,8 +424,8 @@ def create_cluster(self):
# If you don't set this, your cluster will grow as it pleases.
autoscaling = container_v1.NodePoolAutoscaling(
enabled=True,
min_node_count=self.min_nodes,
max_node_count=self.max_nodes,
total_max_node_count=self.max_nodes,
total_min_node_count=self.min_nodes,
)
node_pool = container_v1.types.NodePool(
name=self.default_pool,
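Continuing the sketch from the top of this page, a hedged example of the new node scale request against the default pool (the cluster object is the assumed GKECluster from above; update_cluster and its arguments come from the diff). Note that in the GKE API the total_max_node_count / total_min_node_count bounds constrain the pool across all locations, unlike the per-zone max_node_count / min_node_count fields they replace here:

    # Ask GKE to re-bound the default pool between 2 and 8 total nodes.
    # Per the diff, the size argument is accepted but only the bounds are applied.
    cluster.update_cluster(size=4, max_nodes=8, min_nodes=2)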
2 changes: 1 addition & 1 deletion kubescaler/version.py
@@ -3,7 +3,7 @@
#
# SPDX-License-Identifier: (MIT)

__version__ = "0.0.19"
__version__ = "0.0.2"
AUTHOR = "Vanessa Sochat"
EMAIL = "vsoch@users.noreply.github.com"
NAME = "kubescaler"
