Skip to content

Commit

Permalink
[core][autoscaler][v1] fix crashes by infeasible strict spread placement groups (ray-project#50959)
Browse files Browse the repository at this point in the history

Signed-off-by: Rueian <rueiancsie@gmail.com>
  • Loading branch information
rueian authored Feb 28, 2025
1 parent 544125c commit d08468f
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions python/ray/autoscaler/_private/resource_demand_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def reserve_and_allocate_spread(
for bundles in strict_spreads:
# Try to pack as many bundles of this group as possible on existing
# nodes. The remaining will be allocated on new nodes.
- unfulfilled, node_resources = get_bin_pack_residual(
+ unfulfilled, updated_node_resources = get_bin_pack_residual(
node_resources, bundles, strict_spread=True
)
max_to_add = self.max_workers + 1 - sum(node_type_counts.values())
Expand All @@ -582,8 +582,6 @@ def reserve_and_allocate_spread(
utilization_scorer=utilization_scorer,
strict_spread=True,
)
- _inplace_add(node_type_counts, to_launch)
- _inplace_add(to_add, to_launch)
new_node_resources = _node_type_counts_to_node_resources(
self.node_types, to_launch
)
Expand All @@ -592,8 +590,14 @@ def reserve_and_allocate_spread(
unfulfilled, including_reserved = get_bin_pack_residual(
new_node_resources, unfulfilled, strict_spread=True
)
- assert not unfulfilled
- node_resources += including_reserved
+ if unfulfilled:
+ logger.debug(
+ "Unfulfilled strict spread placement group: {}".format(bundles)
+ )
+ continue
+ _inplace_add(node_type_counts, to_launch)
+ _inplace_add(to_add, to_launch)
+ node_resources = updated_node_resources + including_reserved
return to_add, node_resources, node_type_counts

def debug_string(
Expand Down

0 comments on commit d08468f

Please sign in to comment.