From abf5f5a8dabefbd5bdaa1102aa339b647964cafc Mon Sep 17 00:00:00 2001
From: Alvaro Moran
Date: Fri, 13 Sep 2024 13:46:27 +0000
Subject: [PATCH] fix(Jetstream Pt): batch returned in prefill initialized to None

This is required when there are no more tokens generated after prefill.
---
 .../text_generation_server/jetstream_pt_support/generator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py b/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
index 59653e40..b6e24051 100644
--- a/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
+++ b/text-generation-inference/server/text_generation_server/jetstream_pt_support/generator.py
@@ -476,6 +476,7 @@ def prefill(self, batch: Batch) -> Tuple[List[Generation], CachedBatch]:
             self.slots.append(slot)
             len_active_slots += 1
 
+        batch = None
         if len_active_slots > 0:
             # Whatever initial batch these requests came from, we always return all pending requests in a single batch
             request_ids = [slot.request_id for slot in self.slots if slot.state == Slot.State.READY]