Skip to content

Commit

Permalink
在压测中支持反思模型 (Support reasoning/reflection models in stress testing)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dobiichi-Origami committed Feb 7, 2025
1 parent 3c3f87a commit 04da975
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 5 deletions.
19 changes: 15 additions & 4 deletions python/qianfan/dataset/stress_test/qianfan_llm_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,7 @@ def _process_responses(
) -> _InnerResponseProcessRet:
last_resp: Optional[QfResponse] = None
merged_query = ""
merged_reasoning_content = ""
res_choices: Dict[int, Dict[str, Any]] = {}
first_flag, all_empty = True, True
clear_history = False
Expand Down Expand Up @@ -523,17 +524,25 @@ def _process_responses(
index = stream_json.get("index", -1)
if "delta" in stream_json:
content = stream_json["delta"].get("content", "")
merged_query += content
reasoning_content = stream_json["delta"].get(
"reasoning_content", ""
)
merged_query += content if content is not None else ""
merged_reasoning_content += (
reasoning_content if reasoning_content is not None else ""
)
if index not in res_choices:
res_choices[index] = {}
choice = res_choices[index]
choice.update(stream_json)
choice["delta"]["content"] = merged_query
choice["delta"]["reasoning_content"] = merged_reasoning_content
else:
self.exc = Exception("ERROR CODE 结果无法解析")
break
if len(content) != 0:
all_empty = False

if len(merged_query) != 0:
all_empty = False

assert last_resp is not None
if all_empty and not clear_history:
Expand Down Expand Up @@ -675,7 +684,9 @@ def _process_responses(
request_meta["output_tokens"] = 0

stream_json = resp["body"]
merged_query += stream_json["result"]
merged_query += (
stream_json["result"] if stream_json["result"] is not None else ""
)
if first_flag:
request_meta["first_token_latency"] = resp.statistic[
"first_token_latency"
Expand Down
4 changes: 4 additions & 0 deletions python/qianfan/resources/llm/chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2190,6 +2190,7 @@ def create(
assert isinstance(resp, QfResponse)
result = Completion.parse_obj(resp.body)
result.statistic = CompletionStatistic.parse_obj(resp.statistic)
result.raw = resp.body
return result
else:
assert isinstance(resp, Iterator)
Expand Down Expand Up @@ -2252,6 +2253,7 @@ async def acreate(
assert isinstance(resp, QfResponse)
result = Completion.parse_obj(resp.body)
result.statistic = CompletionStatistic.parse_obj(resp.statistic)
result.raw = resp.body
return result
else:
assert isinstance(resp, AsyncIterator)
Expand All @@ -2263,6 +2265,7 @@ def _create_completion_stream(
for r in resp:
result = CompletionChunk.parse_obj(r.body)
result.statistic = CompletionStatistic.parse_obj(r.statistic)
result.raw = r.body
yield result

async def _acreate_completion_stream(
Expand All @@ -2271,6 +2274,7 @@ async def _acreate_completion_stream(
async for r in resp:
result = CompletionChunk.parse_obj(r.body)
result.statistic = CompletionStatistic.parse_obj(r.statistic)
result.raw = r.body
yield result

def _convert_v2_request_to_v1(self, request: Any) -> Any:
Expand Down
10 changes: 9 additions & 1 deletion python/qianfan/resources/typing_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
client typing
"""

from typing import List, Optional
from typing import Any, List, Optional

from typing_extensions import Literal

Expand Down Expand Up @@ -46,6 +46,8 @@ class ChatCompletionMessage(BaseModel):
content: Optional[str] = None
"""The contents of the message."""

reasoning_content: Optional[str] = None

role: Literal["assistant"]
"""The role of the author of this message."""

Expand Down Expand Up @@ -128,11 +130,15 @@ class Completion(BaseModel):

statistic: Optional[CompletionStatistic] = None

raw: Any = None


class ChoiceDelta(BaseModel):
content: Optional[str] = None
"""The contents of the message."""

reasoning_content: Optional[str] = None

tool_calls: Optional[List[ToolCall]] = None


Expand Down Expand Up @@ -185,3 +191,5 @@ class CompletionChunk(BaseModel):
statistic: Optional[CompletionStatistic] = None

web_search: Optional[SearchResult] = None

raw: Any = None

0 comments on commit 04da975

Please sign in to comment.