fix: correct linewrap think display in generic openai api (#13260)
Signed-off-by: xhe <xw897002528@gmail.com>
parent 7673c36af3
commit da2ee04fce
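This commit touches four provider classes, visible in the hunk headers below: DeepseekLargeLanguageModel and SiliconflowLargeLanguageModel drop their copied streaming handlers and fall back to the shared OAIAPICompatLargeLanguageModel implementation, while that shared handler and VolcengineMaaSLargeLanguageModel replace chained str.replace calls with a single regular expression when wrapping streamed reasoning ("think") content in a markdown quote. As a rough standalone sketch of the behavioural difference (not code from this repository), compare the two approaches on a reasoning chunk that mixes single and double newlines:

import re

chunk = "First thought.\n\nSecond thought.\nThird thought."

# Old style: only one branch ever ran per chunk, so single newlines in a chunk
# that also contained "\n\n" were left without the "> " quote prefix.
if "\n\n" in chunk:
    old = chunk.replace("\n\n", "\n> ")
elif "\n" in chunk:
    old = chunk.replace("\n", "\n> ")

# New style: one pass that prefixes every newline not already followed by ">"
# or by another newline.
new = re.sub(r"\n(?!(>|\n))", "\n> ", chunk)

print(repr(old))  # 'First thought.\n> Second thought.\nThird thought.'
print(repr(new))  # 'First thought.\n\n> Second thought.\n> Third thought.'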
@@ -1,13 +1,10 @@
-import json
 from collections.abc import Generator
 from typing import Optional, Union

-import requests
 from yarl import URL

-from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
 from core.model_runtime.entities.message_entities import (
-    AssistantPromptMessage,
     PromptMessage,
     PromptMessageTool,
 )
@@ -39,208 +36,3 @@ class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
         credentials["mode"] = LLMMode.CHAT.value
         credentials["function_calling_type"] = "tool_call"
         credentials["stream_function_calling"] = "support"
-
-    def _handle_generate_stream_response(
-        self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
-    ) -> Generator:
-        """
-        Handle llm stream response
-
-        :param model: model name
-        :param credentials: model credentials
-        :param response: streamed response
-        :param prompt_messages: prompt messages
-        :return: llm response chunk generator
-        """
-        full_assistant_content = ""
-        chunk_index = 0
-        is_reasoning_started = False  # Add flag to track reasoning state
-
-        def create_final_llm_result_chunk(
-            id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
-        ) -> LLMResultChunk:
-            # calculate num tokens
-            prompt_tokens = usage and usage.get("prompt_tokens")
-            if prompt_tokens is None:
-                prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
-            completion_tokens = usage and usage.get("completion_tokens")
-            if completion_tokens is None:
-                completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
-
-            # transform usage
-            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
-
-            return LLMResultChunk(
-                id=id,
-                model=model,
-                prompt_messages=prompt_messages,
-                delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
-            )
-
-        # delimiter for stream response, need unicode_escape
-        import codecs
-
-        delimiter = credentials.get("stream_mode_delimiter", "\n\n")
-        delimiter = codecs.decode(delimiter, "unicode_escape")
-
-        tools_calls: list[AssistantPromptMessage.ToolCall] = []
-
-        def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
-            def get_tool_call(tool_call_id: str):
-                if not tool_call_id:
-                    return tools_calls[-1]
-
-                tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
-                if tool_call is None:
-                    tool_call = AssistantPromptMessage.ToolCall(
-                        id=tool_call_id,
-                        type="function",
-                        function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
-                    )
-                    tools_calls.append(tool_call)
-
-                return tool_call
-
-            for new_tool_call in new_tool_calls:
-                # get tool call
-                tool_call = get_tool_call(new_tool_call.function.name)
-                # update tool call
-                if new_tool_call.id:
-                    tool_call.id = new_tool_call.id
-                if new_tool_call.type:
-                    tool_call.type = new_tool_call.type
-                if new_tool_call.function.name:
-                    tool_call.function.name = new_tool_call.function.name
-                if new_tool_call.function.arguments:
-                    tool_call.function.arguments += new_tool_call.function.arguments
-
-        finish_reason = None  # The default value of finish_reason is None
-        message_id, usage = None, None
-        for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
-            chunk = chunk.strip()
-            if chunk:
-                # ignore sse comments
-                if chunk.startswith(":"):
-                    continue
-                decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
-                if decoded_chunk == "[DONE]":  # Some provider returns "data: [DONE]"
-                    continue
-
-                try:
-                    chunk_json: dict = json.loads(decoded_chunk)
-                # stream ended
-                except json.JSONDecodeError as e:
-                    yield create_final_llm_result_chunk(
-                        id=message_id,
-                        index=chunk_index + 1,
-                        message=AssistantPromptMessage(content=""),
-                        finish_reason="Non-JSON encountered.",
-                        usage=usage,
-                    )
-                    break
-                # handle the error here. for issue #11629
-                if chunk_json.get("error") and chunk_json.get("choices") is None:
-                    raise ValueError(chunk_json.get("error"))
-
-                if chunk_json:
-                    if u := chunk_json.get("usage"):
-                        usage = u
-                if not chunk_json or len(chunk_json["choices"]) == 0:
-                    continue
-
-                choice = chunk_json["choices"][0]
-                finish_reason = chunk_json["choices"][0].get("finish_reason")
-                message_id = chunk_json.get("id")
-                chunk_index += 1
-
-                if "delta" in choice:
-                    delta = choice["delta"]
-                    is_reasoning = delta.get("reasoning_content")
-                    delta_content = delta.get("content") or delta.get("reasoning_content")
-
-                    assistant_message_tool_calls = None
-
-                    if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
-                        assistant_message_tool_calls = delta.get("tool_calls", None)
-                    elif (
-                        "function_call" in delta
-                        and credentials.get("function_calling_type", "no_call") == "function_call"
-                    ):
-                        assistant_message_tool_calls = [
-                            {"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
-                        ]
-
-                    # assistant_message_function_call = delta.delta.function_call
-
-                    # extract tool calls from response
-                    if assistant_message_tool_calls:
-                        tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
-                        increase_tool_call(tool_calls)
-
-                    if delta_content is None or delta_content == "":
-                        continue
-
-                    # Add markdown quote markers for reasoning content
-                    if is_reasoning:
-                        if not is_reasoning_started:
-                            delta_content = "> 💭 " + delta_content
-                            is_reasoning_started = True
-                        elif "\n\n" in delta_content:
-                            delta_content = delta_content.replace("\n\n", "\n> ")
-                        elif "\n" in delta_content:
-                            delta_content = delta_content.replace("\n", "\n> ")
-                    elif is_reasoning_started:
-                        # If we were in reasoning mode but now getting regular content,
-                        # add \n\n to close the reasoning block
-                        delta_content = "\n\n" + delta_content
-                        is_reasoning_started = False
-
-                    # transform assistant message to prompt message
-                    assistant_prompt_message = AssistantPromptMessage(
-                        content=delta_content,
-                    )
-
-                    # reset tool calls
-                    tool_calls = []
-                    full_assistant_content += delta_content
-                elif "text" in choice:
-                    choice_text = choice.get("text", "")
-                    if choice_text == "":
-                        continue
-
-                    # transform assistant message to prompt message
-                    assistant_prompt_message = AssistantPromptMessage(content=choice_text)
-                    full_assistant_content += choice_text
-                else:
-                    continue
-
-                yield LLMResultChunk(
-                    id=message_id,
-                    model=model,
-                    prompt_messages=prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=chunk_index,
-                        message=assistant_prompt_message,
-                    ),
-                )
-
-            chunk_index += 1
-
-        if tools_calls:
-            yield LLMResultChunk(
-                id=message_id,
-                model=model,
-                prompt_messages=prompt_messages,
-                delta=LLMResultChunkDelta(
-                    index=chunk_index,
-                    message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
-                ),
-            )
-
-        yield create_final_llm_result_chunk(
-            id=message_id,
-            index=chunk_index,
-            message=AssistantPromptMessage(content=""),
-            finish_reason=finish_reason,
-            usage=usage,
-        )
@@ -1,5 +1,6 @@
 import json
 import logging
+import re
 from collections.abc import Generator
 from decimal import Decimal
 from typing import Optional, Union, cast
@@ -515,6 +516,8 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
                 if "delta" in choice:
                     delta = choice["delta"]
                     delta_content = delta.get("content")
+                    if not delta_content:
+                        delta_content = ""

                     if not is_reasoning_started_tag and "<think>" in delta_content:
                         is_reasoning_started_tag = True
@@ -523,20 +526,21 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
                         delta_content = delta_content.replace("</think>", "") + "\n\n"
                         is_reasoning_started_tag = False
                     elif is_reasoning_started_tag:
-                        if "\n\n" in delta_content:
-                            delta_content = delta_content.replace("\n\n", "\n> ")
-                        elif "\n" in delta_content:
-                            delta_content = delta_content.replace("\n", "\n> ")
+                        if "\n" in delta_content:
+                            delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)

                     reasoning_content = delta.get("reasoning_content")
-                    if reasoning_content:
+                    if is_reasoning_started and not reasoning_content and not delta_content:
+                        delta_content = ""
+                    elif reasoning_content:
                         if not is_reasoning_started:
                             delta_content = "> 💭 " + reasoning_content
                             is_reasoning_started = True
-                        elif "\n\n" in delta_content:
-                            delta_content = reasoning_content.replace("\n\n", "\n> ")
-                        elif "\n" in delta_content:
-                            delta_content = reasoning_content.replace("\n", "\n> ")
+                        else:
+                            delta_content = reasoning_content
+
+                        if "\n" in delta_content:
+                            delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
                     elif is_reasoning_started:
                         # If we were in reasoning mode but now getting regular content,
                         # add \n\n to close the reasoning block
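In the generic handler above, delta_content is now normalised to an empty string so the <think> checks never see None, an empty keep-alive delta during reasoning emits nothing, and both the <think>-tag branch and the reasoning_content branch funnel into the same re.sub. A condensed, hypothetical helper (wrap_reasoning is not a real method in this codebase) shows how the new control flow plays out over a short stream of deltas:

import re

def wrap_reasoning(deltas):
    """Hypothetical condensed version of the new quoting logic, for illustration only."""
    is_reasoning_started = False
    out = []
    for delta in deltas:
        delta_content = delta.get("content") or ""
        reasoning_content = delta.get("reasoning_content")
        if is_reasoning_started and not reasoning_content and not delta_content:
            delta_content = ""  # empty keep-alive delta while reasoning: emit nothing
        elif reasoning_content:
            if not is_reasoning_started:
                delta_content = "> 💭 " + reasoning_content  # open the markdown quote once
                is_reasoning_started = True
            else:
                delta_content = reasoning_content
            if "\n" in delta_content:
                # prefix newlines that are not already followed by ">" or another newline
                delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
        elif is_reasoning_started:
            delta_content = "\n\n" + delta_content  # reasoning ended: close the quote block
            is_reasoning_started = False
        out.append(delta_content)
    return "".join(out)

print(wrap_reasoning([
    {"reasoning_content": "Let me check.\nStep 1 looks fine."},
    {"reasoning_content": ""},  # empty chunk in the middle of reasoning
    {"content": "The answer is 42."},
]))
# -> "> 💭 Let me check.\n> Step 1 looks fine.\n\nThe answer is 42."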
@@ -1,13 +1,9 @@
-import json
 from collections.abc import Generator
 from typing import Optional, Union

-import requests
-
 from core.model_runtime.entities.common_entities import I18nObject
-from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
 from core.model_runtime.entities.message_entities import (
-    AssistantPromptMessage,
     PromptMessage,
     PromptMessageTool,
 )
@@ -96,208 +92,3 @@ class SiliconflowLargeLanguageModel(OAIAPICompatLargeLanguageModel):
                 ),
             ],
         )
-
-    def _handle_generate_stream_response(
-        self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
-    ) -> Generator:
-        """
-        Handle llm stream response
-
-        :param model: model name
-        :param credentials: model credentials
-        :param response: streamed response
-        :param prompt_messages: prompt messages
-        :return: llm response chunk generator
-        """
-        full_assistant_content = ""
-        chunk_index = 0
-        is_reasoning_started = False  # Add flag to track reasoning state
-
-        def create_final_llm_result_chunk(
-            id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
-        ) -> LLMResultChunk:
-            # calculate num tokens
-            prompt_tokens = usage and usage.get("prompt_tokens")
-            if prompt_tokens is None:
-                prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
-            completion_tokens = usage and usage.get("completion_tokens")
-            if completion_tokens is None:
-                completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
-
-            # transform usage
-            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
-
-            return LLMResultChunk(
-                id=id,
-                model=model,
-                prompt_messages=prompt_messages,
-                delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
-            )
-
-        # delimiter for stream response, need unicode_escape
-        import codecs
-
-        delimiter = credentials.get("stream_mode_delimiter", "\n\n")
-        delimiter = codecs.decode(delimiter, "unicode_escape")
-
-        tools_calls: list[AssistantPromptMessage.ToolCall] = []
-
-        def increase_tool_call(new_tool_calls: list[AssistantPromptMessage.ToolCall]):
-            def get_tool_call(tool_call_id: str):
-                if not tool_call_id:
-                    return tools_calls[-1]
-
-                tool_call = next((tool_call for tool_call in tools_calls if tool_call.id == tool_call_id), None)
-                if tool_call is None:
-                    tool_call = AssistantPromptMessage.ToolCall(
-                        id=tool_call_id,
-                        type="function",
-                        function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="", arguments=""),
-                    )
-                    tools_calls.append(tool_call)
-
-                return tool_call
-
-            for new_tool_call in new_tool_calls:
-                # get tool call
-                tool_call = get_tool_call(new_tool_call.function.name)
-                # update tool call
-                if new_tool_call.id:
-                    tool_call.id = new_tool_call.id
-                if new_tool_call.type:
-                    tool_call.type = new_tool_call.type
-                if new_tool_call.function.name:
-                    tool_call.function.name = new_tool_call.function.name
-                if new_tool_call.function.arguments:
-                    tool_call.function.arguments += new_tool_call.function.arguments
-
-        finish_reason = None  # The default value of finish_reason is None
-        message_id, usage = None, None
-        for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
-            chunk = chunk.strip()
-            if chunk:
-                # ignore sse comments
-                if chunk.startswith(":"):
-                    continue
-                decoded_chunk = chunk.strip().removeprefix("data:").lstrip()
-                if decoded_chunk == "[DONE]":  # Some provider returns "data: [DONE]"
-                    continue
-
-                try:
-                    chunk_json: dict = json.loads(decoded_chunk)
-                # stream ended
-                except json.JSONDecodeError as e:
-                    yield create_final_llm_result_chunk(
-                        id=message_id,
-                        index=chunk_index + 1,
-                        message=AssistantPromptMessage(content=""),
-                        finish_reason="Non-JSON encountered.",
-                        usage=usage,
-                    )
-                    break
-                # handle the error here. for issue #11629
-                if chunk_json.get("error") and chunk_json.get("choices") is None:
-                    raise ValueError(chunk_json.get("error"))
-
-                if chunk_json:
-                    if u := chunk_json.get("usage"):
-                        usage = u
-                if not chunk_json or len(chunk_json["choices"]) == 0:
-                    continue
-
-                choice = chunk_json["choices"][0]
-                finish_reason = chunk_json["choices"][0].get("finish_reason")
-                message_id = chunk_json.get("id")
-                chunk_index += 1
-
-                if "delta" in choice:
-                    delta = choice["delta"]
-                    delta_content = delta.get("content")
-
-                    assistant_message_tool_calls = None
-
-                    if "tool_calls" in delta and credentials.get("function_calling_type", "no_call") == "tool_call":
-                        assistant_message_tool_calls = delta.get("tool_calls", None)
-                    elif (
-                        "function_call" in delta
-                        and credentials.get("function_calling_type", "no_call") == "function_call"
-                    ):
-                        assistant_message_tool_calls = [
-                            {"id": "tool_call_id", "type": "function", "function": delta.get("function_call", {})}
-                        ]

-                    # assistant_message_function_call = delta.delta.function_call
-
-                    # extract tool calls from response
-                    if assistant_message_tool_calls:
-                        tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
-                        increase_tool_call(tool_calls)
-
-                    if delta_content is None or delta_content == "":
-                        continue
-
-                    # Check for think tags
-                    if "<think>" in delta_content:
-                        is_reasoning_started = True
-                        # Remove <think> tag and add markdown quote
-                        delta_content = "> 💭 " + delta_content.replace("<think>", "")
-                    elif "</think>" in delta_content:
-                        # Remove </think> tag and add newlines to end quote block
-                        delta_content = delta_content.replace("</think>", "") + "\n\n"
-                        is_reasoning_started = False
-                    elif is_reasoning_started:
-                        # Add quote markers for content within thinking block
-                        if "\n\n" in delta_content:
-                            delta_content = delta_content.replace("\n\n", "\n> ")
-                        elif "\n" in delta_content:
-                            delta_content = delta_content.replace("\n", "\n> ")
-
-                    # transform assistant message to prompt message
-                    assistant_prompt_message = AssistantPromptMessage(
-                        content=delta_content,
-                    )
-
-                    # reset tool calls
-                    tool_calls = []
-                    full_assistant_content += delta_content
-                elif "text" in choice:
-                    choice_text = choice.get("text", "")
-                    if choice_text == "":
-                        continue
-
-                    # transform assistant message to prompt message
-                    assistant_prompt_message = AssistantPromptMessage(content=choice_text)
-                    full_assistant_content += choice_text
-                else:
-                    continue
-
-                yield LLMResultChunk(
-                    id=message_id,
-                    model=model,
-                    prompt_messages=prompt_messages,
-                    delta=LLMResultChunkDelta(
-                        index=chunk_index,
-                        message=assistant_prompt_message,
-                    ),
-                )
-
-            chunk_index += 1
-
-        if tools_calls:
-            yield LLMResultChunk(
-                id=message_id,
-                model=model,
-                prompt_messages=prompt_messages,
-                delta=LLMResultChunkDelta(
-                    index=chunk_index,
-                    message=AssistantPromptMessage(tool_calls=tools_calls, content=""),
-                ),
-            )
-
-        yield create_final_llm_result_chunk(
-            id=message_id,
-            index=chunk_index,
-            message=AssistantPromptMessage(content=""),
-            finish_reason=finish_reason,
-            usage=usage,
-        )
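The Deepseek and Siliconflow hunks are pure deletions: each subclass carried its own copy of _handle_generate_stream_response, one keyed on a reasoning_content field and the other on literal <think> tags, and both copies now defer to the shared handler shown earlier, which covers both styles. The tag-based style the deleted Siliconflow copy implemented can be sketched in isolation like this (standalone illustration with made-up helper names, not repository code):

import re

def quote_think_tags(delta_content: str, in_think: bool) -> tuple[str, bool]:
    """Mimic the tag-based branch: open/close a markdown quote around <think> blocks."""
    if not in_think and "<think>" in delta_content:
        return "> 💭 " + delta_content.replace("<think>", ""), True
    if in_think and "</think>" in delta_content:
        return delta_content.replace("</think>", "") + "\n\n", False
    if in_think and "\n" in delta_content:
        # only newlines not already followed by ">" or another newline get the quote prefix
        return re.sub(r"\n(?!(>|\n))", "\n> ", delta_content), True
    return delta_content, in_think

state = False
out = []
for piece in ["<think>plan:\n", "check input\nreturn result", "</think>", "Done."]:
    piece, state = quote_think_tags(piece, state)
    out.append(piece)
print("".join(out))
# -> "> 💭 plan:\ncheck input\n> return result\n\nDone."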
@@ -1,4 +1,5 @@
 import logging
+import re
 from collections.abc import Generator
 from typing import Optional

@@ -251,25 +252,23 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
         for chunk in chunks:
             content = ""
             if chunk.choices:
-                if hasattr(chunk.choices[0].delta, "reasoning_content"):
-                    delta_content = ""
+                delta = chunk.choices[0].delta
+                if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
+                    content = ""
+                elif hasattr(delta, "reasoning_content"):
                     if not is_reasoning_started:
                         is_reasoning_started = True
-                        delta_content = "> 💭 " + chunk.choices[0].delta.reasoning_content
+                        content = "> 💭 " + delta.reasoning_content
                     else:
-                        delta_content = chunk.choices[0].delta.reasoning_content
+                        content = delta.reasoning_content

-                    if "\n\n" in delta_content:
-                        delta_content = delta_content.replace("\n\n", "\n> ")
-                    elif "\n" in delta_content:
-                        delta_content = delta_content.replace("\n", "\n> ")
-
-                    content = delta_content
+                    if "\n" in content:
+                        content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
                 elif is_reasoning_started:
-                    content = "\n\n" + chunk.choices[0].delta.content
+                    content = "\n\n" + delta.content
                     is_reasoning_started = False
                 else:
-                    content = chunk.choices[0].delta.content
+                    content = delta.content

             yield LLMResultChunk(
                 model=model,
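The Volcengine MaaS handler receives SDK objects rather than JSON dicts, so the new code binds delta = chunk.choices[0].delta once and branches on hasattr(delta, "reasoning_content"). The sketch below condenses that loop to iterate deltas directly, using SimpleNamespace stand-ins for the SDK objects (an assumption for illustration; the real chunk and choice wrappers are not shown in this diff):

import re
from types import SimpleNamespace

def render(deltas):
    """Illustrative stand-in for the Volcengine stream loop; not repository code."""
    is_reasoning_started = False
    out = []
    for delta in deltas:
        content = ""
        if is_reasoning_started and not hasattr(delta, "reasoning_content") and not delta.content:
            content = ""  # empty keep-alive chunk while reasoning: emit nothing
        elif hasattr(delta, "reasoning_content"):
            if not is_reasoning_started:
                is_reasoning_started = True
                content = "> 💭 " + delta.reasoning_content
            else:
                content = delta.reasoning_content
            if "\n" in content:
                # prefix only newlines not already followed by ">" or another newline
                content = re.sub(r"\n(?!(>|\n))", "\n> ", content)
        elif is_reasoning_started:
            content = "\n\n" + delta.content  # reasoning ended: close the quote block
            is_reasoning_started = False
        else:
            content = delta.content
        out.append(content)
    return "".join(out)

# SimpleNamespace objects stand in for chunk.choices[0].delta from the SDK.
stream = [
    SimpleNamespace(reasoning_content="compare the\ntwo options", content=None),
    SimpleNamespace(content="Option B wins."),
]
print(render(stream))
# -> "> 💭 compare the\n> two options\n\nOption B wins."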