commit cd1c2769d9
Obada Khalili, 2025-03-21 15:04:12 +08:00, committed by GitHub
No known key found for this signature in database (GPG Key ID: B5690EEEBB952194)
2 changed files with 239 additions and 206 deletions


@@ -32,7 +32,9 @@ logger = logging.getLogger(__name__)
 class FunctionCallAgentRunner(BaseAgentRunner):
-    def run(self, message: Message, query: str, **kwargs: Any) -> Generator[LLMResultChunk, None, None]:
+    def run(
+        self, message: Message, query: str, **kwargs: Any
+    ) -> Union[Generator[LLMResultChunk, None, None], LLMResult]:
         """
         Run FunctionCall agent application
         """
@@ -72,55 +74,98 @@ class FunctionCallAgentRunner(BaseAgentRunner):
         model_instance = self.model_instance

-        while function_call_state and iteration_step <= max_iteration_steps:
-            function_call_state = False
+        final_prompt_messages = None
+        final_system_fingerprint = None

-            if iteration_step == max_iteration_steps:
-                # the last iteration, remove all tools
-                prompt_messages_tools = []
+        def response_generator() -> Generator[LLMResultChunk, None, None]:
+            nonlocal \
+                function_call_state, \
+                iteration_step, \
+                prompt_messages_tools, \
+                final_answer, \
+                final_prompt_messages, \
+                final_system_fingerprint

-            message_file_ids: list[str] = []
-            agent_thought = self.create_agent_thought(
-                message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids
-            )
+            while function_call_state and iteration_step <= max_iteration_steps:
+                function_call_state = False

-            # recalc llm max tokens
-            prompt_messages = self._organize_prompt_messages()
-            self.recalc_llm_max_tokens(self.model_config, prompt_messages)
-            # invoke model
-            chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
-                prompt_messages=prompt_messages,
-                model_parameters=app_generate_entity.model_conf.parameters,
-                tools=prompt_messages_tools,
-                stop=app_generate_entity.model_conf.stop,
-                stream=self.stream_tool_call,
-                user=self.user_id,
-                callbacks=[],
-            )
+                if iteration_step == max_iteration_steps:
+                    # the last iteration, remove all tools
+                    prompt_messages_tools = []

-            tool_calls: list[tuple[str, str, dict[str, Any]]] = []
+                message_file_ids: list[str] = []
+                agent_thought = self.create_agent_thought(
+                    message_id=message.id, message="", tool_name="", tool_input="", messages_ids=message_file_ids
+                )

-            # save full response
-            response = ""
+                # recalc llm max tokens
+                prompt_messages = self._organize_prompt_messages()
+                self.recalc_llm_max_tokens(self.model_config, prompt_messages)
+                # invoke model
+                chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
+                    prompt_messages=prompt_messages,
+                    model_parameters=app_generate_entity.model_conf.parameters,
+                    tools=prompt_messages_tools,
+                    stop=app_generate_entity.model_conf.stop,
+                    stream=self.stream_tool_call,
+                    user=self.user_id,
+                    callbacks=[],
+                )

-            # save tool call names and inputs
-            tool_call_names = ""
-            tool_call_inputs = ""
+                tool_calls: list[tuple[str, str, dict[str, Any]]] = []

-            current_llm_usage = None
+                # save full response
+                response = ""

-            if isinstance(chunks, Generator):
-                is_first_chunk = True
-                for chunk in chunks:
-                    if is_first_chunk:
-                        self.queue_manager.publish(
-                            QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
-                        )
-                        is_first_chunk = False
+                # save tool call names and inputs
+                tool_call_names = ""
+                tool_call_inputs = ""
+                current_llm_usage = None
+                if isinstance(chunks, Generator):
+                    is_first_chunk = True
+                    for chunk in chunks:
+                        if is_first_chunk:
+                            self.queue_manager.publish(
+                                QueueAgentThoughtEvent(agent_thought_id=agent_thought.id),
+                                PublishFrom.APPLICATION_MANAGER,
+                            )
+                            is_first_chunk = False
+                        # check if there is any tool call
+                        if self.check_tool_calls(chunk):
+                            function_call_state = True
+                            tool_calls.extend(self.extract_tool_calls(chunk) or [])
+                            tool_call_names = ";".join([tool_call[1] for tool_call in tool_calls])
+                            try:
+                                tool_call_inputs = json.dumps(
+                                    {tool_call[1]: tool_call[2] for tool_call in tool_calls}, ensure_ascii=False
+                                )
+                            except json.JSONDecodeError:
+                                # ensure ascii to avoid encoding error
+                                tool_call_inputs = json.dumps({tool_call[1]: tool_call[2] for tool_call in tool_calls})
+                        final_prompt_messages = chunk.prompt_messages
+                        final_system_fingerprint = chunk.system_fingerprint
+                        if chunk.delta.message and chunk.delta.message.content:
+                            if isinstance(chunk.delta.message.content, list):
+                                for content in chunk.delta.message.content:
+                                    response += content.data
+                            else:
+                                response += str(chunk.delta.message.content)
+                        if chunk.delta.usage:
+                            increase_usage(llm_usage, chunk.delta.usage)
+                            current_llm_usage = chunk.delta.usage
+                        yield chunk
+                else:
+                    result = chunks
                     # check if there is any tool call
-                    if self.check_tool_calls(chunk):
+                    if self.check_blocking_tool_calls(result):
                         function_call_state = True
-                        tool_calls.extend(self.extract_tool_calls(chunk) or [])
+                        tool_calls.extend(self.extract_blocking_tool_calls(result) or [])
                         tool_call_names = ";".join([tool_call[1] for tool_call in tool_calls])
                         try:
                             tool_call_inputs = json.dumps(
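Everything the old loop did is now nested inside `response_generator()`, so the loop variables it assigns (`function_call_state`, `iteration_step`, `final_prompt_messages`, and so on) live in the enclosing `run()` scope and must be declared `nonlocal`; otherwise each assignment would create a generator-local variable and the blocking path could never observe the final values. A toy illustration of that rule (hypothetical names, not from this codebase):

```python
from collections.abc import Generator


def outer() -> tuple[list[str], int]:
    seen = 0  # enclosing-scope state the nested generator must update

    def gen() -> Generator[str, None, None]:
        nonlocal seen  # without this, `seen += 1` raises UnboundLocalError
        for word in ("a", "b", "c"):
            seen += 1
            yield word

    chunks = list(gen())  # draining the generator executes the updates
    return chunks, seen


assert outer() == (["a", "b", "c"], 3)
```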
@@ -130,189 +175,180 @@ class FunctionCallAgentRunner(BaseAgentRunner):
                             # ensure ascii to avoid encoding error
                             tool_call_inputs = json.dumps({tool_call[1]: tool_call[2] for tool_call in tool_calls})

-                    if chunk.delta.message and chunk.delta.message.content:
-                        if isinstance(chunk.delta.message.content, list):
-                            for content in chunk.delta.message.content:
+                    if result.usage:
+                        increase_usage(llm_usage, result.usage)
+                        current_llm_usage = result.usage
+                    final_prompt_messages = result.prompt_messages
+                    final_system_fingerprint = result.system_fingerprint
+                    if result.message and result.message.content:
+                        if isinstance(result.message.content, list):
+                            for content in result.message.content:
                                 response += content.data
                         else:
-                            response += str(chunk.delta.message.content)
+                            response += str(result.message.content)

-                    if chunk.delta.usage:
-                        increase_usage(llm_usage, chunk.delta.usage)
-                        current_llm_usage = chunk.delta.usage
+                    if not result.message.content:
+                        result.message.content = ""

-                    yield chunk
-            else:
-                result = chunks
-                # check if there is any tool call
-                if self.check_blocking_tool_calls(result):
-                    function_call_state = True
-                    tool_calls.extend(self.extract_blocking_tool_calls(result) or [])
-                    tool_call_names = ";".join([tool_call[1] for tool_call in tool_calls])
-                    try:
-                        tool_call_inputs = json.dumps(
-                            {tool_call[1]: tool_call[2] for tool_call in tool_calls}, ensure_ascii=False
-                        )
-                    except json.JSONDecodeError:
-                        # ensure ascii to avoid encoding error
-                        tool_call_inputs = json.dumps({tool_call[1]: tool_call[2] for tool_call in tool_calls})
-                self.queue_manager.publish(
-                    QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
-                )
-                if result.usage:
-                    increase_usage(llm_usage, result.usage)
-                    current_llm_usage = result.usage
-                if result.message and result.message.content:
-                    if isinstance(result.message.content, list):
-                        for content in result.message.content:
-                            response += content.data
-                    else:
-                        response += str(result.message.content)
-                if not result.message.content:
-                    result.message.content = ""
+                    self.queue_manager.publish(
+                        QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
+                    )

-                yield LLMResultChunk(
-                    model=model_instance.model,
-                    prompt_messages=result.prompt_messages,
-                    system_fingerprint=result.system_fingerprint,
-                    delta=LLMResultChunkDelta(
-                        index=0,
-                        message=result.message,
-                        usage=result.usage,
-                    ),
-                )
+                    yield LLMResultChunk(
+                        model=model_instance.model,
+                        prompt_messages=result.prompt_messages,
+                        system_fingerprint=result.system_fingerprint,
+                        delta=LLMResultChunkDelta(
+                            index=0,
+                            message=result.message,
+                            usage=result.usage,
+                        ),
+                    )

-            assistant_message = AssistantPromptMessage(content="", tool_calls=[])
-            if tool_calls:
-                assistant_message.tool_calls = [
-                    AssistantPromptMessage.ToolCall(
-                        id=tool_call[0],
-                        type="function",
-                        function=AssistantPromptMessage.ToolCall.ToolCallFunction(
-                            name=tool_call[1], arguments=json.dumps(tool_call[2], ensure_ascii=False)
-                        ),
-                    )
-                    for tool_call in tool_calls
-                ]
-            else:
-                assistant_message.content = response
+                assistant_message = AssistantPromptMessage(content="", tool_calls=[])
+                if tool_calls:
+                    assistant_message.tool_calls = [
+                        AssistantPromptMessage.ToolCall(
+                            id=tool_call[0],
+                            type="function",
+                            function=AssistantPromptMessage.ToolCall.ToolCallFunction(
+                                name=tool_call[1], arguments=json.dumps(tool_call[2], ensure_ascii=False)
+                            ),
+                        )
+                        for tool_call in tool_calls
+                    ]
+                else:
+                    assistant_message.content = response

-            self._current_thoughts.append(assistant_message)
+                self._current_thoughts.append(assistant_message)

-            # save thought
-            self.save_agent_thought(
-                agent_thought=agent_thought,
-                tool_name=tool_call_names,
-                tool_input=tool_call_inputs,
-                thought=response,
-                tool_invoke_meta=None,
-                observation=None,
-                answer=response,
-                messages_ids=[],
-                llm_usage=current_llm_usage,
-            )
-            self.queue_manager.publish(
-                QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
-            )
+                # save thought
+                self.save_agent_thought(
+                    agent_thought=agent_thought,
+                    tool_name=tool_call_names,
+                    tool_input=tool_call_inputs,
+                    thought=response,
+                    tool_invoke_meta=None,
+                    observation=None,
+                    answer=response,
+                    messages_ids=[],
+                    llm_usage=current_llm_usage,
+                )
+                self.queue_manager.publish(
+                    QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
+                )

-            final_answer += response + "\n"
+                final_answer += response

-            # call tools
-            tool_responses = []
-            for tool_call_id, tool_call_name, tool_call_args in tool_calls:
-                tool_instance = tool_instances.get(tool_call_name)
-                if not tool_instance:
-                    tool_response = {
-                        "tool_call_id": tool_call_id,
-                        "tool_call_name": tool_call_name,
-                        "tool_response": f"there is not a tool named {tool_call_name}",
-                        "meta": ToolInvokeMeta.error_instance(f"there is not a tool named {tool_call_name}").to_dict(),
-                    }
-                else:
-                    # invoke tool
-                    tool_invoke_response, message_files, tool_invoke_meta = ToolEngine.agent_invoke(
-                        tool=tool_instance,
-                        tool_parameters=tool_call_args,
-                        user_id=self.user_id,
-                        tenant_id=self.tenant_id,
-                        message=self.message,
-                        invoke_from=self.application_generate_entity.invoke_from,
-                        agent_tool_callback=self.agent_callback,
-                        trace_manager=trace_manager,
-                        app_id=self.application_generate_entity.app_config.app_id,
-                        message_id=self.message.id,
-                        conversation_id=self.conversation.id,
-                    )
-                    # publish files
-                    for message_file_id in message_files:
-                        # publish message file
-                        self.queue_manager.publish(
-                            QueueMessageFileEvent(message_file_id=message_file_id), PublishFrom.APPLICATION_MANAGER
-                        )
-                        # add message file ids
-                        message_file_ids.append(message_file_id)
-                    tool_response = {
-                        "tool_call_id": tool_call_id,
-                        "tool_call_name": tool_call_name,
-                        "tool_response": tool_invoke_response,
-                        "meta": tool_invoke_meta.to_dict(),
-                    }
-                tool_responses.append(tool_response)
-                if tool_response["tool_response"] is not None:
-                    self._current_thoughts.append(
-                        ToolPromptMessage(
-                            content=str(tool_response["tool_response"]),
-                            tool_call_id=tool_call_id,
-                            name=tool_call_name,
-                        )
-                    )
+                # call tools
+                tool_responses = []
+                for tool_call_id, tool_call_name, tool_call_args in tool_calls:
+                    tool_instance = tool_instances.get(tool_call_name)
+                    if not tool_instance:
+                        tool_response = {
+                            "tool_call_id": tool_call_id,
+                            "tool_call_name": tool_call_name,
+                            "tool_response": f"there is not a tool named {tool_call_name}",
+                            "meta": ToolInvokeMeta.error_instance(
+                                f"there is not a tool named {tool_call_name}"
+                            ).to_dict(),
+                        }
+                    else:
+                        # invoke tool
+                        tool_invoke_response, message_files, tool_invoke_meta = ToolEngine.agent_invoke(
+                            tool=tool_instance,
+                            tool_parameters=tool_call_args,
+                            user_id=self.user_id,
+                            tenant_id=self.tenant_id,
+                            message=self.message,
+                            invoke_from=self.application_generate_entity.invoke_from,
+                            agent_tool_callback=self.agent_callback,
+                            trace_manager=trace_manager,
+                            app_id=self.application_generate_entity.app_config.app_id,
+                            message_id=self.message.id,
+                            conversation_id=self.conversation.id,
+                        )
+                        # publish files
+                        for message_file_id in message_files:
+                            # publish message file
+                            self.queue_manager.publish(
+                                QueueMessageFileEvent(message_file_id=message_file_id), PublishFrom.APPLICATION_MANAGER
+                            )
+                            # add message file ids
+                            message_file_ids.append(message_file_id)
+                        tool_response = {
+                            "tool_call_id": tool_call_id,
+                            "tool_call_name": tool_call_name,
+                            "tool_response": tool_invoke_response,
+                            "meta": tool_invoke_meta.to_dict(),
+                        }
+                    tool_responses.append(tool_response)
+                    if tool_response["tool_response"] is not None:
+                        self._current_thoughts.append(
+                            ToolPromptMessage(
+                                content=str(tool_response["tool_response"]),
+                                tool_call_id=tool_call_id,
+                                name=tool_call_name,
+                            )
+                        )

-            if len(tool_responses) > 0:
-                # save agent thought
-                self.save_agent_thought(
-                    agent_thought=agent_thought,
-                    tool_name="",
-                    tool_input="",
-                    thought="",
-                    tool_invoke_meta={
-                        tool_response["tool_call_name"]: tool_response["meta"] for tool_response in tool_responses
-                    },
-                    observation={
-                        tool_response["tool_call_name"]: tool_response["tool_response"]
-                        for tool_response in tool_responses
-                    },
-                    answer="",
-                    messages_ids=message_file_ids,
-                )
-                self.queue_manager.publish(
-                    QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
-                )
+                if len(tool_responses) > 0:
+                    # save agent thought
+                    self.save_agent_thought(
+                        agent_thought=agent_thought,
+                        tool_name="",
+                        tool_input="",
+                        thought="",
+                        tool_invoke_meta={
+                            tool_response["tool_call_name"]: tool_response["meta"] for tool_response in tool_responses
+                        },
+                        observation={
+                            tool_response["tool_call_name"]: tool_response["tool_response"]
+                            for tool_response in tool_responses
+                        },
+                        answer="",
+                        messages_ids=message_file_ids,
+                    )
+                    self.queue_manager.publish(
+                        QueueAgentThoughtEvent(agent_thought_id=agent_thought.id), PublishFrom.APPLICATION_MANAGER
+                    )

-            # update prompt tool
-            for prompt_tool in prompt_messages_tools:
-                self.update_prompt_message_tool(tool_instances[prompt_tool.name], prompt_tool)
+                # update prompt tool
+                for prompt_tool in prompt_messages_tools:
+                    self.update_prompt_message_tool(tool_instances[prompt_tool.name], prompt_tool)

-            iteration_step += 1
+                iteration_step += 1

-        # publish end event
-        self.queue_manager.publish(
-            QueueMessageEndEvent(
-                llm_result=LLMResult(
-                    model=model_instance.model,
-                    prompt_messages=prompt_messages,
-                    message=AssistantPromptMessage(content=final_answer),
-                    usage=llm_usage["usage"] or LLMUsage.empty_usage(),
-                    system_fingerprint="",
-                )
-            ),
-            PublishFrom.APPLICATION_MANAGER,
-        )
+            # publish end event
+            self.queue_manager.publish(
+                QueueMessageEndEvent(
+                    llm_result=LLMResult(
+                        model=model_instance.model,
+                        prompt_messages=prompt_messages,
+                        message=AssistantPromptMessage(content=final_answer),
+                        usage=llm_usage["usage"] or LLMUsage.empty_usage(),
+                        system_fingerprint="",
+                    )
+                ),
+                PublishFrom.APPLICATION_MANAGER,
+            )
+
+        chunk_generator = response_generator()
+
+        if app_generate_entity.stream:
+            return chunk_generator
+        else:
+            list(chunk_generator)
+            return LLMResult(
+                model=model_instance.model,
+                prompt_messages=final_prompt_messages or [],
+                message=AssistantPromptMessage(content=final_answer),
+                usage=llm_usage["usage"] or LLMUsage.empty_usage(),
+                system_fingerprint=final_system_fingerprint or "",
+            )

     def check_tool_calls(self, llm_result_chunk: LLMResultChunk) -> bool:
         """


@@ -82,9 +82,6 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
         :param invoke_from: invoke from source
         :param stream: is stream
         """
-        if not streaming:
-            raise ValueError("Agent Chat App does not support blocking mode")
-
         if not args.get("query"):
             raise ValueError("query is required")