From 95fae0438ddc2fd1a7f49718888148a5409fd955 Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Thu, 9 May 2024 18:24:51 +0800
Subject: [PATCH] add tool resource

---
 api/core/tools/entities/tool_entities.py     |  3 +-
 .../builtin/bing/tools/bing_web_search.py    |  4 +-
 .../builtin/google/tools/google_search.py    | 71 +++++++++++++------
 .../builtin/google/tools/google_search.yaml  |  5 ++
 api/core/tools/tool/tool.py                  | 24 ++++++-
 api/core/tools/tool_engine.py                |  2 +-
 .../workflow/nodes/llm/knowledge_resource.py | 16 +++++
 api/core/workflow/nodes/llm/llm_node.py      |  3 +
 api/core/workflow/nodes/tool/tool_node.py    | 33 +++++++--
 9 files changed, 129 insertions(+), 32 deletions(-)
 create mode 100644 api/core/workflow/nodes/llm/knowledge_resource.py

diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py
index fad91baf83..6f57e76923 100644
--- a/api/core/tools/entities/tool_entities.py
+++ b/api/core/tools/entities/tool_entities.py
@@ -77,12 +77,13 @@ class ToolInvokeMessage(BaseModel):
         LINK = "link"
         BLOB = "blob"
         IMAGE_LINK = "image_link"
+        CHUNK = "chunk"
 
     type: MessageType = MessageType.TEXT
     """
         plain text, image url or link url
    """
-    message: Union[str, bytes] = None
+    message: Union[str, bytes, list] = None
    meta: dict[str, Any] = None
    save_as: str = ''

diff --git a/api/core/tools/provider/builtin/bing/tools/bing_web_search.py b/api/core/tools/provider/builtin/bing/tools/bing_web_search.py
index d133f38082..592499cd23 100644
--- a/api/core/tools/provider/builtin/bing/tools/bing_web_search.py
+++ b/api/core/tools/provider/builtin/bing/tools/bing_web_search.py
@@ -40,7 +40,7 @@ class BingSearchTool(BuiltinTool):
         news = response['news']['value'] if 'news' in response else []
         computation = response['computation']['value'] if 'computation' in response else None
 
-        if result_type == 'link':
+        if result_type == 'link' or result_type == 'chunk':
             results = []
             if search_results:
                 for result in search_results:
@@ -72,7 +72,7 @@ class BingSearchTool(BuiltinTool):
                 ))
 
             return results
-        else:
+        if result_type == 'text' or result_type == 'chunk':
            # construct text
            text = ''
            if search_results:
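
To make the intent of the new CHUNK message type concrete, here is a minimal construction sketch (illustration only, not part of the patch; it assumes the patched `api` package and its dependencies are importable):

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    # A CHUNK message carries a list payload, which is why the `message` field
    # above is widened to Union[str, bytes, list].
    message = ToolInvokeMessage(
        type=ToolInvokeMessage.MessageType.CHUNK,
        message=[
            KnowledgeResource(content="snippet text", title="Result title", url="https://example.com"),
        ],
    )
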
diff --git a/api/core/tools/provider/builtin/google/tools/google_search.py b/api/core/tools/provider/builtin/google/tools/google_search.py
index 0b1978ad3e..0ab9d88f1c 100644
--- a/api/core/tools/provider/builtin/google/tools/google_search.py
+++ b/api/core/tools/provider/builtin/google/tools/google_search.py
@@ -6,6 +6,7 @@ from serpapi import GoogleSearch
 
 from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
+from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
 
 
 class HiddenPrints:
@@ -35,7 +36,7 @@ class SerpAPI:
         self.serpapi_api_key = api_key
         self.search_engine = GoogleSearch
 
-    def run(self, query: str, **kwargs: Any) -> str:
+    def run(self, query: str, **kwargs: Any) -> str | list[KnowledgeResource]:
         """Run query through SerpAPI and parse result."""
         typ = kwargs.get("result_type", "text")
         return self._process_response(self.results(query), typ=typ)
@@ -64,63 +65,79 @@ class SerpAPI:
         return params
 
     @staticmethod
-    def _process_response(res: dict, typ: str) -> str:
+    def _process_response(res: dict, typ: str) -> str | list[KnowledgeResource]:
         """Process response from SerpAPI."""
         if "error" in res.keys():
             raise ValueError(f"Got error from SerpAPI: {res['error']}")
-
-        if typ == "text":
-            toret = ""
+        chunks = []
+        toret = ""
+        if typ == "text" or typ == "chunk":
             if "answer_box" in res.keys() and type(res["answer_box"]) == list:
                 res["answer_box"] = res["answer_box"][0] + "\n"
             if "answer_box" in res.keys() and "answer" in res["answer_box"].keys():
                 toret += res["answer_box"]["answer"] + "\n"
+                chunks.append(KnowledgeResource(content=res["answer_box"]["answer"], title=res["answer_box"]["answer"]))
             if "answer_box" in res.keys() and "snippet" in res["answer_box"].keys():
                 toret += res["answer_box"]["snippet"] + "\n"
+                chunks.append(
+                    KnowledgeResource(content=res["answer_box"]["snippet"], title=res["answer_box"]["snippet"]))
             if (
-                "answer_box" in res.keys()
-                and "snippet_highlighted_words" in res["answer_box"].keys()
+                    "answer_box" in res.keys()
+                    and "snippet_highlighted_words" in res["answer_box"].keys()
             ):
                 for item in res["answer_box"]["snippet_highlighted_words"]:
                     toret += item + "\n"
+                    chunks.append(KnowledgeResource(content=item, title=item))
             if (
-                "sports_results" in res.keys()
-                and "game_spotlight" in res["sports_results"].keys()
+                    "sports_results" in res.keys()
+                    and "game_spotlight" in res["sports_results"].keys()
             ):
                 toret += res["sports_results"]["game_spotlight"] + "\n"
+                chunks.append(KnowledgeResource(content=res["sports_results"]["game_spotlight"],
+                                                title=res["sports_results"]["game_spotlight"]))
             if (
-                "shopping_results" in res.keys()
-                and "title" in res["shopping_results"][0].keys()
+                    "shopping_results" in res.keys()
+                    and "title" in res["shopping_results"][0].keys()
             ):
                 toret += res["shopping_results"][:3] + "\n"
+                chunks.append(KnowledgeResource(content=res["shopping_results"][:3], title=res["shopping_results"][:3]))
             if (
-                "knowledge_graph" in res.keys()
-                and "description" in res["knowledge_graph"].keys()
+                    "knowledge_graph" in res.keys()
+                    and "description" in res["knowledge_graph"].keys()
             ):
                 toret = res["knowledge_graph"]["description"] + "\n"
+                chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
+                                                title=res["knowledge_graph"]["description"]))
             if "snippet" in res["organic_results"][0].keys():
                 for item in res["organic_results"]:
                     toret += "content: " + item["snippet"] + "\n" + "link: " + item["link"] + "\n"
+                    chunks.append(KnowledgeResource(content=item["snippet"], title=item["title"], url=item["link"]))
             if (
-                "images_results" in res.keys()
-                and "thumbnail" in res["images_results"][0].keys()
+                    "images_results" in res.keys()
+                    and "thumbnail" in res["images_results"][0].keys()
             ):
                 thumbnails = [item["thumbnail"] for item in res["images_results"][:10]]
                 toret = thumbnails
+                chunks.append(KnowledgeResource(content=thumbnails, title=thumbnails))
             if toret == "":
                 toret = "No good search result found"
-        elif typ == "link":
+        if typ == "link" or typ == "chunk":
             if "knowledge_graph" in res.keys() and "title" in res["knowledge_graph"].keys() \
                     and "description_link" in res["knowledge_graph"].keys():
                 toret = res["knowledge_graph"]["description_link"]
+                chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
+                                                title=res["knowledge_graph"]["title"],
+                                                url=res["knowledge_graph"]["knowledge_graph_search_link"])
+                              )
             elif "knowledge_graph" in res.keys() and "see_results_about" in res["knowledge_graph"].keys() \
-                    and len(res["knowledge_graph"]["see_results_about"]) > 0:
+                    and len(res["knowledge_graph"]["see_results_about"]) > 0:
                 see_result_about = res["knowledge_graph"]["see_results_about"]
                 toret = ""
                 for item in see_result_about:
                     if "name" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['name']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['name']}]({item['link']})\n",
+                                                    title=item['name'], url=item['link']))
             elif "organic_results" in res.keys() and len(res["organic_results"]) > 0:
                 organic_results = res["organic_results"]
                 toret = ""
@@ -128,6 +145,7 @@ class SerpAPI:
                     if "title" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['title']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['title']}]({item['link']})\n", title=item['title'], url=item['link']))
             elif "related_questions" in res.keys() and len(res["related_questions"]) > 0:
                 related_questions = res["related_questions"]
                 toret = ""
@@ -135,6 +153,7 @@ class SerpAPI:
                     if "question" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['question']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['question']}]({item['link']})\n", title=item['title'], url=item['link']))
             elif "related_searches" in res.keys() and len(res["related_searches"]) > 0:
                 related_searches = res["related_searches"]
                 toret = ""
@@ -142,15 +161,19 @@ class SerpAPI:
                     if "query" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['query']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['query']}]({item['link']})\n", title=item['query'], url=item['link']))
             else:
                 toret = "No good search result found"
+        if typ == "chunk":
+            return chunks
         return toret
 
+
 class GoogleSearchTool(BuiltinTool):
-    def _invoke(self, 
+    def _invoke(self,
                 user_id: str,
-                tool_parameters: dict[str, Any], 
-                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+                tool_parameters: dict[str, Any],
+                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
         """
             invoke tools
         """
@@ -160,5 +183,9 @@ class GoogleSearchTool(BuiltinTool):
         result = SerpAPI(api_key).run(query, result_type=result_type)
         if result_type == 'text':
             return self.create_text_message(text=result)
-        return self.create_link_message(link=result)
-        
\ No newline at end of file
+        elif result_type == 'link':
+            return self.create_link_message(link=result)
+        elif result_type == 'chunk':
+            return self.create_chunk_message(chunks=result)
+        else:
+            raise ValueError(f"Invalid result type: {result_type}")

diff --git a/api/core/tools/provider/builtin/google/tools/google_search.yaml b/api/core/tools/provider/builtin/google/tools/google_search.yaml
index 9dc5023992..787522b4a5 100644
--- a/api/core/tools/provider/builtin/google/tools/google_search.yaml
+++ b/api/core/tools/provider/builtin/google/tools/google_search.yaml
@@ -39,6 +39,11 @@ parameters:
         en_US: link
         zh_Hans: 链接
         pt_BR: link
+    - value: chunk
+      label:
+        en_US: chunk
+        zh_Hans: 分段
+        pt_BR: chunk
    default: link
    label:
      en_US: Result type
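
Because `_process_response` is a staticmethod, the new chunk path can be exercised directly; a rough sketch with a hand-made SerpAPI-style payload (illustration only, not part of the patch; the sample data is invented and the `serpapi` dependency must be installed for the module import to succeed):

    from core.tools.provider.builtin.google.tools.google_search import SerpAPI

    res = {
        "organic_results": [
            {"title": "Dify", "link": "https://dify.ai", "snippet": "LLM app development platform"},
        ],
    }
    # With typ="chunk" both the text pass and the link pass run, and the collected
    # KnowledgeResource list is returned instead of the concatenated string.
    chunks = SerpAPI._process_response(res, typ="chunk")
    for chunk in chunks:
        print(chunk.title, chunk.url)
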
diff --git a/api/core/tools/tool/tool.py b/api/core/tools/tool/tool.py
index 03aa0623fe..0c106d424e 100644
--- a/api/core/tools/tool/tool.py
+++ b/api/core/tools/tool/tool.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, List
 
 from pydantic import BaseModel, validator
 
@@ -15,6 +15,7 @@ from core.tools.entities.tool_entities import (
     ToolRuntimeVariablePool,
 )
 from core.tools.tool_file_manager import ToolFileManager
+from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
 
 
 class Tool(BaseModel, ABC):
@@ -337,6 +338,8 @@ class Tool(BaseModel, ABC):
         create an image message
 
         :param image: the url of the image
+        :param save_as: the name to save the message as
+
         :return: the image message
         """
         return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
@@ -348,6 +351,7 @@ class Tool(BaseModel, ABC):
         create a link message
 
         :param link: the url of the link
+        :param save_as: the name to save the message as
         :return: the link message
         """
         return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
@@ -359,21 +363,37 @@ class Tool(BaseModel, ABC):
         create a text message
 
         :param text: the text
+        :param save_as: the name to save the message as
         :return: the text message
         """
         return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT,
                                  message=text,
                                  save_as=save_as
                                  )
+
+    def create_chunk_message(self, chunks: List[KnowledgeResource], save_as: str = '') -> ToolInvokeMessage:
+        """
+        create a chunk message
+
+        :param chunks: the knowledge resource chunks
+        :param save_as: the name to save the message as
+        :return: the chunk message
+        """
+        return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.CHUNK,
+                                 message=chunks,
+                                 save_as=save_as
+                                 )
 
     def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
         """
         create a blob message
 
         :param blob: the blob
+        :param meta: the metadata of the message
+        :param save_as: the name to save the message as
         :return: the blob message
         """
-        return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
+        return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
                                  message=blob, meta=meta,
                                  save_as=save_as
                                  )
\ No newline at end of file
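
Any builtin tool can now emit chunks through the new helper; a sketch of what a tool's `_invoke` might look like (the tool class and values are hypothetical, only `create_chunk_message` and `KnowledgeResource` come from the patch):

    from typing import Any, Union

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.tools.tool.builtin_tool import BuiltinTool
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    class ExampleSearchTool(BuiltinTool):
        def _invoke(self, user_id: str, tool_parameters: dict[str, Any]
                    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
            # Build one KnowledgeResource per retrieved document (values invented).
            chunks = [
                KnowledgeResource(content="First snippet", title="Source A", url="https://a.example"),
                KnowledgeResource(content="Second snippet", title="Source B", url="https://b.example"),
            ]
            return self.create_chunk_message(chunks=chunks)
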
diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py
index f96d7940bd..2b56d07fbd 100644
--- a/api/core/tools/tool_engine.py
+++ b/api/core/tools/tool_engine.py
@@ -131,7 +131,7 @@ class ToolEngine:
 
             # hit the callback handler
             workflow_tool_callback.on_tool_end(
-                tool_name=tool.identity.name,
+                tool_name=tool.identity.name,
                 tool_inputs=tool_parameters,
                 tool_outputs=response
             )

diff --git a/api/core/workflow/nodes/llm/knowledge_resource.py b/api/core/workflow/nodes/llm/knowledge_resource.py
new file mode 100644
index 0000000000..7ecc97c39b
--- /dev/null
+++ b/api/core/workflow/nodes/llm/knowledge_resource.py
@@ -0,0 +1,16 @@
+from typing import Any, Optional
+
+from pydantic import BaseModel
+
+
+class KnowledgeResource(BaseModel):
+    """
+    Knowledge Resource.
+    """
+    content: str
+    title: str
+    url: Optional[str] = None
+    icon: Optional[str] = None
+    score: Optional[float] = None
+    metadata: Optional[dict[str, Any]] = None
+
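
KnowledgeResource is a plain Pydantic model, so downstream consumers can construct and serialize it directly; a small usage sketch (field values invented, not part of the patch):

    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    resource = KnowledgeResource(
        content="Dify is an LLM application development platform.",
        title="Dify",
        url="https://dify.ai",
        score=0.87,
        metadata={"_source": "tool"},
    )
    print(resource.dict())  # pydantic v1-style dump, matching the validator import used in tool.py
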
diff --git a/api/core/workflow/nodes/llm/llm_node.py b/api/core/workflow/nodes/llm/llm_node.py
index c8b7f279ab..c1f29b272f 100644
--- a/api/core/workflow/nodes/llm/llm_node.py
+++ b/api/core/workflow/nodes/llm/llm_node.py
@@ -311,6 +311,9 @@ class LLMNode(BaseNode):
                 }
 
                 return source
+            if ('metadata' in context_dict and '_source' in context_dict['metadata']
+                    and context_dict['metadata']['_source'] == 'tool'):
+                return context_dict
 
         return None
 
diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py
index d183dbe17b..2d6a982f28 100644
--- a/api/core/workflow/nodes/tool/tool_node.py
+++ b/api/core/workflow/nodes/tool/tool_node.py
@@ -70,13 +70,14 @@ class ToolNode(BaseNode):
             )
 
         # convert tool messages
-        plain_text, files = self._convert_tool_messages(messages)
+        plain_text, files, chunks = self._convert_tool_messages(messages)
 
         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
             outputs={
                 'text': plain_text,
-                'files': files
+                'files': files,
+                'chunks': chunks
             },
             metadata={
                 NodeRunMetadataKey.TOOL_INFO: tool_info
@@ -111,7 +112,7 @@ class ToolNode(BaseNode):
 
         return template_parser.format(inputs)
 
-    def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar]]:
+    def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar], list]:
         """
             Convert ToolInvokeMessages into tuple[plain_text, files]
         """
@@ -125,8 +126,9 @@ class ToolNode(BaseNode):
         # extract plain text and files
         files = self._extract_tool_response_binary(messages)
         plain_text = self._extract_tool_response_text(messages)
+        chunks = self._extract_tool_response_chunk(messages)
 
-        return plain_text, files
+        return plain_text, files, chunks
 
     def _extract_tool_response_binary(self, tool_response: list[ToolInvokeMessage]) -> list[FileVar]:
         """
@@ -180,6 +182,29 @@ class ToolNode(BaseNode):
             for message in tool_response
         ])
 
+    def _extract_tool_response_chunk(self, tool_response: list[ToolInvokeMessage]) -> list:
+        """
+        Extract tool response chunks
+        """
+        all_chunks = []
+        node_data = cast(ToolNodeData, self.node_data)
+        icon = ToolManager.get_tool_icon(
+            tenant_id=self.tenant_id,
+            provider_type=node_data.provider_type,
+            provider_id=node_data.provider_id
+        )
+        for message in tool_response:
+            if message.type == ToolInvokeMessage.MessageType.CHUNK:
+                for chunk in message.message:
+                    chunk.icon = icon
+                    chunk.metadata = {
+                        '_source': 'tool'
+                    }
+                    all_chunks.append(chunk)
+        return all_chunks
+
+
+
     @classmethod
     def _extract_variable_selector_to_variable_mapping(cls, node_data: ToolNodeData) -> dict[str, list[str]]:
         """
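
Taken together, a ToolNode run whose tool returns CHUNK messages would surface them roughly as follows; this is an invented illustration of the outputs shape only (at runtime the chunks are KnowledgeResource objects, written here as plain dicts for readability):

    outputs = {
        'text': 'plain text extracted from TEXT and LINK messages',
        'files': [],  # FileVar list from binary/image messages
        'chunks': [
            {
                'content': 'snippet text',
                'title': 'Source title',
                'url': 'https://example.com',
                'icon': 'https://cloud.example/tool-icon.png',  # set by _extract_tool_response_chunk
                'score': None,
                'metadata': {'_source': 'tool'},  # lets LLMNode recognize tool-sourced context
            },
        ],
    }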