add tool resource

jyong 2024-05-09 18:24:51 +08:00
parent 8137d63000
commit 95fae0438d
9 changed files with 129 additions and 32 deletions

View File

@ -77,12 +77,13 @@ class ToolInvokeMessage(BaseModel):
LINK = "link"
BLOB = "blob"
IMAGE_LINK = "image_link"
CHUNK = "chunk"
type: MessageType = MessageType.TEXT
"""
plain text, image url, link url or chunk list
"""
message: Union[str, bytes] = None
message: Union[str, bytes, list] = None
meta: dict[str, Any] = None
save_as: str = ''
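The new CHUNK message type lets a tool return structured knowledge chunks instead of plain text or a link: the message field then holds a list of KnowledgeResource objects (model added later in this commit). A minimal sketch, assuming the modules touched by this diff are importable; the contents are illustrative:

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

# A CHUNK message carries a list of KnowledgeResource objects in `message`
# instead of a str/bytes payload.
chunk_message = ToolInvokeMessage(
    type=ToolInvokeMessage.MessageType.CHUNK,
    message=[
        KnowledgeResource(content="Dify is an LLM app platform.",
                          title="Dify", url="https://dify.ai"),
    ],
)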

View File

@ -40,7 +40,7 @@ class BingSearchTool(BuiltinTool):
news = response['news']['value'] if 'news' in response else []
computation = response['computation']['value'] if 'computation' in response else None
if result_type == 'link':
if result_type == 'link' or result_type == 'chunk':
results = []
if search_results:
for result in search_results:
@ -72,7 +72,7 @@ class BingSearchTool(BuiltinTool):
))
return results
else:
if result_type == 'text' or result_type == 'chunk':
# construct text
text = ''
if search_results:

View File

@ -6,6 +6,7 @@ from serpapi import GoogleSearch
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
class HiddenPrints:
@ -35,7 +36,7 @@ class SerpAPI:
self.serpapi_api_key = api_key
self.search_engine = GoogleSearch
def run(self, query: str, **kwargs: Any) -> str:
def run(self, query: str, **kwargs: Any) -> str | list[KnowledgeResource]:
"""Run query through SerpAPI and parse result."""
typ = kwargs.get("result_type", "text")
return self._process_response(self.results(query), typ=typ)
@ -64,63 +65,79 @@ class SerpAPI:
return params
@staticmethod
def _process_response(res: dict, typ: str) -> str:
def _process_response(res: dict, typ: str) -> str | list[KnowledgeResource]:
"""Process response from SerpAPI."""
if "error" in res.keys():
raise ValueError(f"Got error from SerpAPI: {res['error']}")
if typ == "text":
toret = ""
chunks = []
toret = ""
if typ == "text" or typ == "chunk":
if "answer_box" in res.keys() and type(res["answer_box"]) == list:
res["answer_box"] = res["answer_box"][0] + "\n"
if "answer_box" in res.keys() and "answer" in res["answer_box"].keys():
toret += res["answer_box"]["answer"] + "\n"
chunks.append(KnowledgeResource(content=res["answer_box"]["answer"], title=res["answer_box"]["answer"]))
if "answer_box" in res.keys() and "snippet" in res["answer_box"].keys():
toret += res["answer_box"]["snippet"] + "\n"
chunks.append(
KnowledgeResource(content=res["answer_box"]["snippet"], title=res["answer_box"]["snippet"]))
if (
"answer_box" in res.keys()
and "snippet_highlighted_words" in res["answer_box"].keys()
"answer_box" in res.keys()
and "snippet_highlighted_words" in res["answer_box"].keys()
):
for item in res["answer_box"]["snippet_highlighted_words"]:
toret += item + "\n"
chunks.append(KnowledgeResource(content=item, title=item))
if (
"sports_results" in res.keys()
and "game_spotlight" in res["sports_results"].keys()
"sports_results" in res.keys()
and "game_spotlight" in res["sports_results"].keys()
):
toret += res["sports_results"]["game_spotlight"] + "\n"
chunks.append(KnowledgeResource(content=res["sports_results"]["game_spotlight"],
title=res["sports_results"]["game_spotlight"]))
if (
"shopping_results" in res.keys()
and "title" in res["shopping_results"][0].keys()
"shopping_results" in res.keys()
and "title" in res["shopping_results"][0].keys()
):
toret += res["shopping_results"][:3] + "\n"
chunks.append(KnowledgeResource(content=res["shopping_results"][:3], title=res["shopping_results"][:3]))
if (
"knowledge_graph" in res.keys()
and "description" in res["knowledge_graph"].keys()
"knowledge_graph" in res.keys()
and "description" in res["knowledge_graph"].keys()
):
toret = res["knowledge_graph"]["description"] + "\n"
chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
title=res["knowledge_graph"]["description"]))
if "snippet" in res["organic_results"][0].keys():
for item in res["organic_results"]:
toret += "content: " + item["snippet"] + "\n" + "link: " + item["link"] + "\n"
chunks.append(KnowledgeResource(content=item["snippet"], title=item["title"], url=item["link"]))
if (
"images_results" in res.keys()
and "thumbnail" in res["images_results"][0].keys()
"images_results" in res.keys()
and "thumbnail" in res["images_results"][0].keys()
):
thumbnails = [item["thumbnail"] for item in res["images_results"][:10]]
toret = thumbnails
chunks.append(KnowledgeResource(content=thumbnails, title=thumbnails))
if toret == "":
toret = "No good search result found"
elif typ == "link":
if typ == "link" or typ == "chunk":
if "knowledge_graph" in res.keys() and "title" in res["knowledge_graph"].keys() \
and "description_link" in res["knowledge_graph"].keys():
toret = res["knowledge_graph"]["description_link"]
chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
title=res["knowledge_graph"]["title"],
url=res["knowledge_graph"]["knowledge_graph_search_link"])
)
elif "knowledge_graph" in res.keys() and "see_results_about" in res["knowledge_graph"].keys() \
and len(res["knowledge_graph"]["see_results_about"]) > 0:
and len(res["knowledge_graph"]["see_results_about"]) > 0:
see_result_about = res["knowledge_graph"]["see_results_about"]
toret = ""
for item in see_result_about:
if "name" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['name']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['name']}]({item['link']})\n", title=item['name'], url=item['link']))
elif "organic_results" in res.keys() and len(res["organic_results"]) > 0:
organic_results = res["organic_results"]
toret = ""
@ -128,6 +145,7 @@ class SerpAPI:
if "title" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['title']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['title']}]({item['link']})\n", title=item['title'], url=item['link']))
elif "related_questions" in res.keys() and len(res["related_questions"]) > 0:
related_questions = res["related_questions"]
toret = ""
@ -135,6 +153,7 @@ class SerpAPI:
if "question" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['question']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['question']}]({item['link']})\n", title=item['title'], url=item['link']))
elif "related_searches" in res.keys() and len(res["related_searches"]) > 0:
related_searches = res["related_searches"]
toret = ""
@ -142,15 +161,19 @@ class SerpAPI:
if "query" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['query']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['query']}]({item['link']})\n", title=item['query'], url=item['link']))
else:
toret = "No good search result found"
if typ == "chunk":
return chunks
return toret
class GoogleSearchTool(BuiltinTool):
def _invoke(self,
user_id: str,
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
"""
invoke tools
"""
@ -160,5 +183,9 @@ class GoogleSearchTool(BuiltinTool):
result = SerpAPI(api_key).run(query, result_type=result_type)
if result_type == 'text':
return self.create_text_message(text=result)
return self.create_link_message(link=result)
elif result_type == 'link':
return self.create_link_message(link=result)
elif result_type == 'chunk':
return self.create_chunk_message(chunks=result)
else:
raise ValueError(f"Invalid result type: {result_type}")

View File

@ -39,6 +39,11 @@ parameters:
en_US: link
zh_Hans: 链接
pt_BR: link
- value: chunk
label:
en_US: chunk
zh_Hans: 分段
pt_BR: chunk
default: link
label:
en_US: Result type

View File

@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Optional, Union
from typing import Any, Optional, Union, List
from pydantic import BaseModel, validator
@ -15,6 +15,7 @@ from core.tools.entities.tool_entities import (
ToolRuntimeVariablePool,
)
from core.tools.tool_file_manager import ToolFileManager
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
class Tool(BaseModel, ABC):
@ -337,6 +338,8 @@ class Tool(BaseModel, ABC):
create an image message
:param image: the url of the image
:param save_as: the save_as
:return: the image message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
@ -348,6 +351,7 @@ class Tool(BaseModel, ABC):
create a link message
:param link: the url of the link
:param save_as: the save_as
:return: the link message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
@ -359,21 +363,37 @@ class Tool(BaseModel, ABC):
create a text message
:param text: the text
:param save_as: the save_as
:return: the text message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT,
message=text,
save_as=save_as
)
def create_chunk_message(self, chunks: List[KnowledgeResource], save_as: str = '') -> ToolInvokeMessage:
"""
create a chunk message
:param chunks: the chunks
:param save_as: the save_as
:return: the chunk message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.CHUNK,
message=chunks,
save_as=save_as
)
def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
"""
create a blob message
:param blob: the blob
:param meta: the meta
:param save_as: the save_as
:return: the blob message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
message=blob, meta=meta,
save_as=save_as
)
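A hedged usage sketch for the new helper: a hypothetical BuiltinTool subclass (ChunkDemoTool and its contents are illustrative, not part of this commit) that returns chunk results through create_chunk_message:

from typing import Any, Union

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

class ChunkDemoTool(BuiltinTool):
    def _invoke(self, user_id: str, tool_parameters: dict[str, Any],
                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        chunks = [
            KnowledgeResource(content="Dify is an LLM app platform.",
                              title="Dify", url="https://dify.ai"),
        ]
        # wraps the list in a CHUNK-typed ToolInvokeMessage via the new helper
        return self.create_chunk_message(chunks=chunks)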

View File

@ -131,7 +131,7 @@ class ToolEngine:
# hit the callback handler
workflow_tool_callback.on_tool_end(
tool_name=tool.identity.name,
tool_inputs=tool_parameters,
tool_outputs=response
)

View File

@ -0,0 +1,16 @@
from typing import Any, Optional
from pydantic import BaseModel
class KnowledgeResource(BaseModel):
"""
Knowledge Resource.
"""
content: str
title: str
url: Optional[str] = None
icon: Optional[str] = None
score: Optional[float] = None
metadata: Optional[dict[str, Any]] = None
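An illustrative instance (values invented; icon and score are filled in later by the tool node, and the '_source': 'tool' metadata is what the LLM node check below looks for):

resource = KnowledgeResource(
    content="Dify is an open-source LLM app development platform.",
    title="Dify",
    url="https://dify.ai",
    metadata={"_source": "tool"},
)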

View File

@ -311,6 +311,9 @@ class LLMNode(BaseNode):
}
return source
if ('metadata' in context_dict and '_source' in context_dict['metadata']
and context_dict['metadata']['_source'] == 'tool'):
return context_dict
return None
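For illustration, a context entry that this new branch passes through unchanged because its metadata marks it as tool-provided (field values invented; only the metadata check comes from the diff):

context_dict = {
    "content": "Dify is an LLM app platform.",
    "title": "Dify",
    "url": "https://dify.ai",
    "metadata": {"_source": "tool"},
}
# ('metadata' in context_dict and '_source' in context_dict['metadata']
#  and context_dict['metadata']['_source'] == 'tool') is True, so it is returned as-is.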

View File

@ -70,13 +70,14 @@ class ToolNode(BaseNode):
)
# convert tool messages
plain_text, files = self._convert_tool_messages(messages)
plain_text, files, chunks = self._convert_tool_messages(messages)
return NodeRunResult(
status=WorkflowNodeExecutionStatus.SUCCEEDED,
outputs={
'text': plain_text,
'files': files
'files': files,
'chunks': chunks
},
metadata={
NodeRunMetadataKey.TOOL_INFO: tool_info
@ -111,7 +112,7 @@ class ToolNode(BaseNode):
return template_parser.format(inputs)
def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar]]:
def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar], list]:
"""
Convert ToolInvokeMessages into tuple[plain_text, files, chunks]
"""
@ -125,8 +126,9 @@ class ToolNode(BaseNode):
# extract plain text and files
files = self._extract_tool_response_binary(messages)
plain_text = self._extract_tool_response_text(messages)
chunks = self._extract_tool_response_chunk(messages)
return plain_text, files
return plain_text, files, chunks
def _extract_tool_response_binary(self, tool_response: list[ToolInvokeMessage]) -> list[FileVar]:
"""
@ -180,6 +182,29 @@ class ToolNode(BaseNode):
for message in tool_response
])
def _extract_tool_response_chunk(self, tool_response: list[ToolInvokeMessage]) -> list:
"""
Extract tool response chunks
"""
all_chunks = []
node_data = cast(ToolNodeData, self.node_data)
icon = ToolManager.get_tool_icon(
tenant_id=self.tenant_id,
provider_type=node_data.provider_type,
provider_id=node_data.provider_id
)
for message in tool_response:
if message.type == ToolInvokeMessage.MessageType.CHUNK:
for chunk in message.message:
chunk.icon = icon
chunk.metadata = {
'_source': 'tool'
}
all_chunks.append(chunk)
return all_chunks
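Putting the pieces together, a rough sketch of what a downstream node now receives from the tool node outputs (shape follows this diff; the values and icon URL are illustrative):

from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

outputs = {
    'text': 'plain text joined from TEXT and LINK messages',
    'files': [],  # FileVar objects extracted from IMAGE/BLOB messages
    'chunks': [
        KnowledgeResource(
            content='Dify is an LLM app platform.',
            title='Dify',
            url='https://dify.ai',
            icon='https://example.com/tool-icon.png',  # set from ToolManager.get_tool_icon
            metadata={'_source': 'tool'},  # tagged by _extract_tool_response_chunk
        ),
    ],
}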
@classmethod
def _extract_variable_selector_to_variable_mapping(cls, node_data: ToolNodeData) -> dict[str, list[str]]:
"""