From 95fae0438ddc2fd1a7f49718888148a5409fd955 Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Thu, 9 May 2024 18:24:51 +0800
Subject: [PATCH] add tool resource

---
 api/core/tools/entities/tool_entities.py     |  3 +-
 .../builtin/bing/tools/bing_web_search.py    |  4 +-
 .../builtin/google/tools/google_search.py    | 71 +++++++++++++------
 .../builtin/google/tools/google_search.yaml  |  5 ++
 api/core/tools/tool/tool.py                  | 24 ++++++-
 api/core/tools/tool_engine.py                |  2 +-
 .../workflow/nodes/llm/knowledge_resource.py | 16 +++++
 api/core/workflow/nodes/llm/llm_node.py      |  3 +
 api/core/workflow/nodes/tool/tool_node.py    | 33 +++++++--
 9 files changed, 129 insertions(+), 32 deletions(-)
 create mode 100644 api/core/workflow/nodes/llm/knowledge_resource.py

diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py
index fad91baf83..6f57e76923 100644
--- a/api/core/tools/entities/tool_entities.py
+++ b/api/core/tools/entities/tool_entities.py
@@ -77,12 +77,13 @@ class ToolInvokeMessage(BaseModel):
         LINK = "link"
         BLOB = "blob"
         IMAGE_LINK = "image_link"
+        CHUNK = "chunk"
 
     type: MessageType = MessageType.TEXT
     """
         plain text, image url or link url
    """
-    message: Union[str, bytes] = None
+    message: Union[str, bytes, list] = None
    meta: dict[str, Any] = None
    save_as: str = ''

diff --git a/api/core/tools/provider/builtin/bing/tools/bing_web_search.py b/api/core/tools/provider/builtin/bing/tools/bing_web_search.py
index d133f38082..592499cd23 100644
--- a/api/core/tools/provider/builtin/bing/tools/bing_web_search.py
+++ b/api/core/tools/provider/builtin/bing/tools/bing_web_search.py
@@ -40,7 +40,7 @@ class BingSearchTool(BuiltinTool):
         news = response['news']['value'] if 'news' in response else []
         computation = response['computation']['value'] if 'computation' in response else None
 
-        if result_type == 'link':
+        if result_type == 'link' or result_type == 'chunk':
             results = []
             if search_results:
                 for result in search_results:
@@ -72,7 +72,7 @@ class BingSearchTool(BuiltinTool):
                 ))
 
             return results
-        else:
+        if result_type == 'text' or result_type == 'chunk':
            # construct text
            text = ''
            if search_results:
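
To make the intent of the new CHUNK message type concrete, here is a minimal construction sketch (illustration only, not part of the patch; it assumes the patched `api` package and its dependencies are importable):

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    # A CHUNK message carries a list payload, which is why the `message` field
    # above is widened to Union[str, bytes, list].
    message = ToolInvokeMessage(
        type=ToolInvokeMessage.MessageType.CHUNK,
        message=[
            KnowledgeResource(content="snippet text", title="Result title", url="https://example.com"),
        ],
    )
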
diff --git a/api/core/tools/provider/builtin/google/tools/google_search.py b/api/core/tools/provider/builtin/google/tools/google_search.py
index 0b1978ad3e..0ab9d88f1c 100644
--- a/api/core/tools/provider/builtin/google/tools/google_search.py
+++ b/api/core/tools/provider/builtin/google/tools/google_search.py
@@ -6,6 +6,7 @@ from serpapi import GoogleSearch
 
 from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
+from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
 
 
 class HiddenPrints:
@@ -35,7 +36,7 @@ class SerpAPI:
         self.serpapi_api_key = api_key
         self.search_engine = GoogleSearch
 
-    def run(self, query: str, **kwargs: Any) -> str:
+    def run(self, query: str, **kwargs: Any) -> str | list[KnowledgeResource]:
         """Run query through SerpAPI and parse result."""
         typ = kwargs.get("result_type", "text")
         return self._process_response(self.results(query), typ=typ)
@@ -64,63 +65,79 @@ class SerpAPI:
         return params
 
     @staticmethod
-    def _process_response(res: dict, typ: str) -> str:
+    def _process_response(res: dict, typ: str) -> str | list[KnowledgeResource]:
         """Process response from SerpAPI."""
         if "error" in res.keys():
             raise ValueError(f"Got error from SerpAPI: {res['error']}")
-
-        if typ == "text":
-            toret = ""
+        chunks = []
+        toret = ""
+        if typ == "text" or typ == "chunk":
             if "answer_box" in res.keys() and type(res["answer_box"]) == list:
                 res["answer_box"] = res["answer_box"][0] + "\n"
             if "answer_box" in res.keys() and "answer" in res["answer_box"].keys():
                 toret += res["answer_box"]["answer"] + "\n"
+                chunks.append(KnowledgeResource(content=res["answer_box"]["answer"], title=res["answer_box"]["answer"]))
             if "answer_box" in res.keys() and "snippet" in res["answer_box"].keys():
                 toret += res["answer_box"]["snippet"] + "\n"
+                chunks.append(
+                    KnowledgeResource(content=res["answer_box"]["snippet"], title=res["answer_box"]["snippet"]))
             if (
-                "answer_box" in res.keys()
-                and "snippet_highlighted_words" in res["answer_box"].keys()
+                    "answer_box" in res.keys()
+                    and "snippet_highlighted_words" in res["answer_box"].keys()
             ):
                 for item in res["answer_box"]["snippet_highlighted_words"]:
                     toret += item + "\n"
+                    chunks.append(KnowledgeResource(content=item, title=item))
             if (
-                "sports_results" in res.keys()
-                and "game_spotlight" in res["sports_results"].keys()
+                    "sports_results" in res.keys()
+                    and "game_spotlight" in res["sports_results"].keys()
             ):
                 toret += res["sports_results"]["game_spotlight"] + "\n"
+                chunks.append(KnowledgeResource(content=res["sports_results"]["game_spotlight"],
+                                                title=res["sports_results"]["game_spotlight"]))
             if (
-                "shopping_results" in res.keys()
-                and "title" in res["shopping_results"][0].keys()
+                    "shopping_results" in res.keys()
+                    and "title" in res["shopping_results"][0].keys()
             ):
                 toret += res["shopping_results"][:3] + "\n"
+                chunks.append(KnowledgeResource(content=res["shopping_results"][:3], title=res["shopping_results"][:3]))
             if (
-                "knowledge_graph" in res.keys()
-                and "description" in res["knowledge_graph"].keys()
+                    "knowledge_graph" in res.keys()
+                    and "description" in res["knowledge_graph"].keys()
             ):
                 toret = res["knowledge_graph"]["description"] + "\n"
+                chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
+                                                title=res["knowledge_graph"]["description"]))
             if "snippet" in res["organic_results"][0].keys():
                 for item in res["organic_results"]:
                     toret += "content: " + item["snippet"] + "\n" + "link: " + item["link"] + "\n"
+                    chunks.append(KnowledgeResource(content=item["snippet"], title=item["title"], url=item["link"]))
             if (
-                "images_results" in res.keys()
-                and "thumbnail" in res["images_results"][0].keys()
+                    "images_results" in res.keys()
+                    and "thumbnail" in res["images_results"][0].keys()
             ):
                 thumbnails = [item["thumbnail"] for item in res["images_results"][:10]]
                 toret = thumbnails
+                chunks.append(KnowledgeResource(content=thumbnails, title=thumbnails))
             if toret == "":
                 toret = "No good search result found"
-        elif typ == "link":
+        if typ == "link" or typ == "chunk":
             if "knowledge_graph" in res.keys() and "title" in res["knowledge_graph"].keys() \
                     and "description_link" in res["knowledge_graph"].keys():
                 toret = res["knowledge_graph"]["description_link"]
+                chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
+                                                title=res["knowledge_graph"]["title"],
+                                                url=res["knowledge_graph"]["knowledge_graph_search_link"])
+                              )
             elif "knowledge_graph" in res.keys() and "see_results_about" in res["knowledge_graph"].keys() \
-                    and len(res["knowledge_graph"]["see_results_about"]) > 0:
+                    and len(res["knowledge_graph"]["see_results_about"]) > 0:
                 see_result_about = res["knowledge_graph"]["see_results_about"]
                 toret = ""
                 for item in see_result_about:
                     if "name" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['name']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['name']}]({item['link']})\n",
+                                                    title=item['name'], url=item['link']))
             elif "organic_results" in res.keys() and len(res["organic_results"]) > 0:
                 organic_results = res["organic_results"]
                 toret = ""
@@ -128,6 +145,7 @@ class SerpAPI:
                     if "title" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['title']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['title']}]({item['link']})\n", title=item['title'], url=item['link']))
             elif "related_questions" in res.keys() and len(res["related_questions"]) > 0:
                 related_questions = res["related_questions"]
                 toret = ""
@@ -135,6 +153,7 @@ class SerpAPI:
                     if "question" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['question']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['question']}]({item['link']})\n", title=item['title'], url=item['link']))
             elif "related_searches" in res.keys() and len(res["related_searches"]) > 0:
                 related_searches = res["related_searches"]
                 toret = ""
@@ -142,15 +161,19 @@ class SerpAPI:
                     if "query" not in item.keys() or "link" not in item.keys():
                         continue
                     toret += f"[{item['query']}]({item['link']})\n"
+                    chunks.append(KnowledgeResource(content=f"[{item['query']}]({item['link']})\n", title=item['query'], url=item['link']))
             else:
                 toret = "No good search result found"
+        if typ == "chunk":
+            return chunks
         return toret
 
+
 class GoogleSearchTool(BuiltinTool):
-    def _invoke(self, 
+    def _invoke(self,
                 user_id: str,
-                tool_parameters: dict[str, Any], 
-                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+                tool_parameters: dict[str, Any],
+                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
         """
             invoke tools
         """
@@ -160,5 +183,9 @@ class GoogleSearchTool(BuiltinTool):
         result = SerpAPI(api_key).run(query, result_type=result_type)
         if result_type == 'text':
             return self.create_text_message(text=result)
-        return self.create_link_message(link=result)
-        
\ No newline at end of file
+        elif result_type == 'link':
+            return self.create_link_message(link=result)
+        elif result_type == 'chunk':
+            return self.create_chunk_message(chunks=result)
+        else:
+            raise ValueError(f"Invalid result type: {result_type}")

diff --git a/api/core/tools/provider/builtin/google/tools/google_search.yaml b/api/core/tools/provider/builtin/google/tools/google_search.yaml
index 9dc5023992..787522b4a5 100644
--- a/api/core/tools/provider/builtin/google/tools/google_search.yaml
+++ b/api/core/tools/provider/builtin/google/tools/google_search.yaml
@@ -39,6 +39,11 @@ parameters:
         en_US: link
         zh_Hans: 链接
         pt_BR: link
+    - value: chunk
+      label:
+        en_US: chunk
+        zh_Hans: 分段
+        pt_BR: chunk
    default: link
    label:
      en_US: Result type
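
Because `_process_response` is a staticmethod, the new chunk path can be exercised directly; a rough sketch with a hand-made SerpAPI-style payload (illustration only, not part of the patch; the sample data is invented and the `serpapi` dependency must be installed for the module import to succeed):

    from core.tools.provider.builtin.google.tools.google_search import SerpAPI

    res = {
        "organic_results": [
            {"title": "Dify", "link": "https://dify.ai", "snippet": "LLM app development platform"},
        ],
    }
    # With typ="chunk" both the text pass and the link pass run, and the collected
    # KnowledgeResource list is returned instead of the concatenated string.
    chunks = SerpAPI._process_response(res, typ="chunk")
    for chunk in chunks:
        print(chunk.title, chunk.url)
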
diff --git a/api/core/tools/tool/tool.py b/api/core/tools/tool/tool.py
index 03aa0623fe..0c106d424e 100644
--- a/api/core/tools/tool/tool.py
+++ b/api/core/tools/tool/tool.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, List
 
 from pydantic import BaseModel, validator
 
@@ -15,6 +15,7 @@ from core.tools.entities.tool_entities import (
     ToolRuntimeVariablePool,
 )
 from core.tools.tool_file_manager import ToolFileManager
+from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
 
 
 class Tool(BaseModel, ABC):
@@ -337,6 +338,8 @@ class Tool(BaseModel, ABC):
         create an image message
 
         :param image: the url of the image
+        :param save_as: the name to save the message as
+
         :return: the image message
         """
         return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
@@ -348,6 +351,7 @@ class Tool(BaseModel, ABC):
         create a link message
 
         :param link: the url of the link
+        :param save_as: the name to save the message as
         :return: the link message
         """
         return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
@@ -359,21 +363,37 @@ class Tool(BaseModel, ABC):
         create a text message
 
         :param text: the text
+        :param save_as: the name to save the message as
         :return: the text message
         """
         return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT,
                                  message=text,
                                  save_as=save_as
                                  )
+
+    def create_chunk_message(self, chunks: List[KnowledgeResource], save_as: str = '') -> ToolInvokeMessage:
+        """
+        create a chunk message
+
+        :param chunks: the knowledge resource chunks
+        :param save_as: the name to save the message as
+        :return: the chunk message
+        """
+        return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.CHUNK,
+                                 message=chunks,
+                                 save_as=save_as
+                                 )
 
     def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
         """
         create a blob message
 
         :param blob: the blob
+        :param meta: the metadata of the message
+        :param save_as: the name to save the message as
         :return: the blob message
         """
-        return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
+        return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
                                  message=blob, meta=meta,
                                  save_as=save_as
                                  )
\ No newline at end of file
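
Any builtin tool can now emit chunks through the new helper; a sketch of what a tool's `_invoke` might look like (the tool class and values are hypothetical, only `create_chunk_message` and `KnowledgeResource` come from the patch):

    from typing import Any, Union

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.tools.tool.builtin_tool import BuiltinTool
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    class ExampleSearchTool(BuiltinTool):
        def _invoke(self, user_id: str, tool_parameters: dict[str, Any]
                    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
            # Build one KnowledgeResource per retrieved document (values invented).
            chunks = [
                KnowledgeResource(content="First snippet", title="Source A", url="https://a.example"),
                KnowledgeResource(content="Second snippet", title="Source B", url="https://b.example"),
            ]
            return self.create_chunk_message(chunks=chunks)
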
diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py
index f96d7940bd..2b56d07fbd 100644
--- a/api/core/tools/tool_engine.py
+++ b/api/core/tools/tool_engine.py
@@ -131,7 +131,7 @@ class ToolEngine:
 
             # hit the callback handler
             workflow_tool_callback.on_tool_end(
-                tool_name=tool.identity.name,
+                tool_name=tool.identity.name,
                 tool_inputs=tool_parameters,
                 tool_outputs=response
             )

diff --git a/api/core/workflow/nodes/llm/knowledge_resource.py b/api/core/workflow/nodes/llm/knowledge_resource.py
new file mode 100644
index 0000000000..7ecc97c39b
--- /dev/null
+++ b/api/core/workflow/nodes/llm/knowledge_resource.py
@@ -0,0 +1,16 @@
+from typing import Any, Optional
+
+from pydantic import BaseModel
+
+
+class KnowledgeResource(BaseModel):
+    """
+    Knowledge Resource.
+    """
+    content: str
+    title: str
+    url: Optional[str] = None
+    icon: Optional[str] = None
+    score: Optional[float] = None
+    metadata: Optional[dict[str, Any]] = None
+
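
KnowledgeResource is a plain Pydantic model, so downstream consumers can construct and serialize it directly; a small usage sketch (field values invented, not part of the patch):

    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    resource = KnowledgeResource(
        content="Dify is an LLM application development platform.",
        title="Dify",
        url="https://dify.ai",
        score=0.87,
        metadata={"_source": "tool"},
    )
    print(resource.dict())  # pydantic v1-style dump, matching the validator import used in tool.py
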
diff --git a/api/core/workflow/nodes/llm/llm_node.py b/api/core/workflow/nodes/llm/llm_node.py
index c8b7f279ab..c1f29b272f 100644
--- a/api/core/workflow/nodes/llm/llm_node.py
+++ b/api/core/workflow/nodes/llm/llm_node.py
@@ -311,6 +311,9 @@ class LLMNode(BaseNode):
                 }
 
                 return source
+            if ('metadata' in context_dict and '_source' in context_dict['metadata']
+                    and context_dict['metadata']['_source'] == 'tool'):
+                return context_dict
 
         return None
 
diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py
index d183dbe17b..2d6a982f28 100644
--- a/api/core/workflow/nodes/tool/tool_node.py
+++ b/api/core/workflow/nodes/tool/tool_node.py
@@ -70,13 +70,14 @@ class ToolNode(BaseNode):
             )
 
         # convert tool messages
-        plain_text, files = self._convert_tool_messages(messages)
+        plain_text, files, chunks = self._convert_tool_messages(messages)
 
         return NodeRunResult(
             status=WorkflowNodeExecutionStatus.SUCCEEDED,
             outputs={
                 'text': plain_text,
-                'files': files
+                'files': files,
+                'chunks': chunks
             },
             metadata={
                 NodeRunMetadataKey.TOOL_INFO: tool_info
@@ -111,7 +112,7 @@ class ToolNode(BaseNode):
 
         return template_parser.format(inputs)
 
-    def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar]]:
+    def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar], list]:
         """
             Convert ToolInvokeMessages into tuple[plain_text, files]
         """
@@ -125,8 +126,9 @@ class ToolNode(BaseNode):
         # extract plain text and files
         files = self._extract_tool_response_binary(messages)
         plain_text = self._extract_tool_response_text(messages)
+        chunks = self._extract_tool_response_chunk(messages)
 
-        return plain_text, files
+        return plain_text, files, chunks
 
     def _extract_tool_response_binary(self, tool_response: list[ToolInvokeMessage]) -> list[FileVar]:
         """
@@ -180,6 +182,29 @@ class ToolNode(BaseNode):
             for message in tool_response
         ])
 
+    def _extract_tool_response_chunk(self, tool_response: list[ToolInvokeMessage]) -> list:
+        """
+        Extract tool response chunks
+        """
+        all_chunks = []
+        node_data = cast(ToolNodeData, self.node_data)
+        icon = ToolManager.get_tool_icon(
+            tenant_id=self.tenant_id,
+            provider_type=node_data.provider_type,
+            provider_id=node_data.provider_id
+        )
+        for message in tool_response:
+            if message.type == ToolInvokeMessage.MessageType.CHUNK:
+                for chunk in message.message:
+                    chunk.icon = icon
+                    chunk.metadata = {
+                        '_source': 'tool'
+                    }
+                    all_chunks.append(chunk)
+        return all_chunks
+
+
+
     @classmethod
     def _extract_variable_selector_to_variable_mapping(cls, node_data: ToolNodeData) -> dict[str, list[str]]:
         """
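
Taken together, a ToolNode run whose tool returns CHUNK messages would surface them roughly as follows; this is an invented illustration of the outputs shape only (at runtime the chunks are KnowledgeResource objects, written here as plain dicts for readability):

    outputs = {
        'text': 'plain text extracted from TEXT and LINK messages',
        'files': [],  # FileVar list from binary/image messages
        'chunks': [
            {
                'content': 'snippet text',
                'title': 'Source title',
                'url': 'https://example.com',
                'icon': 'https://cloud.example/tool-icon.png',  # set by _extract_tool_response_chunk
                'score': None,
                'metadata': {'_source': 'tool'},  # lets LLMNode recognize tool-sourced context
            },
        ],
    }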