Compare commits

...

3 Commits

Author  SHA1        Message            Date
jyong   bd40d25bc6  add tool resource  2024-05-10 18:08:49 +08:00
jyong   919c45b639  add tool resource  2024-05-10 16:50:19 +08:00
jyong   95fae0438d  add tool resource  2024-05-09 18:24:51 +08:00
10 changed files with 156 additions and 38 deletions

View File

@@ -77,12 +77,13 @@ class ToolInvokeMessage(BaseModel):
LINK = "link"
BLOB = "blob"
IMAGE_LINK = "image_link"
CHUNK = "chunk"
type: MessageType = MessageType.TEXT
"""
plain text, image url or link url
"""
message: Union[str, bytes] = None
message: Union[str, bytes, list] = None
meta: dict[str, Any] = None
save_as: str = ''
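
Taken together, the new CHUNK enum value and the widened message field let a tool message carry a list payload instead of plain text or bytes. A minimal sketch of such a message, assuming the KnowledgeResource model added later in this diff (content, title and url are illustrative values):

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    # A CHUNK message carries a list of resources rather than a str/bytes payload.
    chunk_message = ToolInvokeMessage(
        type=ToolInvokeMessage.MessageType.CHUNK,
        message=[
            KnowledgeResource(content='Example passage', title='Example document', url='https://example.com/doc'),
        ],
        save_as='',
    )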

View File

@@ -40,7 +40,7 @@ class BingSearchTool(BuiltinTool):
news = response['news']['value'] if 'news' in response else []
computation = response['computation']['value'] if 'computation' in response else None
if result_type == 'link':
if result_type == 'link' or result_type == 'chunk':
results = []
if search_results:
for result in search_results:
@@ -72,7 +72,7 @@ class BingSearchTool(BuiltinTool):
))
return results
else:
if result_type == 'text' or result_type == 'chunk':
# construct text
text = ''
if search_results:
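
In the two Bing hunks above, 'chunk' is added to both branches, so a chunk request collects the per-result entries and builds the concatenated text in the same pass. A simplified, hypothetical sketch of that dispatch (not the actual BingSearchTool body):

    def dispatch(result_type: str, search_results: list[dict]) -> tuple[list[dict], str]:
        results: list[dict] = []
        text = ''
        if result_type == 'link' or result_type == 'chunk':
            # one entry per search result (links, or chunk resources)
            results.extend(search_results)
        if result_type == 'text' or result_type == 'chunk':
            # concatenated plain-text summary
            for result in search_results:
                text += result.get('snippet', '') + '\n'
        return results, text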

View File

@@ -6,6 +6,7 @@ from serpapi import GoogleSearch
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
class HiddenPrints:
@@ -35,7 +36,7 @@ class SerpAPI:
self.serpapi_api_key = api_key
self.search_engine = GoogleSearch
def run(self, query: str, **kwargs: Any) -> str:
def run(self, query: str, **kwargs: Any) -> str | list[KnowledgeResource]:
"""Run query through SerpAPI and parse result."""
typ = kwargs.get("result_type", "text")
return self._process_response(self.results(query), typ=typ)
@@ -64,63 +65,79 @@ class SerpAPI:
return params
@staticmethod
def _process_response(res: dict, typ: str) -> str:
def _process_response(res: dict, typ: str) -> str | list[KnowledgeResource]:
"""Process response from SerpAPI."""
if "error" in res.keys():
raise ValueError(f"Got error from SerpAPI: {res['error']}")
if typ == "text":
toret = ""
chunks = []
toret = ""
if typ == "text" or typ == "chunk":
if "answer_box" in res.keys() and type(res["answer_box"]) == list:
res["answer_box"] = res["answer_box"][0] + "\n"
if "answer_box" in res.keys() and "answer" in res["answer_box"].keys():
toret += res["answer_box"]["answer"] + "\n"
chunks.append(KnowledgeResource(content=res["answer_box"]["answer"], title=res["answer_box"]["answer"]))
if "answer_box" in res.keys() and "snippet" in res["answer_box"].keys():
toret += res["answer_box"]["snippet"] + "\n"
chunks.append(
KnowledgeResource(content=res["answer_box"]["snippet"], title=res["answer_box"]["snippet"]))
if (
"answer_box" in res.keys()
and "snippet_highlighted_words" in res["answer_box"].keys()
"answer_box" in res.keys()
and "snippet_highlighted_words" in res["answer_box"].keys()
):
for item in res["answer_box"]["snippet_highlighted_words"]:
toret += item + "\n"
chunks.append(KnowledgeResource(content=item, title=item))
if (
"sports_results" in res.keys()
and "game_spotlight" in res["sports_results"].keys()
"sports_results" in res.keys()
and "game_spotlight" in res["sports_results"].keys()
):
toret += res["sports_results"]["game_spotlight"] + "\n"
chunks.append(KnowledgeResource(content=res["sports_results"]["game_spotlight"],
title=res["sports_results"]["game_spotlight"]))
if (
"shopping_results" in res.keys()
and "title" in res["shopping_results"][0].keys()
"shopping_results" in res.keys()
and "title" in res["shopping_results"][0].keys()
):
toret += res["shopping_results"][:3] + "\n"
chunks.append(KnowledgeResource(content=res["shopping_results"][:3], title=res["shopping_results"][:3]))
if (
"knowledge_graph" in res.keys()
and "description" in res["knowledge_graph"].keys()
"knowledge_graph" in res.keys()
and "description" in res["knowledge_graph"].keys()
):
toret = res["knowledge_graph"]["description"] + "\n"
chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
title=res["knowledge_graph"]["description"]))
if "snippet" in res["organic_results"][0].keys():
for item in res["organic_results"]:
toret += "content: " + item["snippet"] + "\n" + "link: " + item["link"] + "\n"
chunks.append(KnowledgeResource(content=item["snippet"], title=item["title"], url=item["link"]))
if (
"images_results" in res.keys()
and "thumbnail" in res["images_results"][0].keys()
"images_results" in res.keys()
and "thumbnail" in res["images_results"][0].keys()
):
thumbnails = [item["thumbnail"] for item in res["images_results"][:10]]
toret = thumbnails
chunks.append(KnowledgeResource(content=thumbnails, title=thumbnails))
if toret == "":
toret = "No good search result found"
elif typ == "link":
if typ == "link" or typ == "chunk":
if "knowledge_graph" in res.keys() and "title" in res["knowledge_graph"].keys() \
and "description_link" in res["knowledge_graph"].keys():
toret = res["knowledge_graph"]["description_link"]
chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
title=res["knowledge_graph"]["title"],
url=res["knowledge_graph"]["knowledge_graph_search_link"])
)
elif "knowledge_graph" in res.keys() and "see_results_about" in res["knowledge_graph"].keys() \
and len(res["knowledge_graph"]["see_results_about"]) > 0:
and len(res["knowledge_graph"]["see_results_about"]) > 0:
see_result_about = res["knowledge_graph"]["see_results_about"]
toret = ""
for item in see_result_about:
if "name" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['name']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['name']}]({item['link']})\n", title=item['name'], url=item['link']))
elif "organic_results" in res.keys() and len(res["organic_results"]) > 0:
organic_results = res["organic_results"]
toret = ""
@@ -128,6 +145,7 @@ class SerpAPI:
if "title" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['title']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['title']}]({item['link']})\n", title=item['title'], url=item['link']))
elif "related_questions" in res.keys() and len(res["related_questions"]) > 0:
related_questions = res["related_questions"]
toret = ""
@@ -135,6 +153,7 @@ class SerpAPI:
if "question" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['question']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['question']}]({item['link']})\n", title=item['title'], url=item['link']))
elif "related_searches" in res.keys() and len(res["related_searches"]) > 0:
related_searches = res["related_searches"]
toret = ""
@@ -142,15 +161,19 @@ class SerpAPI:
if "query" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['query']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['query']}]({item['link']})\n", title=item['query'], url=item['link']))
else:
toret = "No good search result found"
if typ == "chunk":
return chunks
return toret
class GoogleSearchTool(BuiltinTool):
def _invoke(self,
def _invoke(self,
user_id: str,
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
"""
invoke tools
"""
@@ -160,5 +183,9 @@ class GoogleSearchTool(BuiltinTool):
result = SerpAPI(api_key).run(query, result_type=result_type)
if result_type == 'text':
return self.create_text_message(text=result)
return self.create_link_message(link=result)
elif result_type == 'link':
return self.create_link_message(link=result)
elif result_type == 'chunk':
return self.create_chunk_message(chunks=result)
else:
raise ValueError(f"Invalid result type: {result_type}")
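
The Google tool now produces three output shapes: plain text, a link, or a list of KnowledgeResource chunks, each mapped to its own message constructor. A small helper-style sketch of that dispatch, using only the constructors that appear in this diff (the function itself is hypothetical):

    def to_invoke_message(tool, result, result_type: str):
        # Mirrors the result_type handling added to GoogleSearchTool._invoke above.
        if result_type == 'text':
            return tool.create_text_message(text=result)
        elif result_type == 'link':
            return tool.create_link_message(link=result)
        elif result_type == 'chunk':
            return tool.create_chunk_message(chunks=result)
        raise ValueError(f"Invalid result type: {result_type}")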

View File

@@ -39,6 +39,11 @@ parameters:
en_US: link
zh_Hans: 链接
pt_BR: link
- value: chunk
label:
en_US: chunk
zh_Hans: 分段
pt_BR: chunk
default: link
label:
en_US: Result type

View File

@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Optional, Union
from typing import Any, Optional, Union, List
from pydantic import BaseModel, validator
@@ -15,6 +15,7 @@ from core.tools.entities.tool_entities import (
ToolRuntimeVariablePool,
)
from core.tools.tool_file_manager import ToolFileManager
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
class Tool(BaseModel, ABC):
@@ -337,6 +338,8 @@ class Tool(BaseModel, ABC):
create an image message
:param image: the url of the image
:param save_as: the save_as
:return: the image message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
@@ -348,6 +351,7 @@ class Tool(BaseModel, ABC):
create a link message
:param link: the url of the link
:param save_as: the save_as
:return: the link message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
@@ -359,21 +363,37 @@ class Tool(BaseModel, ABC):
create a text message
:param text: the text
:param save_as: the save_as
:return: the text message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT,
message=text,
save_as=save_as
)
def create_chunk_message(self, chunks: List[KnowledgeResource], save_as: str = '') -> ToolInvokeMessage:
"""
create a chunk message
:param chunks: the chunks
:param save_as: the save_as
:return: the text message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.CHUNK,
message=chunks,
save_as=save_as
)
def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
"""
create a blob message
:param blob: the blob
:param meta: the meta
:param save_as: the save_as
:return: the blob message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
message=blob, meta=meta,
save_as=save_as
)
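
Any builtin tool can now surface retrieval-style results by returning a chunk message from the helper above. A minimal, hypothetical tool sketch (the class name and resource values are illustrative, not part of this diff):

    from typing import Any, Union

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.tools.tool.builtin_tool import BuiltinTool
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource


    class ExampleSearchTool(BuiltinTool):
        def _invoke(self, user_id: str, tool_parameters: dict[str, Any],
                    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
            hits = [
                KnowledgeResource(content='First matching passage', title='Doc A', url='https://example.com/a'),
                KnowledgeResource(content='Second matching passage', title='Doc B', url='https://example.com/b'),
            ]
            return self.create_chunk_message(chunks=hits)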

View File

@@ -131,7 +131,7 @@ class ToolEngine:
# hit the callback handler
workflow_tool_callback.on_tool_end(
tool_name=tool.identity.name,
tool_name=tool.identity.name,
tool_inputs=tool_parameters,
tool_outputs=response
)

View File

@@ -0,0 +1,27 @@
from typing import Any, Optional
from pydantic import BaseModel
class KnowledgeResource(BaseModel):
"""
Knowledge Resource.
"""
content: str
title: str
url: Optional[str] = None
icon: Optional[str] = None
resource_from: Optional[str] = None
score: Optional[float] = None
metadata: Optional[dict[str, Any]] = None
def to_dict(self):
return {
'content': self.content,
'title': self.title,
'url': self.url,
'icon': self.icon,
'resource_from': self.resource_from,
'score': self.score,
'metadata': self.metadata
}
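
KnowledgeResource is a plain Pydantic model, so constructing and serializing one is straightforward. A short usage sketch (all values illustrative):

    resource = KnowledgeResource(
        content='Example passage returned by a tool',
        title='Example document',
        url='https://example.com/doc',   # optional
        score=0.87,                      # optional
    )
    assert resource.to_dict()['title'] == 'Example document'
    assert resource.to_dict()['metadata'] is None   # unset optional fields serialize as None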

View File

@@ -22,6 +22,7 @@ from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.nodes.base_node import BaseNode
from core.workflow.nodes.llm.entities import LLMNodeData, ModelConfig
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
from core.workflow.utils.variable_template_parser import VariableTemplateParser
from extensions.ext_database import db
from models.model import Conversation
@@ -262,7 +263,7 @@ class LLMNode(BaseNode):
for item in context_value:
if isinstance(item, str):
context_str += item + '\n'
else:
elif isinstance(item, dict):
if 'content' not in item:
raise ValueError(f'Invalid context structure: {item}')
@@ -271,6 +272,12 @@ class LLMNode(BaseNode):
retriever_resource = self._convert_to_original_retriever_resource(item)
if retriever_resource:
original_retriever_resource.append(retriever_resource)
elif isinstance(item, KnowledgeResource):
context_str += item.content + '\n'
retriever_resource = self._convert_to_original_retriever_resource(item.to_dict())
if retriever_resource:
original_retriever_resource.append(retriever_resource)
if self.callbacks and original_retriever_resource:
for callback in self.callbacks:
@@ -311,6 +318,9 @@ class LLMNode(BaseNode):
}
return source
if ('metadata' in context_dict and '_source' in context_dict['metadata']
and context_dict['metadata']['_source'] == 'tool'):
return context_dict
return None
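
After this change the LLM node accepts three shapes of context item: plain strings, dicts carrying a 'content' key, and KnowledgeResource objects. A condensed sketch of an acceptable context_value list (values are illustrative; the dict variant must satisfy the same 'content' check enforced above):

    context_value = [
        'A plain string is appended to the prompt context as-is.',
        {'content': 'A dict item contributes its content field.'},
        KnowledgeResource(content='A KnowledgeResource contributes its content and is converted via to_dict().',
                          title='Example document'),
    ]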

View File

@@ -6,7 +6,7 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
### Task
Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output.Additionally, you need to extract the key words from the text that are related to the classification.
### Format
The input text is in the variable text_field.Categories are specified as a category list in the variable categories or left empty for automatic determination.Classification instructions may be included to improve the classification accuracy.
The input text is in the variable text_field.Categories are specified as a category list with two fields category_id and category_name in the variable categories.Classification instructions may be included to improve the classification accuracy.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Memory
@@ -24,7 +24,8 @@ QUESTION_CLASSIFIER_USER_PROMPT_1 = """
QUESTION_CLASSIFIER_ASSISTANT_PROMPT_1 = """
```json
{"category_id": "f5660049-284f-41a7-b301-fd24176a711c",
{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],
"category_id": "f5660049-284f-41a7-b301-fd24176a711c",
"category_name": "Customer Service"}
```
"""
@@ -37,7 +38,8 @@ QUESTION_CLASSIFIER_USER_PROMPT_2 = """
QUESTION_CLASSIFIER_ASSISTANT_PROMPT_2 = """
```json
{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f",
{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],
"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f",
"category_name": "Experience"}
```
"""
@@ -61,9 +63,9 @@ DO NOT include anything other than the JSON array in your response.
Here is the chat example between human and assistant, inside <example></example> XML tags.
<example>
User:{{"input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."], "categories": [{{"category_id":"f5660049-284f-41a7-b301-fd24176a711c","category_name":"Customer Service"}},{{"category_id":"8d007d06-f2c9-4be5-8ff6-cd4381c13c60","category_name":"Satisfaction"}},{{"category_id":"5fbbbb18-9843-466d-9b8e-b9bfbb9482c8","category_name":"Sales"}},{{"category_id":"23623c75-7184-4a2e-8226-466c2e4631e4","category_name":"Product"}}], "classification_instructions": ["classify the text based on the feedback provided by customer"]}}
Assistant:{{"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"}},{{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"}},{{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}}], "classification_instructions": []}}
Assistant:{{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Customer Service"}}
Assistant:{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Customer Service"}}
</example>
### Memory
Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
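
The assistant is now expected to return the matched keywords alongside the category, so downstream parsing has to tolerate the extra key. A quick illustrative check in plain Python, with a literal that mirrors the first example above:

    import json

    raw = ('{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"], '
           '"category_id": "f5660049-284f-41a7-b301-fd24176a711c", "category_name": "Customer Service"}')
    parsed = json.loads(raw)
    print(parsed['keywords'][0])      # recently
    print(parsed['category_name'])    # Customer Service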

View File

@@ -70,13 +70,14 @@ class ToolNode(BaseNode):
)
# convert tool messages
plain_text, files = self._convert_tool_messages(messages)
plain_text, files, chunks = self._convert_tool_messages(messages)
return NodeRunResult(
status=WorkflowNodeExecutionStatus.SUCCEEDED,
outputs={
'text': plain_text,
'files': files
'files': files,
'chunks': chunks
},
metadata={
NodeRunMetadataKey.TOOL_INFO: tool_info
@@ -111,7 +112,7 @@ class ToolNode(BaseNode):
return template_parser.format(inputs)
def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar]]:
def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar], list]:
"""
Convert ToolInvokeMessages into tuple[plain_text, files]
"""
@@ -125,8 +126,9 @@ class ToolNode(BaseNode):
# extract plain text and files
files = self._extract_tool_response_binary(messages)
plain_text = self._extract_tool_response_text(messages)
chunks = self._extract_tool_response_chunk(messages)
return plain_text, files
return plain_text, files, chunks
def _extract_tool_response_binary(self, tool_response: list[ToolInvokeMessage]) -> list[FileVar]:
"""
@@ -180,6 +182,30 @@ class ToolNode(BaseNode):
for message in tool_response
])
def _extract_tool_response_chunk(self, tool_response: list[ToolInvokeMessage]) -> list:
"""
Extract tool response chunks
"""
all_chunks = []
node_data = cast(ToolNodeData, self.node_data)
icon = ToolManager.get_tool_icon(
tenant_id=self.tenant_id,
provider_type=node_data.provider_type,
provider_id=node_data.provider_id
)
for message in tool_response:
if message.type == ToolInvokeMessage.MessageType.CHUNK:
for chunk in message.message:
chunk.icon = icon
chunk.resource_from = node_data.title
chunk.metadata = {
'_source': 'tool'
}
all_chunks.append(chunk.to_dict())
return all_chunks
@classmethod
def _extract_variable_selector_to_variable_mapping(cls, node_data: ToolNodeData) -> dict[str, list[str]]:
"""