Compare commits

...

3 Commits

Author  SHA1        Message            Date
jyong   bd40d25bc6  add tool resource  2024-05-10 18:08:49 +08:00
jyong   919c45b639  add tool resource  2024-05-10 16:50:19 +08:00
jyong   95fae0438d  add tool resource  2024-05-09 18:24:51 +08:00
10 changed files with 156 additions and 38 deletions

View File

@@ -77,12 +77,13 @@ class ToolInvokeMessage(BaseModel):
LINK = "link"
BLOB = "blob"
IMAGE_LINK = "image_link"
CHUNK = "chunk"
type: MessageType = MessageType.TEXT
"""
plain text, image url or link url
"""
message: Union[str, bytes] = None
message: Union[str, bytes, list] = None
meta: dict[str, Any] = None
save_as: str = ''
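
Taken together, the new CHUNK enum value and the widened message field let a tool message carry a list payload instead of plain text or bytes. A minimal sketch of such a message, assuming the KnowledgeResource model added later in this diff (content, title and url are illustrative values):

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource

    # A CHUNK message carries a list of resources rather than a str/bytes payload.
    chunk_message = ToolInvokeMessage(
        type=ToolInvokeMessage.MessageType.CHUNK,
        message=[
            KnowledgeResource(content='Example passage', title='Example document', url='https://example.com/doc'),
        ],
        save_as='',
    )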

View File

@@ -40,7 +40,7 @@ class BingSearchTool(BuiltinTool):
news = response['news']['value'] if 'news' in response else []
computation = response['computation']['value'] if 'computation' in response else None
if result_type == 'link':
if result_type == 'link' or result_type == 'chunk':
results = []
if search_results:
for result in search_results:
@@ -72,7 +72,7 @@ class BingSearchTool(BuiltinTool):
))
return results
else:
if result_type == 'text' or result_type == 'chunk':
# construct text
text = ''
if search_results:
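
In the two Bing hunks above, 'chunk' is added to both branches, so a chunk request collects the per-result entries and builds the concatenated text in the same pass. A simplified, hypothetical sketch of that dispatch (not the actual BingSearchTool body):

    def dispatch(result_type: str, search_results: list[dict]) -> tuple[list[dict], str]:
        results: list[dict] = []
        text = ''
        if result_type == 'link' or result_type == 'chunk':
            # one entry per search result (links, or chunk resources)
            results.extend(search_results)
        if result_type == 'text' or result_type == 'chunk':
            # concatenated plain-text summary
            for result in search_results:
                text += result.get('snippet', '') + '\n'
        return results, text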

View File

@@ -6,6 +6,7 @@ from serpapi import GoogleSearch
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
class HiddenPrints:
@@ -35,7 +36,7 @@ class SerpAPI:
self.serpapi_api_key = api_key
self.search_engine = GoogleSearch
def run(self, query: str, **kwargs: Any) -> str:
def run(self, query: str, **kwargs: Any) -> str | list[KnowledgeResource]:
"""Run query through SerpAPI and parse result."""
typ = kwargs.get("result_type", "text")
return self._process_response(self.results(query), typ=typ)
@@ -64,63 +65,79 @@ class SerpAPI:
return params
@staticmethod
def _process_response(res: dict, typ: str) -> str:
def _process_response(res: dict, typ: str) -> str | list[KnowledgeResource]:
"""Process response from SerpAPI."""
if "error" in res.keys():
raise ValueError(f"Got error from SerpAPI: {res['error']}")
if typ == "text":
toret = ""
chunks = []
toret = ""
if typ == "text" or typ == "chunk":
if "answer_box" in res.keys() and type(res["answer_box"]) == list:
res["answer_box"] = res["answer_box"][0] + "\n"
if "answer_box" in res.keys() and "answer" in res["answer_box"].keys():
toret += res["answer_box"]["answer"] + "\n"
chunks.append(KnowledgeResource(content=res["answer_box"]["answer"], title=res["answer_box"]["answer"]))
if "answer_box" in res.keys() and "snippet" in res["answer_box"].keys():
toret += res["answer_box"]["snippet"] + "\n"
chunks.append(
KnowledgeResource(content=res["answer_box"]["snippet"], title=res["answer_box"]["snippet"]))
if (
"answer_box" in res.keys()
and "snippet_highlighted_words" in res["answer_box"].keys()
"answer_box" in res.keys()
and "snippet_highlighted_words" in res["answer_box"].keys()
):
for item in res["answer_box"]["snippet_highlighted_words"]:
toret += item + "\n"
chunks.append(KnowledgeResource(content=item, title=item))
if (
"sports_results" in res.keys()
and "game_spotlight" in res["sports_results"].keys()
"sports_results" in res.keys()
and "game_spotlight" in res["sports_results"].keys()
):
toret += res["sports_results"]["game_spotlight"] + "\n"
chunks.append(KnowledgeResource(content=res["sports_results"]["game_spotlight"],
title=res["sports_results"]["game_spotlight"]))
if (
"shopping_results" in res.keys()
and "title" in res["shopping_results"][0].keys()
"shopping_results" in res.keys()
and "title" in res["shopping_results"][0].keys()
):
toret += res["shopping_results"][:3] + "\n"
chunks.append(KnowledgeResource(content=res["shopping_results"][:3], title=res["shopping_results"][:3]))
if (
"knowledge_graph" in res.keys()
and "description" in res["knowledge_graph"].keys()
"knowledge_graph" in res.keys()
and "description" in res["knowledge_graph"].keys()
):
toret = res["knowledge_graph"]["description"] + "\n"
chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
title=res["knowledge_graph"]["description"]))
if "snippet" in res["organic_results"][0].keys():
for item in res["organic_results"]:
toret += "content: " + item["snippet"] + "\n" + "link: " + item["link"] + "\n"
chunks.append(KnowledgeResource(content=item["snippet"], title=item["title"], url=item["link"]))
if (
"images_results" in res.keys()
and "thumbnail" in res["images_results"][0].keys()
"images_results" in res.keys()
and "thumbnail" in res["images_results"][0].keys()
):
thumbnails = [item["thumbnail"] for item in res["images_results"][:10]]
toret = thumbnails
chunks.append(KnowledgeResource(content=thumbnails, title=thumbnails))
if toret == "":
toret = "No good search result found"
elif typ == "link":
if typ == "link" or typ == "chunk":
if "knowledge_graph" in res.keys() and "title" in res["knowledge_graph"].keys() \
and "description_link" in res["knowledge_graph"].keys():
toret = res["knowledge_graph"]["description_link"]
chunks.append(KnowledgeResource(content=res["knowledge_graph"]["description"],
title=res["knowledge_graph"]["title"],
url=res["knowledge_graph"]["knowledge_graph_search_link"])
)
elif "knowledge_graph" in res.keys() and "see_results_about" in res["knowledge_graph"].keys() \
and len(res["knowledge_graph"]["see_results_about"]) > 0:
and len(res["knowledge_graph"]["see_results_about"]) > 0:
see_result_about = res["knowledge_graph"]["see_results_about"]
toret = ""
for item in see_result_about:
if "name" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['name']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['name']}]({item['link']})\n", title=item['name'], url=item['link']))
elif "organic_results" in res.keys() and len(res["organic_results"]) > 0:
organic_results = res["organic_results"]
toret = ""
@@ -128,6 +145,7 @@ class SerpAPI:
if "title" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['title']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['title']}]({item['link']})\n", title=item['title'], url=item['link']))
elif "related_questions" in res.keys() and len(res["related_questions"]) > 0:
related_questions = res["related_questions"]
toret = ""
@@ -135,6 +153,7 @@ class SerpAPI:
if "question" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['question']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['question']}]({item['link']})\n", title=item['title'], url=item['link']))
elif "related_searches" in res.keys() and len(res["related_searches"]) > 0:
related_searches = res["related_searches"]
toret = ""
@@ -142,15 +161,19 @@ class SerpAPI:
if "query" not in item.keys() or "link" not in item.keys():
continue
toret += f"[{item['query']}]({item['link']})\n"
chunks.append(KnowledgeResource(content=f"[{item['query']}]({item['link']})\n", title=item['query'], url=item['link']))
else:
toret = "No good search result found"
if typ == "chunk":
return chunks
return toret
class GoogleSearchTool(BuiltinTool):
def _invoke(self,
def _invoke(self,
user_id: str,
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
tool_parameters: dict[str, Any],
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
"""
invoke tools
"""
@@ -160,5 +183,9 @@ class GoogleSearchTool(BuiltinTool):
result = SerpAPI(api_key).run(query, result_type=result_type)
if result_type == 'text':
return self.create_text_message(text=result)
return self.create_link_message(link=result)
elif result_type == 'link':
return self.create_link_message(link=result)
elif result_type == 'chunk':
return self.create_chunk_message(chunks=result)
else:
raise ValueError(f"Invalid result type: {result_type}")
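
The Google tool now produces three output shapes: plain text, a link, or a list of KnowledgeResource chunks, each mapped to its own message constructor. A small helper-style sketch of that dispatch, using only the constructors that appear in this diff (the function itself is hypothetical):

    def to_invoke_message(tool, result, result_type: str):
        # Mirrors the result_type handling added to GoogleSearchTool._invoke above.
        if result_type == 'text':
            return tool.create_text_message(text=result)
        elif result_type == 'link':
            return tool.create_link_message(link=result)
        elif result_type == 'chunk':
            return tool.create_chunk_message(chunks=result)
        raise ValueError(f"Invalid result type: {result_type}")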

View File

@@ -39,6 +39,11 @@ parameters:
en_US: link
zh_Hans: 链接
pt_BR: link
- value: chunk
label:
en_US: chunk
zh_Hans: 分段
pt_BR: chunk
default: link
label:
en_US: Result type

View File

@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Optional, Union
from typing import Any, Optional, Union, List
from pydantic import BaseModel, validator
@@ -15,6 +15,7 @@ from core.tools.entities.tool_entities import (
ToolRuntimeVariablePool,
)
from core.tools.tool_file_manager import ToolFileManager
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
class Tool(BaseModel, ABC):
@@ -337,6 +338,8 @@ class Tool(BaseModel, ABC):
create an image message
:param image: the url of the image
:param save_as: the save_as
:return: the image message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE,
@@ -348,6 +351,7 @@ class Tool(BaseModel, ABC):
create a link message
:param link: the url of the link
:param save_as: the save_as
:return: the link message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK,
@@ -359,21 +363,37 @@ class Tool(BaseModel, ABC):
create a text message
:param text: the text
:param save_as: the save_as
:return: the text message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.TEXT,
message=text,
save_as=save_as
)
def create_chunk_message(self, chunks: List[KnowledgeResource], save_as: str = '') -> ToolInvokeMessage:
"""
create a chunk message
:param chunks: the chunks
:param save_as: the save_as
:return: the text message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.CHUNK,
message=chunks,
save_as=save_as
)
def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage:
"""
create a blob message
:param blob: the blob
:param meta: the meta
:param save_as: the save_as
:return: the blob message
"""
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.BLOB,
message=blob, meta=meta,
save_as=save_as
)
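
Any builtin tool can now surface retrieval-style results by returning a chunk message from the helper above. A minimal, hypothetical tool sketch (the class name and resource values are illustrative, not part of this diff):

    from typing import Any, Union

    from core.tools.entities.tool_entities import ToolInvokeMessage
    from core.tools.tool.builtin_tool import BuiltinTool
    from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource


    class ExampleSearchTool(BuiltinTool):
        def _invoke(self, user_id: str, tool_parameters: dict[str, Any],
                    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
            hits = [
                KnowledgeResource(content='First matching passage', title='Doc A', url='https://example.com/a'),
                KnowledgeResource(content='Second matching passage', title='Doc B', url='https://example.com/b'),
            ]
            return self.create_chunk_message(chunks=hits)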

View File

@@ -131,7 +131,7 @@ class ToolEngine:
# hit the callback handler
workflow_tool_callback.on_tool_end(
tool_name=tool.identity.name,
tool_name=tool.identity.name,
tool_inputs=tool_parameters,
tool_outputs=response
)

View File

@@ -0,0 +1,27 @@
from typing import Any, Optional
from pydantic import BaseModel
class KnowledgeResource(BaseModel):
"""
Knowledge Resource.
"""
content: str
title: str
url: Optional[str] = None
icon: Optional[str] = None
resource_from: Optional[str] = None
score: Optional[float] = None
metadata: Optional[dict[str, Any]] = None
def to_dict(self):
return {
'content': self.content,
'title': self.title,
'url': self.url,
'icon': self.icon,
'resource_from': self.resource_from,
'score': self.score,
'metadata': self.metadata
}
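
KnowledgeResource is a plain Pydantic model, so constructing and serializing one is straightforward. A short usage sketch (all values illustrative):

    resource = KnowledgeResource(
        content='Example passage returned by a tool',
        title='Example document',
        url='https://example.com/doc',   # optional
        score=0.87,                      # optional
    )
    assert resource.to_dict()['title'] == 'Example document'
    assert resource.to_dict()['metadata'] is None   # unset optional fields serialize as None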

View File

@@ -22,6 +22,7 @@ from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.nodes.base_node import BaseNode
from core.workflow.nodes.llm.entities import LLMNodeData, ModelConfig
from core.workflow.nodes.llm.knowledge_resource import KnowledgeResource
from core.workflow.utils.variable_template_parser import VariableTemplateParser
from extensions.ext_database import db
from models.model import Conversation
@@ -262,7 +263,7 @@ class LLMNode(BaseNode):
for item in context_value:
if isinstance(item, str):
context_str += item + '\n'
else:
elif isinstance(item, dict):
if 'content' not in item:
raise ValueError(f'Invalid context structure: {item}')
@@ -271,6 +272,12 @@ class LLMNode(BaseNode):
retriever_resource = self._convert_to_original_retriever_resource(item)
if retriever_resource:
original_retriever_resource.append(retriever_resource)
elif isinstance(item, KnowledgeResource):
context_str += item.content + '\n'
retriever_resource = self._convert_to_original_retriever_resource(item.to_dict())
if retriever_resource:
original_retriever_resource.append(retriever_resource)
if self.callbacks and original_retriever_resource:
for callback in self.callbacks:
@@ -311,6 +318,9 @@ class LLMNode(BaseNode):
}
return source
if ('metadata' in context_dict and '_source' in context_dict['metadata']
and context_dict['metadata']['_source'] == 'tool'):
return context_dict
return None
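
After this change the LLM node accepts three shapes of context item: plain strings, dicts carrying a 'content' key, and KnowledgeResource objects. A condensed sketch of an acceptable context_value list (values are illustrative; the dict variant must satisfy the same 'content' check enforced above):

    context_value = [
        'A plain string is appended to the prompt context as-is.',
        {'content': 'A dict item contributes its content field.'},
        KnowledgeResource(content='A KnowledgeResource contributes its content and is converted via to_dict().',
                          title='Example document'),
    ]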

View File

@@ -6,7 +6,7 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
### Task
Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output.Additionally, you need to extract the key words from the text that are related to the classification.
### Format
The input text is in the variable text_field.Categories are specified as a category list in the variable categories or left empty for automatic determination.Classification instructions may be included to improve the classification accuracy.
The input text is in the variable text_field.Categories are specified as a category list with two fields category_id and category_name in the variable categories.Classification instructions may be included to improve the classification accuracy.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Memory
@@ -24,7 +24,8 @@ QUESTION_CLASSIFIER_USER_PROMPT_1 = """
QUESTION_CLASSIFIER_ASSISTANT_PROMPT_1 = """
```json
{"category_id": "f5660049-284f-41a7-b301-fd24176a711c",
{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],
"category_id": "f5660049-284f-41a7-b301-fd24176a711c",
"category_name": "Customer Service"}
```
"""
@@ -37,7 +38,8 @@ QUESTION_CLASSIFIER_USER_PROMPT_2 = """
QUESTION_CLASSIFIER_ASSISTANT_PROMPT_2 = """
```json
{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f",
{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],
"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f",
"category_name": "Experience"}
```
"""
@@ -61,9 +63,9 @@ DO NOT include anything other than the JSON array in your response.
Here is the chat example between human and assistant, inside <example></example> XML tags.
<example>
User:{{"input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."], "categories": [{{"category_id":"f5660049-284f-41a7-b301-fd24176a711c","category_name":"Customer Service"}},{{"category_id":"8d007d06-f2c9-4be5-8ff6-cd4381c13c60","category_name":"Satisfaction"}},{{"category_id":"5fbbbb18-9843-466d-9b8e-b9bfbb9482c8","category_name":"Sales"}},{{"category_id":"23623c75-7184-4a2e-8226-466c2e4631e4","category_name":"Product"}}], "classification_instructions": ["classify the text based on the feedback provided by customer"]}}
Assistant:{{"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"}},{{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"}},{{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}}], "classification_instructions": []}}
Assistant:{{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Customer Service"}}
Assistant:{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Customer Service"}}
</example>
### Memory
Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
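
The assistant is now expected to return the matched keywords alongside the category, so downstream parsing has to tolerate the extra key. A quick illustrative check in plain Python, with a literal that mirrors the first example above:

    import json

    raw = ('{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"], '
           '"category_id": "f5660049-284f-41a7-b301-fd24176a711c", "category_name": "Customer Service"}')
    parsed = json.loads(raw)
    print(parsed['keywords'][0])      # recently
    print(parsed['category_name'])    # Customer Service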

View File

@@ -70,13 +70,14 @@ class ToolNode(BaseNode):
)
# convert tool messages
plain_text, files = self._convert_tool_messages(messages)
plain_text, files, chunks = self._convert_tool_messages(messages)
return NodeRunResult(
status=WorkflowNodeExecutionStatus.SUCCEEDED,
outputs={
'text': plain_text,
'files': files
'files': files,
'chunks': chunks
},
metadata={
NodeRunMetadataKey.TOOL_INFO: tool_info
@@ -111,7 +112,7 @@ class ToolNode(BaseNode):
return template_parser.format(inputs)
def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar]]:
def _convert_tool_messages(self, messages: list[ToolInvokeMessage]) -> tuple[str, list[FileVar], list]:
"""
Convert ToolInvokeMessages into tuple[plain_text, files]
"""
@@ -125,8 +126,9 @@ class ToolNode(BaseNode):
# extract plain text and files
files = self._extract_tool_response_binary(messages)
plain_text = self._extract_tool_response_text(messages)
chunks = self._extract_tool_response_chunk(messages)
return plain_text, files
return plain_text, files, chunks
def _extract_tool_response_binary(self, tool_response: list[ToolInvokeMessage]) -> list[FileVar]:
"""
@@ -180,6 +182,30 @@ class ToolNode(BaseNode):
for message in tool_response
])
def _extract_tool_response_chunk(self, tool_response: list[ToolInvokeMessage]) -> list:
"""
Extract tool response chunks
"""
all_chunks = []
node_data = cast(ToolNodeData, self.node_data)
icon = ToolManager.get_tool_icon(
tenant_id=self.tenant_id,
provider_type=node_data.provider_type,
provider_id=node_data.provider_id
)
for message in tool_response:
if message.type == ToolInvokeMessage.MessageType.CHUNK:
for chunk in message.message:
chunk.icon = icon
chunk.resource_from = node_data.title
chunk.metadata = {
'_source': 'tool'
}
all_chunks.append(chunk.to_dict())
return all_chunks
@classmethod
def _extract_variable_selector_to_variable_mapping(cls, node_data: ToolNodeData) -> dict[str, list[str]]:
"""