diff --git a/api/core/rag/retrieval/template_prompts.py b/api/core/rag/retrieval/template_prompts.py
new file mode 100644
index 0000000000..7abd55d798
--- /dev/null
+++ b/api/core/rag/retrieval/template_prompts.py
@@ -0,0 +1,66 @@
+METADATA_FILTER_SYSTEM_PROMPT = """
+ ### Job Description
+ You are a text metadata extraction engine that extracts a text's metadata based on user input and sets the metadata value.
+ ### Task
+ Your task is to ONLY extract the metadata that exists in the input text from the provided metadata list and use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return the result in JSON format as a "metadata_map" list whose entries hold the field name under "metadata_field_name", its value under "metadata_field_value" and the comparison operator under "comparison_operator".
+ ### Format
+ The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
+ ### Constraint
+ DO NOT include anything other than the JSON array in your response.
+""" # noqa: E501
+
+METADATA_FILTER_USER_PROMPT_1 = """
+ { "input_text": "I want to know which company’s email address test@example.com is?",
+ "metadata_fields": ["filename", "email", "phone", "address"]
+ }
+"""
+
+METADATA_FILTER_ASSISTANT_PROMPT_1 = """
+```json
+ {"metadata_map": [
+ {"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
+ ]
+ }
+```
+"""
+
+METADATA_FILTER_USER_PROMPT_2 = """
+ {"input_text": "What are the movies with a score of more than 9 in 2024?",
+ "metadata_fields": ["name", "year", "rating", "country"]}
+"""
+
+METADATA_FILTER_ASSISTANT_PROMPT_2 = """
+```json
+ {"metadata_map": [
+ {"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
+ {"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
+ ]}
+```
+"""
+
+METADATA_FILTER_USER_PROMPT_3 = """
+ {{"input_text": "{input_text}",
+ "metadata_fields": {metadata_fields}}}
+"""
+
+METADATA_FILTER_COMPLETION_PROMPT = """
+### Job Description
+You are a text metadata extraction engine that extracts a text's metadata based on user input and sets the metadata value.
+### Task
+Your task is to ONLY extract the metadata that exists in the input text from the provided metadata list and use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return the result in JSON format as a "metadata_map" list whose entries hold the field name under "metadata_field_name", its value under "metadata_field_value" and the comparison operator under "comparison_operator".
+### Format
+The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
+### Constraint
+DO NOT include anything other than the JSON array in your response.
+### Example
+Here are chat examples between the user and the assistant.
+
+User:{{"input_text": "I want to know which company’s email address test@example.com is?", "metadata_fields": ["filename", "email", "phone", "address"]}}
+Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
+User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
+Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
+
+### User Input
+{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
+### Assistant Output
+""" # noqa: E501
diff --git a/api/core/workflow/nodes/knowledge_retrieval/template_prompts.py b/api/core/workflow/nodes/knowledge_retrieval/template_prompts.py
new file mode 100644
index 0000000000..7abd55d798
--- /dev/null
+++ b/api/core/workflow/nodes/knowledge_retrieval/template_prompts.py
@@ -0,0 +1,66 @@
+METADATA_FILTER_SYSTEM_PROMPT = """
+ ### Job Description
+ You are a text metadata extraction engine that extracts a text's metadata based on user input and sets the metadata value.
+ ### Task
+ Your task is to ONLY extract the metadata that exists in the input text from the provided metadata list and use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return the result in JSON format as a "metadata_map" list whose entries hold the field name under "metadata_field_name", its value under "metadata_field_value" and the comparison operator under "comparison_operator".
+ ### Format
+ The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
+ ### Constraint
+ DO NOT include anything other than the JSON array in your response.
+""" # noqa: E501
+
+METADATA_FILTER_USER_PROMPT_1 = """
+ { "input_text": "I want to know which company’s email address test@example.com is?",
+ "metadata_fields": ["filename", "email", "phone", "address"]
+ }
+"""
+
+METADATA_FILTER_ASSISTANT_PROMPT_1 = """
+```json
+ {"metadata_map": [
+ {"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
+ ]
+ }
+```
+"""
+
+METADATA_FILTER_USER_PROMPT_2 = """
+ {"input_text": "What are the movies with a score of more than 9 in 2024?",
+ "metadata_fields": ["name", "year", "rating", "country"]}
+"""
+
+METADATA_FILTER_ASSISTANT_PROMPT_2 = """
+```json
+ {"metadata_map": [
+ {"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
+ {"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
+ ]}
+```
+"""
+
+METADATA_FILTER_USER_PROMPT_3 = """
+ {{"input_text": "{input_text}",
+ "metadata_fields": {metadata_fields}}}
+"""
+
+METADATA_FILTER_COMPLETION_PROMPT = """
+### Job Description
+You are a text metadata extraction engine that extracts a text's metadata based on user input and sets the metadata value.
+### Task
+Your task is to ONLY extract the metadata that exists in the input text from the provided metadata list and use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return the result in JSON format as a "metadata_map" list whose entries hold the field name under "metadata_field_name", its value under "metadata_field_value" and the comparison operator under "comparison_operator".
+### Format
+The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
+### Constraint
+DO NOT include anything other than the JSON array in your response.
+### Example
+Here are chat examples between the user and the assistant.
+
+User:{{"input_text": "I want to know which company’s email address test@example.com is?", "metadata_fields": ["filename", "email", "phone", "address"]}}
+Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
+User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
+Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
+
+### User Input
+{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
+### Assistant Output
+""" # noqa: E501
diff --git a/api/models/dataset.py b/api/models/dataset.py
index 0e722a19d7..865ac92036 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -442,7 +442,7 @@ class Document(db.Model): # type: ignore[name-defined]
"id": metadata.id,
"name": metadata.name,
"type": metadata.type,
- "value": self.doc_metadata.get(metadata.type),
+ "value": self.doc_metadata.get(metadata.name),
}
metadata_list.append(metadata_dict)
# deal built-in fields
diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py
index 7d0f545f9e..29e00ab68a 100644
--- a/api/services/entities/knowledge_entities/knowledge_entities.py
+++ b/api/services/entities/knowledge_entities/knowledge_entities.py
@@ -133,7 +133,7 @@ class MetadataArgs(BaseModel):
class MetadataUpdateArgs(BaseModel):
name: str
- value: str
+ value: Optional[str | int | float] = None
class MetadataValueUpdateArgs(BaseModel):
@@ -143,7 +143,7 @@ class MetadataValueUpdateArgs(BaseModel):
class MetadataDetail(BaseModel):
id: str
name: str
- value: str
+ value: Optional[str | int | float] = None
class DocumentMetadataOperation(BaseModel):
diff --git a/api/services/metadata_service.py b/api/services/metadata_service.py
index 45814ee066..fd0738772b 100644
--- a/api/services/metadata_service.py
+++ b/api/services/metadata_service.py
@@ -105,12 +105,15 @@ class MetadataService:
if documents:
for document in documents:
if not document.doc_metadata:
- document.doc_metadata = {}
- document.doc_metadata[BuiltInField.document_name] = document.name
- document.doc_metadata[BuiltInField.uploader] = document.uploader
- document.doc_metadata[BuiltInField.upload_date] = document.upload_date.timestamp()
- document.doc_metadata[BuiltInField.last_update_date] = document.last_update_date.timestamp()
- document.doc_metadata[BuiltInField.source] = document.data_source_type
+ doc_metadata = {}
+ else:
+ doc_metadata = document.doc_metadata
+ doc_metadata[BuiltInField.document_name.value] = document.name
+ doc_metadata[BuiltInField.uploader.value] = document.uploader
+ doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
+ doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
+ doc_metadata[BuiltInField.source.value] = document.data_source_type
+ document.doc_metadata = doc_metadata
db.session.add(document)
db.session.commit()
except Exception:
@@ -131,11 +134,13 @@ class MetadataService:
document_ids = []
if documents:
for document in documents:
- document.doc_metadata.pop(BuiltInField.document_name)
- document.doc_metadata.pop(BuiltInField.uploader)
- document.doc_metadata.pop(BuiltInField.upload_date)
- document.doc_metadata.pop(BuiltInField.last_update_date)
- document.doc_metadata.pop(BuiltInField.source)
+ doc_metadata = dict(document.doc_metadata or {})  # work on a copy; tolerate documents that have no metadata yet
+ doc_metadata.pop(BuiltInField.document_name.value, None)  # same ".value" keys the enable path writes; None default skips absent keys
+ doc_metadata.pop(BuiltInField.uploader.value, None)
+ doc_metadata.pop(BuiltInField.upload_date.value, None)
+ doc_metadata.pop(BuiltInField.last_update_date.value, None)
+ doc_metadata.pop(BuiltInField.source.value, None)
+ document.doc_metadata = doc_metadata  # assign a new object rather than the mutated original
db.session.add(document)
document_ids.append(document.id)
db.session.commit()
@@ -150,18 +155,21 @@ class MetadataService:
lock_key = f"document_metadata_lock_{operation.document_id}"
try:
MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
- document = DocumentService.get_document(operation.document_id)
+ document = DocumentService.get_document(dataset.id, operation.document_id)
if document is None:
raise ValueError("Document not found.")
- document.doc_metadata = {}
- for metadata_value in metadata_args.fields:
- document.doc_metadata[metadata_value.name] = metadata_value.value
- if dataset.built_in_fields:
- document.doc_metadata[BuiltInField.document_name] = document.name
- document.doc_metadata[BuiltInField.uploader] = document.uploader
- document.doc_metadata[BuiltInField.upload_date] = document.upload_date.timestamp()
- document.doc_metadata[BuiltInField.last_update_date] = document.last_update_date.timestamp()
- document.doc_metadata[BuiltInField.source] = document.data_source_type
+ doc_metadata = {}
+ for metadata_value in operation.metadata_list:
+ doc_metadata[metadata_value.name] = metadata_value.value
+ if dataset.built_in_field_enabled:
+ doc_metadata[BuiltInField.document_name.value] = document.name
+ doc_metadata[BuiltInField.uploader.value] = document.uploader
+ doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
+ doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
+ doc_metadata[BuiltInField.source.value] = document.data_source_type
+ document.doc_metadata = doc_metadata
+ db.session.add(document)
+ db.session.commit()
# deal metadata bindding
DatasetMetadataBinding.query.filter_by(document_id=operation.document_id).delete()
for metadata_value in operation.metadata_list:
@@ -173,7 +181,6 @@ class MetadataService:
created_by=current_user.id,
)
db.session.add(dataset_metadata_binding)
- db.session.add(document)
db.session.commit()
except Exception:
logging.exception("Update documents metadata failed")