Merge branch 'feat/support-knowledge-metadata' into deploy/dev

This commit is contained in:
jyong 2025-03-05 16:11:04 +08:00
commit 732b56bf3b
5 changed files with 164 additions and 25 deletions

View File

@ -0,0 +1,66 @@
# Prompt templates used to turn a user query into metadata filter conditions.
#
# METADATA_FILTER_SYSTEM_PROMPT plus the numbered USER/ASSISTANT constants form
# a few-shot chat exchange. Prompts 1 and 2 contain literal JSON with single
# braces, so they are presumably sent verbatim (they would not survive
# str.format) -- verify against the caller.
#
# METADATA_FILTER_USER_PROMPT_3 and METADATA_FILTER_COMPLETION_PROMPT escape
# every literal brace as "{{" / "}}" and expose the "{input_text}" and
# "{metadata_fields}" placeholders, i.e. they are written for str.format().
# Any unescaped brace in those two templates makes str.format() raise
# ValueError, so keep literal braces doubled when editing them.

METADATA_FILTER_SYSTEM_PROMPT = """
### Job Description
You are a text metadata extract engine that extract text's metadata based on user input and set the metadata value
### Task
Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint
DO NOT include anything other than the JSON array in your response.
"""  # noqa: E501

# Few-shot example 1: a single equality condition.
METADATA_FILTER_USER_PROMPT_1 = """
{ "input_text": "I want to know which companys email address test@example.com is?",
"metadata_fields": ["filename", "email", "phone", "address"]
}
"""

METADATA_FILTER_ASSISTANT_PROMPT_1 = """
```json
{"metadata_map": [
{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
]
}
```
"""

# Few-shot example 2: two conditions, one of them a range comparison.
METADATA_FILTER_USER_PROMPT_2 = """
{"input_text": "What are the movies with a score of more than 9 in 2024?",
"metadata_fields": ["name", "year", "rating", "country"]}
"""

# The example answer must itself be valid JSON (no trailing comma), otherwise
# the model is shown an output shape that a strict JSON parser rejects.
METADATA_FILTER_ASSISTANT_PROMPT_2 = """
```json
{"metadata_map": [
{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
]}
```
"""

# Template for the real user turn; placeholders: input_text, metadata_fields.
# metadata_fields is interpolated unquoted, so it should already be a JSON
# array string for the rendered message to be valid JSON.
METADATA_FILTER_USER_PROMPT_3 = """
{{"input_text": "{input_text}",
"metadata_fields": {metadata_fields}}}
"""

# Single-string variant of the same instructions with the few-shot examples
# inlined; placeholders: input_text, metadata_fields.
METADATA_FILTER_COMPLETION_PROMPT = """
### Job Description
You are a text metadata extract engine that extract text's metadata based on user input and set the metadata value
### Task
Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Example
Here is the chat example between human and assistant, inside <example></example> XML tags.
<example>
User:{{"input_text": "I want to know which companys email address test@example.com is?", "metadata_fields": ["filename", "email", "phone", "address"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
</example>
### User Input
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
### Assistant Output
"""  # noqa: E501

View File

@ -0,0 +1,66 @@
# Prompt templates used to turn a user query into metadata filter conditions.
#
# METADATA_FILTER_SYSTEM_PROMPT plus the numbered USER/ASSISTANT constants form
# a few-shot chat exchange. Prompts 1 and 2 contain literal JSON with single
# braces, so they are presumably sent verbatim (they would not survive
# str.format) -- verify against the caller.
#
# METADATA_FILTER_USER_PROMPT_3 and METADATA_FILTER_COMPLETION_PROMPT escape
# every literal brace as "{{" / "}}" and expose the "{input_text}" and
# "{metadata_fields}" placeholders, i.e. they are written for str.format().
# Any unescaped brace in those two templates makes str.format() raise
# ValueError, so keep literal braces doubled when editing them.

METADATA_FILTER_SYSTEM_PROMPT = """
### Job Description
You are a text metadata extract engine that extract text's metadata based on user input and set the metadata value
### Task
Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint
DO NOT include anything other than the JSON array in your response.
"""  # noqa: E501

# Few-shot example 1: a single equality condition.
METADATA_FILTER_USER_PROMPT_1 = """
{ "input_text": "I want to know which companys email address test@example.com is?",
"metadata_fields": ["filename", "email", "phone", "address"]
}
"""

METADATA_FILTER_ASSISTANT_PROMPT_1 = """
```json
{"metadata_map": [
{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
]
}
```
"""

# Few-shot example 2: two conditions, one of them a range comparison.
METADATA_FILTER_USER_PROMPT_2 = """
{"input_text": "What are the movies with a score of more than 9 in 2024?",
"metadata_fields": ["name", "year", "rating", "country"]}
"""

# The example answer must itself be valid JSON (no trailing comma), otherwise
# the model is shown an output shape that a strict JSON parser rejects.
METADATA_FILTER_ASSISTANT_PROMPT_2 = """
```json
{"metadata_map": [
{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
]}
```
"""

# Template for the real user turn; placeholders: input_text, metadata_fields.
# metadata_fields is interpolated unquoted, so it should already be a JSON
# array string for the rendered message to be valid JSON.
METADATA_FILTER_USER_PROMPT_3 = """
{{"input_text": "{input_text}",
"metadata_fields": {metadata_fields}}}
"""

# Single-string variant of the same instructions with the few-shot examples
# inlined; placeholders: input_text, metadata_fields.
METADATA_FILTER_COMPLETION_PROMPT = """
### Job Description
You are a text metadata extract engine that extract text's metadata based on user input and set the metadata value
### Task
Your task is to ONLY extract the metadatas that exist in the input text from the provided metadata list and Use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Example
Here is the chat example between human and assistant, inside <example></example> XML tags.
<example>
User:{{"input_text": "I want to know which companys email address test@example.com is?", "metadata_fields": ["filename", "email", "phone", "address"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
</example>
### User Input
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
### Assistant Output
"""  # noqa: E501

View File

@ -442,7 +442,7 @@ class Document(db.Model): # type: ignore[name-defined]
"id": metadata.id, "id": metadata.id,
"name": metadata.name, "name": metadata.name,
"type": metadata.type, "type": metadata.type,
"value": self.doc_metadata.get(metadata.type), "value": self.doc_metadata.get(metadata.name),
} }
metadata_list.append(metadata_dict) metadata_list.append(metadata_dict)
# deal built-in fields # deal built-in fields

View File

@ -133,7 +133,7 @@ class MetadataArgs(BaseModel):
class MetadataUpdateArgs(BaseModel): class MetadataUpdateArgs(BaseModel):
name: str name: str
value: str value: Optional[str | int | float] = None
class MetadataValueUpdateArgs(BaseModel): class MetadataValueUpdateArgs(BaseModel):
@ -143,7 +143,7 @@ class MetadataValueUpdateArgs(BaseModel):
class MetadataDetail(BaseModel): class MetadataDetail(BaseModel):
id: str id: str
name: str name: str
value: str value: Optional[str | int | float] = None
class DocumentMetadataOperation(BaseModel): class DocumentMetadataOperation(BaseModel):

View File

@ -105,12 +105,15 @@ class MetadataService:
if documents: if documents:
for document in documents: for document in documents:
if not document.doc_metadata: if not document.doc_metadata:
document.doc_metadata = {} doc_metadata = {}
document.doc_metadata[BuiltInField.document_name] = document.name else:
document.doc_metadata[BuiltInField.uploader] = document.uploader doc_metadata = document.doc_metadata
document.doc_metadata[BuiltInField.upload_date] = document.upload_date.timestamp() doc_metadata[BuiltInField.document_name.value] = document.name
document.doc_metadata[BuiltInField.last_update_date] = document.last_update_date.timestamp() doc_metadata[BuiltInField.uploader.value] = document.uploader
document.doc_metadata[BuiltInField.source] = document.data_source_type doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
doc_metadata[BuiltInField.source.value] = document.data_source_type
document.doc_metadata = doc_metadata
db.session.add(document) db.session.add(document)
db.session.commit() db.session.commit()
except Exception: except Exception:
@ -131,11 +134,13 @@ class MetadataService:
document_ids = [] document_ids = []
if documents: if documents:
for document in documents: for document in documents:
document.doc_metadata.pop(BuiltInField.document_name) doc_metadata = document.doc_metadata
document.doc_metadata.pop(BuiltInField.uploader) doc_metadata.pop(BuiltInField.document_name)
document.doc_metadata.pop(BuiltInField.upload_date) doc_metadata.pop(BuiltInField.uploader)
document.doc_metadata.pop(BuiltInField.last_update_date) doc_metadata.pop(BuiltInField.upload_date)
document.doc_metadata.pop(BuiltInField.source) doc_metadata.pop(BuiltInField.last_update_date)
doc_metadata.pop(BuiltInField.source)
document.doc_metadata = doc_metadata
db.session.add(document) db.session.add(document)
document_ids.append(document.id) document_ids.append(document.id)
db.session.commit() db.session.commit()
@ -150,18 +155,21 @@ class MetadataService:
lock_key = f"document_metadata_lock_{operation.document_id}" lock_key = f"document_metadata_lock_{operation.document_id}"
try: try:
MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id) MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
document = DocumentService.get_document(operation.document_id) document = DocumentService.get_document(dataset.id, operation.document_id)
if document is None: if document is None:
raise ValueError("Document not found.") raise ValueError("Document not found.")
document.doc_metadata = {} doc_metadata = {}
for metadata_value in metadata_args.fields: for metadata_value in operation.metadata_list:
document.doc_metadata[metadata_value.name] = metadata_value.value doc_metadata[metadata_value.name] = metadata_value.value
if dataset.built_in_fields: if dataset.built_in_field_enabled:
document.doc_metadata[BuiltInField.document_name] = document.name doc_metadata[BuiltInField.document_name.value] = document.name
document.doc_metadata[BuiltInField.uploader] = document.uploader doc_metadata[BuiltInField.uploader.value] = document.uploader
document.doc_metadata[BuiltInField.upload_date] = document.upload_date.timestamp() doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
document.doc_metadata[BuiltInField.last_update_date] = document.last_update_date.timestamp() doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
document.doc_metadata[BuiltInField.source] = document.data_source_type doc_metadata[BuiltInField.source.value] = document.data_source_type
document.doc_metadata = doc_metadata
db.session.add(document)
db.session.commit()
# deal metadata bindding # deal metadata bindding
DatasetMetadataBinding.query.filter_by(document_id=operation.document_id).delete() DatasetMetadataBinding.query.filter_by(document_id=operation.document_id).delete()
for metadata_value in operation.metadata_list: for metadata_value in operation.metadata_list:
@ -173,7 +181,6 @@ class MetadataService:
created_by=current_user.id, created_by=current_user.id,
) )
db.session.add(dataset_metadata_binding) db.session.add(dataset_metadata_binding)
db.session.add(document)
db.session.commit() db.session.commit()
except Exception: except Exception:
logging.exception("Update documents metadata failed") logging.exception("Update documents metadata failed")