Merge branch 'feat/support-knowledge-metadata' into deploy/dev
This commit is contained in:
commit
732b56bf3b
66
api/core/rag/retrieval/template_prompts.py
Normal file
66
api/core/rag/retrieval/template_prompts.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
METADATA_FILTER_SYSTEM_PROMPT = """
|
||||||
|
### Job Description
|
||||||
|
You are a text metadata extraction engine that extracts metadata from the text based on user input and sets the metadata values.
|
||||||
|
### Task
|
||||||
|
Your task is to extract ONLY the metadata that exists in the input text from the provided metadata list, use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, and then return the result in JSON format with the key "metadata_field_name", the value "metadata_field_value", and the comparison operator "comparison_operator".
|
||||||
|
### Format
|
||||||
|
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
|
||||||
|
### Constraint
|
||||||
|
DO NOT include anything other than the JSON array in your response.
|
||||||
|
""" # noqa: E501
|
||||||
|
|
||||||
|
METADATA_FILTER_USER_PROMPT_1 = """
|
||||||
|
{ "input_text": "I want to know which company’s email address test@example.com is?",
|
||||||
|
"metadata_fields": ["filename", "email", "phone", "address"]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_ASSISTANT_PROMPT_1 = """
|
||||||
|
```json
|
||||||
|
{"metadata_map": [
|
||||||
|
{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_USER_PROMPT_2 = """
|
||||||
|
{"input_text": "What are the movies with a score of more than 9 in 2024?",
|
||||||
|
"metadata_fields": ["name", "year", "rating", "country"]}
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_ASSISTANT_PROMPT_2 = """
|
||||||
|
```json
|
||||||
|
{"metadata_map": [
|
||||||
|
{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
|
||||||
|
{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
|
||||||
|
]}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_USER_PROMPT_3 = """
|
||||||
|
{{"input_text": "{input_text}",
|
||||||
|
"metadata_fields": {metadata_fields}}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_COMPLETION_PROMPT = """
|
||||||
|
### Job Description
|
||||||
|
You are a text metadata extraction engine that extracts metadata from the text based on user input and sets the metadata values.
|
||||||
|
### Task
|
||||||
|
Your task is to extract ONLY the metadata that exists in the input text from the provided metadata list, use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, and then return the result in JSON format with the key "metadata_field_name", the value "metadata_field_value", and the comparison operator "comparison_operator".
|
||||||
|
### Format
|
||||||
|
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
|
||||||
|
### Constraint
|
||||||
|
DO NOT include anything other than the JSON array in your response.
|
||||||
|
### Example
|
||||||
|
Here is the chat example between human and assistant, inside <example></example> XML tags.
|
||||||
|
<example>
|
||||||
|
User:{{"input_text": ["I want to know which company’s email address test@example.com is?"], "metadata_fields": ["filename", "email", "phone", "address"]}}
|
||||||
|
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
|
||||||
|
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
|
||||||
|
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
|
||||||
|
</example>
|
||||||
|
### User Input
|
||||||
|
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
|
||||||
|
### Assistant Output
|
||||||
|
""" # noqa: E501
|
@ -0,0 +1,66 @@
|
|||||||
|
METADATA_FILTER_SYSTEM_PROMPT = """
|
||||||
|
### Job Description
|
||||||
|
You are a text metadata extraction engine that extracts metadata from the text based on user input and sets the metadata values.
|
||||||
|
### Task
|
||||||
|
Your task is to extract ONLY the metadata that exists in the input text from the provided metadata list, use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, and then return the result in JSON format with the key "metadata_field_name", the value "metadata_field_value", and the comparison operator "comparison_operator".
|
||||||
|
### Format
|
||||||
|
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
|
||||||
|
### Constraint
|
||||||
|
DO NOT include anything other than the JSON array in your response.
|
||||||
|
""" # noqa: E501
|
||||||
|
|
||||||
|
METADATA_FILTER_USER_PROMPT_1 = """
|
||||||
|
{ "input_text": "I want to know which company’s email address test@example.com is?",
|
||||||
|
"metadata_fields": ["filename", "email", "phone", "address"]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_ASSISTANT_PROMPT_1 = """
|
||||||
|
```json
|
||||||
|
{"metadata_map": [
|
||||||
|
{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_USER_PROMPT_2 = """
|
||||||
|
{"input_text": "What are the movies with a score of more than 9 in 2024?",
|
||||||
|
"metadata_fields": ["name", "year", "rating", "country"]}
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_ASSISTANT_PROMPT_2 = """
|
||||||
|
```json
|
||||||
|
{"metadata_map": [
|
||||||
|
{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
|
||||||
|
{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
|
||||||
|
]}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_USER_PROMPT_3 = """
|
||||||
|
{{"input_text": "{input_text}",
|
||||||
|
"metadata_fields": {metadata_fields}}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
METADATA_FILTER_COMPLETION_PROMPT = """
|
||||||
|
### Job Description
|
||||||
|
You are a text metadata extraction engine that extracts metadata from the text based on user input and sets the metadata values.
|
||||||
|
### Task
|
||||||
|
Your task is to extract ONLY the metadata that exists in the input text from the provided metadata list, use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, and then return the result in JSON format with the key "metadata_field_name", the value "metadata_field_value", and the comparison operator "comparison_operator".
|
||||||
|
### Format
|
||||||
|
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
|
||||||
|
### Constraint
|
||||||
|
DO NOT include anything other than the JSON array in your response.
|
||||||
|
### Example
|
||||||
|
Here is the chat example between human and assistant, inside <example></example> XML tags.
|
||||||
|
<example>
|
||||||
|
User:{{"input_text": ["I want to know which company’s email address test@example.com is?"], "metadata_fields": ["filename", "email", "phone", "address"]}}
|
||||||
|
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
|
||||||
|
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
|
||||||
|
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
|
||||||
|
</example>
|
||||||
|
### User Input
|
||||||
|
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
|
||||||
|
### Assistant Output
|
||||||
|
""" # noqa: E501
|
@ -442,7 +442,7 @@ class Document(db.Model): # type: ignore[name-defined]
|
|||||||
"id": metadata.id,
|
"id": metadata.id,
|
||||||
"name": metadata.name,
|
"name": metadata.name,
|
||||||
"type": metadata.type,
|
"type": metadata.type,
|
||||||
"value": self.doc_metadata.get(metadata.type),
|
"value": self.doc_metadata.get(metadata.name),
|
||||||
}
|
}
|
||||||
metadata_list.append(metadata_dict)
|
metadata_list.append(metadata_dict)
|
||||||
# deal built-in fields
|
# deal built-in fields
|
||||||
|
@ -133,7 +133,7 @@ class MetadataArgs(BaseModel):
|
|||||||
|
|
||||||
class MetadataUpdateArgs(BaseModel):
|
class MetadataUpdateArgs(BaseModel):
|
||||||
name: str
|
name: str
|
||||||
value: str
|
value: Optional[str | int | float] = None
|
||||||
|
|
||||||
|
|
||||||
class MetadataValueUpdateArgs(BaseModel):
|
class MetadataValueUpdateArgs(BaseModel):
|
||||||
@ -143,7 +143,7 @@ class MetadataValueUpdateArgs(BaseModel):
|
|||||||
class MetadataDetail(BaseModel):
|
class MetadataDetail(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
name: str
|
name: str
|
||||||
value: str
|
value: Optional[str | int | float] = None
|
||||||
|
|
||||||
|
|
||||||
class DocumentMetadataOperation(BaseModel):
|
class DocumentMetadataOperation(BaseModel):
|
||||||
|
@ -105,12 +105,15 @@ class MetadataService:
|
|||||||
if documents:
|
if documents:
|
||||||
for document in documents:
|
for document in documents:
|
||||||
if not document.doc_metadata:
|
if not document.doc_metadata:
|
||||||
document.doc_metadata = {}
|
doc_metadata = {}
|
||||||
document.doc_metadata[BuiltInField.document_name] = document.name
|
else:
|
||||||
document.doc_metadata[BuiltInField.uploader] = document.uploader
|
doc_metadata = document.doc_metadata
|
||||||
document.doc_metadata[BuiltInField.upload_date] = document.upload_date.timestamp()
|
doc_metadata[BuiltInField.document_name.value] = document.name
|
||||||
document.doc_metadata[BuiltInField.last_update_date] = document.last_update_date.timestamp()
|
doc_metadata[BuiltInField.uploader.value] = document.uploader
|
||||||
document.doc_metadata[BuiltInField.source] = document.data_source_type
|
doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
|
||||||
|
doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
|
||||||
|
doc_metadata[BuiltInField.source.value] = document.data_source_type
|
||||||
|
document.doc_metadata = doc_metadata
|
||||||
db.session.add(document)
|
db.session.add(document)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -131,11 +134,13 @@ class MetadataService:
|
|||||||
document_ids = []
|
document_ids = []
|
||||||
if documents:
|
if documents:
|
||||||
for document in documents:
|
for document in documents:
|
||||||
document.doc_metadata.pop(BuiltInField.document_name)
|
doc_metadata = document.doc_metadata
|
||||||
document.doc_metadata.pop(BuiltInField.uploader)
|
doc_metadata.pop(BuiltInField.document_name)
|
||||||
document.doc_metadata.pop(BuiltInField.upload_date)
|
doc_metadata.pop(BuiltInField.uploader)
|
||||||
document.doc_metadata.pop(BuiltInField.last_update_date)
|
doc_metadata.pop(BuiltInField.upload_date)
|
||||||
document.doc_metadata.pop(BuiltInField.source)
|
doc_metadata.pop(BuiltInField.last_update_date)
|
||||||
|
doc_metadata.pop(BuiltInField.source)
|
||||||
|
document.doc_metadata = doc_metadata
|
||||||
db.session.add(document)
|
db.session.add(document)
|
||||||
document_ids.append(document.id)
|
document_ids.append(document.id)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
@ -150,18 +155,21 @@ class MetadataService:
|
|||||||
lock_key = f"document_metadata_lock_{operation.document_id}"
|
lock_key = f"document_metadata_lock_{operation.document_id}"
|
||||||
try:
|
try:
|
||||||
MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
|
MetadataService.knowledge_base_metadata_lock_check(None, operation.document_id)
|
||||||
document = DocumentService.get_document(operation.document_id)
|
document = DocumentService.get_document(dataset.id, operation.document_id)
|
||||||
if document is None:
|
if document is None:
|
||||||
raise ValueError("Document not found.")
|
raise ValueError("Document not found.")
|
||||||
document.doc_metadata = {}
|
doc_metadata = {}
|
||||||
for metadata_value in metadata_args.fields:
|
for metadata_value in operation.metadata_list:
|
||||||
document.doc_metadata[metadata_value.name] = metadata_value.value
|
doc_metadata[metadata_value.name] = metadata_value.value
|
||||||
if dataset.built_in_fields:
|
if dataset.built_in_field_enabled:
|
||||||
document.doc_metadata[BuiltInField.document_name] = document.name
|
doc_metadata[BuiltInField.document_name.value] = document.name
|
||||||
document.doc_metadata[BuiltInField.uploader] = document.uploader
|
doc_metadata[BuiltInField.uploader.value] = document.uploader
|
||||||
document.doc_metadata[BuiltInField.upload_date] = document.upload_date.timestamp()
|
doc_metadata[BuiltInField.upload_date.value] = document.upload_date.timestamp()
|
||||||
document.doc_metadata[BuiltInField.last_update_date] = document.last_update_date.timestamp()
|
doc_metadata[BuiltInField.last_update_date.value] = document.last_update_date.timestamp()
|
||||||
document.doc_metadata[BuiltInField.source] = document.data_source_type
|
doc_metadata[BuiltInField.source.value] = document.data_source_type
|
||||||
|
document.doc_metadata = doc_metadata
|
||||||
|
db.session.add(document)
|
||||||
|
db.session.commit()
|
||||||
# deal metadata bindding
|
# deal metadata bindding
|
||||||
DatasetMetadataBinding.query.filter_by(document_id=operation.document_id).delete()
|
DatasetMetadataBinding.query.filter_by(document_id=operation.document_id).delete()
|
||||||
for metadata_value in operation.metadata_list:
|
for metadata_value in operation.metadata_list:
|
||||||
@ -173,7 +181,6 @@ class MetadataService:
|
|||||||
created_by=current_user.id,
|
created_by=current_user.id,
|
||||||
)
|
)
|
||||||
db.session.add(dataset_metadata_binding)
|
db.session.add(dataset_metadata_binding)
|
||||||
db.session.add(document)
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception("Update documents metadata failed")
|
logging.exception("Update documents metadata failed")
|
||||||
|
Loading…
Reference in New Issue
Block a user