From 7a8f4bef16b35e0782d0061f66fa96b421fe3eb3 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Mon, 10 Mar 2025 19:31:53 +0800 Subject: [PATCH] fix metadata --- api/core/rag/entities/metadata_entities.py | 4 +-- api/core/rag/retrieval/dataset_retrieval.py | 32 +++++++++++++------ .../knowledge_retrieval_node.py | 18 +++++++---- api/services/external_knowledge_service.py | 6 +++- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/api/core/rag/entities/metadata_entities.py b/api/core/rag/entities/metadata_entities.py index 9ff32b98c0..6ef932ad22 100644 --- a/api/core/rag/entities/metadata_entities.py +++ b/api/core/rag/entities/metadata_entities.py @@ -13,14 +13,14 @@ SupportedComparisonOperator = Literal[ "is not", "empty", "not empty", - # for number + # for number "=", "≠", ">", "<", "≥", "≤", - # for time + # for time "before", "after", ] diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 45e520d323..0b56e7438c 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -547,8 +547,16 @@ class DatasetRetrieval: db.session.add_all(dataset_queries) db.session.commit() - def _retriever(self, flask_app: Flask, dataset_id: str, query: str, top_k: int, all_documents: list, - document_ids_filter: Optional[list[str]] = None, metadata_condition: Optional[MetadataCondition] = None): + def _retriever( + self, + flask_app: Flask, + dataset_id: str, + query: str, + top_k: int, + all_documents: list, + document_ids_filter: Optional[list[str]] = None, + metadata_condition: Optional[MetadataCondition] = None, + ): with flask_app.app_context(): dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() @@ -822,11 +830,13 @@ class DatasetRetrieval: self._process_metadata_filter_func( filter.get("condition"), filter.get("metadata_name"), filter.get("value"), filters ) - conditions.append(Condition( - name=filter.get("metadata_name"), - comparison_operator=filter.get("condition"), - value=filter.get("value"), - )) + conditions.append( + Condition( + name=filter.get("metadata_name"), + comparison_operator=filter.get("condition"), + value=filter.get("value"), + ) + ) metadata_condition = MetadataCondition( logical_operator=metadata_filtering_conditions.logical_operator, conditions=conditions, @@ -935,12 +945,16 @@ class DatasetRetrieval: if isinstance(value, str): filters.append(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"') else: - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) == value) + filters.append( + sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) == value + ) case "is not" | "≠": if isinstance(value, str): filters.append(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"') else: - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) != value) + filters.append( + sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Integer) != value + ) case "is empty": filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None)) case "is not empty": diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index fedf00458b..a99a8d6a22 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -304,11 +304,13 @@ class KnowledgeRetrievalNode(LLMNode): self._process_metadata_filter_func( filter.get("condition"), filter.get("metadata_name"), filter.get("value"), filters ) - conditions.append(Condition( - name=filter.get("metadata_name"), - comparison_operator=filter.get("condition"), - value=filter.get("value"), - )) + conditions.append( + Condition( + name=filter.get("metadata_name"), + comparison_operator=filter.get("condition"), + value=filter.get("value"), + ) + ) metadata_condition = MetadataCondition( logical_operator="or", conditions=conditions, @@ -321,7 +323,9 @@ class KnowledgeRetrievalNode(LLMNode): expected_value = condition.value if expected_value: if isinstance(expected_value, str): - expected_value = self.graph_runtime_state.variable_pool.convert_template(expected_value).text + expected_value = self.graph_runtime_state.variable_pool.convert_template( + expected_value + ).text filters = self._process_metadata_filter_func( condition.comparison_operator, metadata_name, expected_value, filters @@ -438,7 +442,7 @@ class KnowledgeRetrievalNode(LLMNode): case _: pass return filters - + @classmethod def _extract_variable_selector_to_variable_mapping( cls, diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index 822b458189..d9ee221a3c 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -246,7 +246,11 @@ class ExternalDatasetService: @staticmethod def fetch_external_knowledge_retrieval( - tenant_id: str, dataset_id: str, query: str, external_retrieval_parameters: dict, metadata_condition: Optional[MetadataCondition] = None + tenant_id: str, + dataset_id: str, + query: str, + external_retrieval_parameters: dict, + metadata_condition: Optional[MetadataCondition] = None, ) -> list: external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by( dataset_id=dataset_id, tenant_id=tenant_id