fix metadata

This commit is contained in:
jyong 2025-03-10 16:25:38 +08:00
parent 07e3805da7
commit 778c246c68
5 changed files with 637 additions and 259 deletions

View File

@ -140,12 +140,12 @@ SupportedComparisonOperator = Literal[
# for string or array
"contains",
"not contains",
"starts with",
"ends with",
"start with",
"end with",
"is",
"is not",
"empty",
"is not empty",
"not empty",
# for number
"=",
"",

View File

@ -796,7 +796,7 @@ class DatasetRetrieval:
)
if automatic_metadata_filters:
for filter in automatic_metadata_filters:
self._process_metadata_filter_func(
document_query = self._process_metadata_filter_func(
filter.get("condition"), filter.get("metadata_name"), filter.get("value"), document_query
)
elif metadata_filtering_mode == "manual":
@ -805,7 +805,7 @@ class DatasetRetrieval:
expected_value = condition.value
if isinstance(expected_value, str):
expected_value = self._replace_metadata_filter_value(expected_value, inputs)
self._process_metadata_filter_func(
document_query = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, document_query
)
else:
@ -883,31 +883,38 @@ class DatasetRetrieval:
def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, query):
match condition:
case "contains":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}%"))
query = query.filter(DatasetDocument.doc_metadata[metadata_name].like(f'"%{value}%"'))
case "not contains":
query = query.filter(Document.doc_metadata[metadata_name].notlike(f"%{value}%"))
query = query.filter(DatasetDocument.doc_metadata[metadata_name].notlike(f'"%{value}%"'))
case "start with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"{value}%"))
query = query.filter(DatasetDocument.doc_metadata[metadata_name].like(f'"{value}%"'))
case "end with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}"))
case "is", "=":
query = query.filter(Document.doc_metadata[metadata_name] == value)
case "is not", "":
query = query.filter(Document.doc_metadata[metadata_name] != value)
query = query.filter(DatasetDocument.doc_metadata[metadata_name].like(f'"%{value}"'))
case "is" | "=":
if isinstance(value, str):
query = query.filter(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"')
else:
query = query.filter(DatasetDocument.doc_metadata[metadata_name] == value)
case "is not" | "":
if isinstance(value, str):
query = query.filter(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"')
else:
query = query.filter(DatasetDocument.doc_metadata[metadata_name] != value)
case "is empty":
query = query.filter(Document.doc_metadata[metadata_name].is_(None))
query = query.filter(DatasetDocument.doc_metadata[metadata_name].is_(None))
case "is not empty":
query = query.filter(Document.doc_metadata[metadata_name].isnot(None))
case "before", "<":
query = query.filter(Document.doc_metadata[metadata_name] < value)
case "after", ">":
query = query.filter(Document.doc_metadata[metadata_name] > value)
case "", ">=":
query = query.filter(Document.doc_metadata[metadata_name] <= value)
case "", ">=":
query = query.filter(Document.doc_metadata[metadata_name] >= value)
query = query.filter(DatasetDocument.doc_metadata[metadata_name].isnot(None))
case "before" | "<":
query = query.filter(DatasetDocument.doc_metadata[metadata_name] < value)
case "after" | ">":
query = query.filter(DatasetDocument.doc_metadata[metadata_name] > value)
case "" | ">=":
query = query.filter(DatasetDocument.doc_metadata[metadata_name] <= value)
case "" | ">=":
query = query.filter(DatasetDocument.doc_metadata[metadata_name] >= value)
case _:
pass
return query
def _fetch_model_config(
self, tenant_id: str, model: ModelConfig

View File

@ -79,12 +79,12 @@ SupportedComparisonOperator = Literal[
# for string or array
"contains",
"not contains",
"starts with",
"ends with",
"start with",
"end with",
"is",
"is not",
"empty",
"is not empty",
"not empty",
# for number
"=",
"",

View File

@ -296,7 +296,7 @@ class KnowledgeRetrievalNode(LLMNode):
automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data)
if automatic_metadata_filters:
for filter in automatic_metadata_filters:
self._process_metadata_filter_func(
document_query = self._process_metadata_filter_func(
filter.get("condition"), filter.get("metadata_name"), filter.get("value"), document_query
)
elif node_data.metadata_filtering_mode == "manual":
@ -305,7 +305,7 @@ class KnowledgeRetrievalNode(LLMNode):
expected_value = condition.value
if isinstance(expected_value, str):
expected_value = self.graph_runtime_state.variable_pool.convert_template(expected_value).text
self._process_metadata_filter_func(
document_query = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, document_query
)
else:
@ -383,17 +383,23 @@ class KnowledgeRetrievalNode(LLMNode):
def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: str, query):
match condition:
case "contains":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}%"))
query = query.filter(Document.doc_metadata[metadata_name].like(f'"%{value}%"'))
case "not contains":
query = query.filter(Document.doc_metadata[metadata_name].notlike(f"%{value}%"))
query = query.filter(Document.doc_metadata[metadata_name].notlike(f'"%{value}%"'))
case "start with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"{value}%"))
query = query.filter(Document.doc_metadata[metadata_name].like(f'"{value}%"'))
case "end with":
query = query.filter(Document.doc_metadata[metadata_name].like(f"%{value}"))
query = query.filter(Document.doc_metadata[metadata_name].like(f'"%{value}"'))
case "=" | "is":
query = query.filter(Document.doc_metadata[metadata_name] == value)
if isinstance(value, str):
query = query.filter(Document.doc_metadata[metadata_name] == f'"{value}"')
else:
query = query.filter(Document.doc_metadata[metadata_name] == value)
case "is not" | "":
query = query.filter(Document.doc_metadata[metadata_name] != value)
if isinstance(value, str):
query = query.filter(Document.doc_metadata[metadata_name] != f'"{value}"')
else:
query = query.filter(Document.doc_metadata[metadata_name] != value)
case "is empty":
query = query.filter(Document.doc_metadata[metadata_name].is_(None))
case "is not empty":
@ -408,7 +414,7 @@ class KnowledgeRetrievalNode(LLMNode):
query = query.filter(Document.doc_metadata[metadata_name] >= value)
case _:
pass
return query
@classmethod
def _extract_variable_selector_to_variable_mapping(
cls,

813
api/poetry.lock generated

File diff suppressed because it is too large Load Diff