Add document_id index to vector database (vdb) stores

This commit is contained in:
jyong 2025-03-19 21:26:08 +08:00
parent 106169ed7f
commit b89c9a61c9
5 changed files with 14 additions and 1 deletions

View File

@ -196,9 +196,11 @@ class ElasticSearchVector(BaseVector):
Field.METADATA_KEY.value: { Field.METADATA_KEY.value: {
"type": "object", "type": "object",
"properties": { "properties": {
"doc_id": {"type": "keyword"} # Map doc_id to keyword type "doc_id": {"type": "keyword"}, # Map doc_id to keyword type
"document_id": {"type": "keyword"} # Map document_id to keyword type
}, },
}, },
} }
} }
self._client.indices.create(index=self._collection_name, mappings=mappings) self._client.indices.create(index=self._collection_name, mappings=mappings)

View File

@ -11,3 +11,4 @@ class Field(Enum):
TEXT_KEY = "text" TEXT_KEY = "text"
PRIMARY_KEY = "id" PRIMARY_KEY = "id"
DOC_ID = "metadata.doc_id" DOC_ID = "metadata.doc_id"
DOCUMENT_ID = "metadata.document_id"

View File

@ -134,6 +134,10 @@ class QdrantVector(BaseVector):
self._client.create_payload_index( self._client.create_payload_index(
collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD
) )
# create document_id payload index
self._client.create_payload_index(
collection_name, Field.DOCUMENT_ID.value, field_schema=PayloadSchemaType.KEYWORD
)
# create full text index # create full text index
text_index_params = TextIndexParams( text_index_params = TextIndexParams(
type=TextIndexType.TEXT, type=TextIndexType.TEXT,

View File

@ -144,6 +144,10 @@ class TidbOnQdrantVector(BaseVector):
self._client.create_payload_index( self._client.create_payload_index(
collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD
) )
# create document_id payload index
self._client.create_payload_index(
collection_name, Field.DOCUMENT_ID.value, field_schema=PayloadSchemaType.KEYWORD
)
# create full text index # create full text index
text_index_params = TextIndexParams( text_index_params = TextIndexParams(
type=TextIndexType.TEXT, type=TextIndexType.TEXT,

View File

@ -105,10 +105,12 @@ class TiDBVector(BaseVector):
text TEXT NOT NULL, text TEXT NOT NULL,
meta JSON NOT NULL, meta JSON NOT NULL,
doc_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.doc_id'))) STORED, doc_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.doc_id'))) STORED,
document_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.document_id'))) STORED,
vector VECTOR<FLOAT>({dimension}) NOT NULL, vector VECTOR<FLOAT>({dimension}) NOT NULL,
create_time DATETIME DEFAULT CURRENT_TIMESTAMP, create_time DATETIME DEFAULT CURRENT_TIMESTAMP,
update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
KEY (doc_id), KEY (doc_id),
KEY (document_id),
VECTOR INDEX idx_vector (({tidb_dist_func}(vector))) USING HNSW VECTOR INDEX idx_vector (({tidb_dist_func}(vector))) USING HNSW
); );
""") """)