diff --git a/api/.env.example b/api/.env.example index 24f7829f54..e0fc825779 100644 --- a/api/.env.example +++ b/api/.env.example @@ -86,7 +86,7 @@ RELYT_PASSWORD=postgres RELYT_DATABASE=postgres # Tencent configuration -TENCENT_URL=http://127.0.0.1 +TENCENT_VECTOR_DB_URL=http://127.0.0.1 TENCENT_API_KEY=dify TENCENT_TIMEOUT=30 TENCENT_USERNAME=dify diff --git a/api/config.py b/api/config.py index 261c54952f..fb66ddf12a 100644 --- a/api/config.py +++ b/api/config.py @@ -229,7 +229,7 @@ class Config: self.RELYT_DATABASE = get_env('RELYT_DATABASE') # tencent settings - self.TENCENT_URL = get_env('TENCENT_URL') + self.TENCENT_VECTOR_DB_URL = get_env('TENCENT_VECTOR_DB_URL') self.TENCENT_API_KEY = get_env('TENCENT_API_KEY') self.TENCENT_TIMEOUT = get_env('TENCENT_TIMEOUT') self.TENCENT_USERNAME = get_env('TENCENT_USERNAME') diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index d656baa135..c125b29389 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -469,13 +469,13 @@ class DatasetRetrievalSettingApi(Resource): @account_initialization_required def get(self): vector_type = current_app.config['VECTOR_STORE'] - if vector_type == 'milvus': + if vector_type == 'milvus' or vector_type == 'tencent': return { 'retrieval_method': [ 'semantic_search' ] } - elif vector_type == 'qdrant' or vector_type == 'weaviate' or vector_type == 'tencent': + elif vector_type == 'qdrant' or vector_type == 'weaviate': return { 'retrieval_method': [ 'semantic_search', 'full_text_search', 'hybrid_search' @@ -491,13 +491,13 @@ class DatasetRetrievalSettingMockApi(Resource): @account_initialization_required def get(self, vector_type): - if vector_type == 'milvus': + if vector_type == 'milvus' or vector_type == 'tencent': return { 'retrieval_method': [ 'semantic_search' ] } - elif vector_type == 'qdrant' or vector_type == 'weaviate' or vector_type == 'tencent': + elif vector_type == 
'qdrant' or vector_type == 'weaviate': return { 'retrieval_method': [ 'semantic_search', 'full_text_search', 'hybrid_search' diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index a2826fe9a9..c1ebf9b7c7 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -156,14 +156,15 @@ class TencentVector(BaseVector): return self._get_search_res(res) def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: - res = (self._db.collection(self._collection_name) - .searchByText(embeddingItems=[query], - params=document.HNSWSearchParams(ef=kwargs.get("ef", 10)), - retrieve_vector=False, - limit=kwargs.get('top_k', 4), - timeout=self._client_config.timeout, - )) - return self._get_search_res(res) + # res = (self._db.collection(self._collection_name) + # .searchByText(embeddingItems=[query], + # params=document.HNSWSearchParams(ef=kwargs.get("ef", 10)), + # retrieve_vector=False, + # limit=kwargs.get('top_k', 4), + # timeout=self._client_config.timeout, + # )) + # Full-text search requires an embedding model deployed in Tencent Vector DB; not supported for now. + return [] def _get_search_res(self, res): docs = [] diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 1d0d79064e..cbccf822b1 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -153,7 +153,7 @@ class Vector: return TencentVector( collection_name=collection_name, config=TencentConfig( - url=config.get('TENCENT_URL'), + url=config.get('TENCENT_VECTOR_DB_URL'), api_key=config.get('TENCENT_API_KEY'), timeout=config.get('TENCENT_TIMEOUT'), username=config.get('TENCENT_USERNAME'), diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent.py b/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent.py index e69de29bb2..7906fad3b2 100644 ---
a/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent.py +++ b/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent.py @@ -0,0 +1,57 @@ +import pytest +from extensions.ext_redis import redis_client +from core.rag.datasource.vdb.tencent.tencent_vector import TencentConfig, TencentVector +from core.rag.models.document import Document + + +def _create_tencent_vector() -> TencentVector: + tencent_vector = TencentVector( + collection_name='test-001', + config=TencentConfig( + url="http://10.6.x.x", + api_key="nTZ**********************", + timeout=30, + username="dify", + database="dify", + shard=1, + replicas=2, + ) + ) + documents = [ + Document(page_content="This is document 1", metadata={"doc_id": "doc1", "document_id": "foo1"}), + Document(page_content="This is document 2", metadata={"doc_id": "doc2", "document_id": "foo2"}), + ] + embeddings = [[0.2123, 0.23, 0.213], [0.2123, 0.22, 0.213]] + tencent_vector.create(texts=documents, embeddings=embeddings) + + return tencent_vector + + +@pytest.fixture(autouse=True) +def mock_redis_lock(mocker): + mocker.patch.object(redis_client, "lock") + + +def test_text_exists(): + tencent_vector = _create_tencent_vector() + assert tencent_vector.text_exists(id="doc1") is True + + +def test_delete_by_ids(): + tencent_vector = _create_tencent_vector() + tencent_vector.delete_by_ids(ids=['doc2']) + + +def test_delete_by_metadata_field(): + tencent_vector = _create_tencent_vector() + tencent_vector.delete_by_metadata_field(key="document_id", value="foo1") + + +def test_search_by_vector(): + tencent_vector = _create_tencent_vector() + res = tencent_vector.search_by_vector(query_vector=[0.3123, 0.43, 0.213]) + assert len(res) > 0 + +def test_delete(): + tencent_vector = _create_tencent_vector() + tencent_vector.delete() diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index b894ecce2e..4a7dd1fcb6 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -230,7 +230,7 @@ 
services: RELYT_PASSWORD: difyai123456 RELYT_DATABASE: postgres # tencent configurations - TENCENT_URL: http://127.0.0.1 + TENCENT_VECTOR_DB_URL: http://127.0.0.1 TENCENT_API_KEY: dify TENCENT_TIMEOUT: 30 TENCENT_USERNAME: dify