From 20d16d7b310aa8e3bf5f865a805841fb2cbb8293 Mon Sep 17 00:00:00 2001 From: crazywoola <100913391+crazywoola@users.noreply.github.com> Date: Thu, 28 Mar 2024 13:02:41 +0800 Subject: [PATCH 1/8] doc: update helm charts (#3012) --- README.md | 9 ++++++--- README_CN.md | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 207f312946..80a60e9cad 100644 --- a/README.md +++ b/README.md @@ -100,10 +100,12 @@ docker compose up -d After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization installation process. -### Helm Chart +#### Deploy with Helm Chart -Big thanks to @BorisPolonsky for providing us with a [Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes. -You can go to https://github.com/BorisPolonsky/dify-helm for deployment information. +[Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes. + +- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) +- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) ### Configuration @@ -120,6 +122,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi At the same time, please consider supporting Dify by sharing it on social media and at events and conferences. 
+ ### Contributors diff --git a/README_CN.md b/README_CN.md index 6d33095d9c..81ba87f70e 100644 --- a/README_CN.md +++ b/README_CN.md @@ -94,10 +94,12 @@ docker compose up -d 运行后,可以在浏览器上访问 [http://localhost/install](http://localhost/install) 进入 Dify 控制台并开始初始化安装操作。 -### Helm Chart +#### 使用 Helm Chart 部署 -非常感谢 @BorisPolonsky 为我们提供了一个 [Helm Chart](https://helm.sh/) 版本,可以在 Kubernetes 上部署 Dify。 -您可以前往 https://github.com/BorisPolonsky/dify-helm 来获取部署信息。 +使用 [Helm Chart](https://helm.sh/) 版本,可以在 Kubernetes 上部署 Dify。 + +- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) +- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) ### 配置 From b0b0cc045f637893629efded805d465277ff1b8a Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:02:35 +0800 Subject: [PATCH 2/8] add mutil-thread document embedding (#3016) Co-authored-by: jyong --- api/core/indexing_runner.py | 49 +++++++++++++------ .../unstructured_doc_extractor.py | 2 +- .../unstructured_eml_extractor.py | 2 +- .../unstructured_markdown_extractor.py | 2 +- .../unstructured_msg_extractor.py | 2 +- .../unstructured_text_extractor.py | 2 +- .../unstructured_xml_extractor.py | 2 +- 7 files changed, 39 insertions(+), 22 deletions(-) diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index dd46aa27dc..94c7d18c55 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -1,3 +1,4 @@ +import concurrent.futures import datetime import json import logging @@ -650,17 +651,44 @@ class IndexingRunner: # chunk nodes by chunk size indexing_start_at = time.perf_counter() tokens = 0 - chunk_size = 100 + chunk_size = 10 embedding_model_type_instance = None if embedding_model_instance: embedding_model_type_instance = embedding_model_instance.model_type_instance embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance) + with 
concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + futures = [] + for i in range(0, len(documents), chunk_size): + chunk_documents = documents[i:i + chunk_size] + futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor, + chunk_documents, dataset, + dataset_document, embedding_model_instance, + embedding_model_type_instance)) - for i in range(0, len(documents), chunk_size): + for future in futures: + tokens += future.result() + + indexing_end_at = time.perf_counter() + + # update document status to completed + self._update_document_index_status( + document_id=dataset_document.id, + after_indexing_status="completed", + extra_update_params={ + DatasetDocument.tokens: tokens, + DatasetDocument.completed_at: datetime.datetime.utcnow(), + DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at, + } + ) + + def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document, + embedding_model_instance, embedding_model_type_instance): + with flask_app.app_context(): # check document is paused self._check_document_paused_status(dataset_document.id) - chunk_documents = documents[i:i + chunk_size] + + tokens = 0 if dataset.indexing_technique == 'high_quality' or embedding_model_type_instance: tokens += sum( embedding_model_type_instance.get_num_tokens( @@ -670,9 +698,9 @@ class IndexingRunner: ) for document in chunk_documents ) + # load index index_processor.load(dataset, chunk_documents) - db.session.add(dataset) document_ids = [document.metadata['doc_id'] for document in chunk_documents] db.session.query(DocumentSegment).filter( @@ -687,18 +715,7 @@ class IndexingRunner: db.session.commit() - indexing_end_at = time.perf_counter() - - # update document status to completed - self._update_document_index_status( - document_id=dataset_document.id, - after_indexing_status="completed", - extra_update_params={ - DatasetDocument.tokens: tokens, - DatasetDocument.completed_at: 
datetime.datetime.utcnow(), - DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at, - } - ) + return tokens def _check_document_paused_status(self, document_id: str): indexing_cache_key = 'document_{}_is_paused'.format(document_id) diff --git a/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py b/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py index b37981a30d..34a4e85e97 100644 --- a/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py @@ -53,7 +53,7 @@ class UnstructuredWordExtractor(BaseExtractor): elements = partition_docx(filename=self._file_path) from unstructured.chunking.title import chunk_by_title - chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0) + chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000) documents = [] for chunk in chunks: text = chunk.text.strip() diff --git a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py index 1d92bbbee6..f6ae8fad53 100644 --- a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py @@ -43,7 +43,7 @@ class UnstructuredEmailExtractor(BaseExtractor): pass from unstructured.chunking.title import chunk_by_title - chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0) + chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000) documents = [] for chunk in chunks: text = chunk.text.strip() diff --git a/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py b/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py index 3ac04ddc17..3d63446fef 100644 --- a/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py +++ 
b/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py @@ -38,7 +38,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor): elements = partition_md(filename=self._file_path, api_url=self._api_url) from unstructured.chunking.title import chunk_by_title - chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0) + chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000) documents = [] for chunk in chunks: text = chunk.text.strip() diff --git a/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py b/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py index d4b72e37eb..34d3e8021a 100644 --- a/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py @@ -28,7 +28,7 @@ class UnstructuredMsgExtractor(BaseExtractor): elements = partition_msg(filename=self._file_path, api_url=self._api_url) from unstructured.chunking.title import chunk_by_title - chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0) + chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000) documents = [] for chunk in chunks: text = chunk.text.strip() diff --git a/api/core/rag/extractor/unstructured/unstructured_text_extractor.py b/api/core/rag/extractor/unstructured/unstructured_text_extractor.py index 5af21b2b1d..cc67f2b866 100644 --- a/api/core/rag/extractor/unstructured/unstructured_text_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_text_extractor.py @@ -28,7 +28,7 @@ class UnstructuredTextExtractor(BaseExtractor): elements = partition_text(filename=self._file_path, api_url=self._api_url) from unstructured.chunking.title import chunk_by_title - chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0) + chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000) documents = [] for chunk 
in chunks: text = chunk.text.strip() diff --git a/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py index b08ff63a1c..5600fb075d 100644 --- a/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py +++ b/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py @@ -28,7 +28,7 @@ class UnstructuredXmlExtractor(BaseExtractor): elements = partition_xml(filename=self._file_path, xml_keep_tags=True, api_url=self._api_url) from unstructured.chunking.title import chunk_by_title - chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0) + chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000) documents = [] for chunk in chunks: text = chunk.text.strip() From 669c8c3cca76a01384eae7010db510a03f3894d5 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:02:52 +0800 Subject: [PATCH 3/8] some optimization for admin api key, create tenant and reset-encrypt-key-pair command (#3013) Co-authored-by: jyong --- api/commands.py | 21 +++++++++++---------- api/libs/login.py | 2 +- api/services/account_service.py | 11 +++++------ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/api/commands.py b/api/commands.py index b82d4d5d5d..9f1dc95281 100644 --- a/api/commands.py +++ b/api/commands.py @@ -109,19 +109,20 @@ def reset_encrypt_key_pair(): click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red')) return - tenant = db.session.query(Tenant).first() - if not tenant: - click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red')) - return + tenants = db.session.query(Tenant).all() + for tenant in tenants: + if not tenant: + click.echo(click.style('Sorry, no workspace found. 
Please enter /install to initialize.', fg='red')) + return - tenant.encrypt_public_key = generate_key_pair(tenant.id) + tenant.encrypt_public_key = generate_key_pair(tenant.id) - db.session.query(Provider).filter(Provider.provider_type == 'custom').delete() - db.session.query(ProviderModel).delete() - db.session.commit() + db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete() + db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete() + db.session.commit() - click.echo(click.style('Congratulations! ' - 'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green')) + click.echo(click.style('Congratulations! ' + 'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green')) @click.command('vdb-migrate', help='migrate vector db.') diff --git a/api/libs/login.py b/api/libs/login.py index 5c03cfe957..14085fe603 100644 --- a/api/libs/login.py +++ b/api/libs/login.py @@ -53,7 +53,7 @@ def login_required(func): def decorated_view(*args, **kwargs): auth_header = request.headers.get('Authorization') admin_api_key_enable = os.getenv('ADMIN_API_KEY_ENABLE', default='False') - if admin_api_key_enable: + if admin_api_key_enable.lower() == 'true': if auth_header: if ' ' not in auth_header: raise Unauthorized('Invalid Authorization header format. 
Expected \'Bearer \' format.') diff --git a/api/services/account_service.py b/api/services/account_service.py index 103af7f79c..7fc61e40e3 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -435,11 +435,13 @@ class RegisterService: if open_id is not None or provider is not None: AccountService.link_account_integrate(provider, open_id, account) + if current_app.config['EDITION'] != 'SELF_HOSTED': + tenant = TenantService.create_tenant(f"{account.name}'s Workspace") - tenant = TenantService.create_tenant(f"{account.name}'s Workspace") + TenantService.create_tenant_member(tenant, account, role='owner') + account.current_tenant = tenant - TenantService.create_tenant_member(tenant, account, role='owner') - account.current_tenant = tenant + tenant_was_created.send(tenant) db.session.commit() except Exception as e: @@ -447,8 +449,6 @@ class RegisterService: logging.error(f'Register failed: {e}') raise AccountRegisterError(f'Registration failed: {e}') from e - tenant_was_created.send(tenant) - return account @classmethod @@ -461,7 +461,6 @@ class RegisterService: name = email.split('@')[0] account = cls.register(email=email, name=name, language=language, status=AccountStatus.PENDING) - # Create new tenant member for invited tenant TenantService.create_tenant_member(tenant, account, role) TenantService.switch_tenant(account, tenant.id) From 2c43393bf1a44999eaa862f7f5ba9efacc01d7b8 Mon Sep 17 00:00:00 2001 From: Richards Tu <142148415+richards199999@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:21:02 +0800 Subject: [PATCH 4/8] Add New Tool: DevDocs (#2993) --- .../provider/builtin/devdocs/_assets/icon.svg | 4 ++ .../tools/provider/builtin/devdocs/devdocs.py | 21 ++++++++++ .../provider/builtin/devdocs/devdocs.yaml | 10 +++++ .../builtin/devdocs/tools/searchDevDocs.py | 42 +++++++++++++++++++ .../builtin/devdocs/tools/searchDevDocs.yaml | 34 +++++++++++++++ 5 files changed, 111 insertions(+) create mode 100644 
api/core/tools/provider/builtin/devdocs/_assets/icon.svg create mode 100644 api/core/tools/provider/builtin/devdocs/devdocs.py create mode 100644 api/core/tools/provider/builtin/devdocs/devdocs.yaml create mode 100644 api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py create mode 100644 api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml diff --git a/api/core/tools/provider/builtin/devdocs/_assets/icon.svg b/api/core/tools/provider/builtin/devdocs/_assets/icon.svg new file mode 100644 index 0000000000..c7a19fabfb --- /dev/null +++ b/api/core/tools/provider/builtin/devdocs/_assets/icon.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/api/core/tools/provider/builtin/devdocs/devdocs.py b/api/core/tools/provider/builtin/devdocs/devdocs.py new file mode 100644 index 0000000000..25cbe4d053 --- /dev/null +++ b/api/core/tools/provider/builtin/devdocs/devdocs.py @@ -0,0 +1,21 @@ +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.provider.builtin.devdocs.tools.searchDevDocs import SearchDevDocsTool +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController + + +class DevDocsProvider(BuiltinToolProviderController): + def _validate_credentials(self, credentials: dict) -> None: + try: + SearchDevDocsTool().fork_tool_runtime( + meta={ + "credentials": credentials, + } + ).invoke( + user_id='', + tool_parameters={ + "doc": "python~3.12", + "topic": "library/code", + }, + ) + except Exception as e: + raise ToolProviderCredentialValidationError(str(e)) \ No newline at end of file diff --git a/api/core/tools/provider/builtin/devdocs/devdocs.yaml b/api/core/tools/provider/builtin/devdocs/devdocs.yaml new file mode 100644 index 0000000000..1db226fc4b --- /dev/null +++ b/api/core/tools/provider/builtin/devdocs/devdocs.yaml @@ -0,0 +1,10 @@ +identity: + author: Richards Tu + name: devdocs + label: + en_US: DevDocs + zh_Hans: DevDocs + description: + en_US: Get official developer 
documentations on DevDocs. + zh_Hans: 从DevDocs获取官方开发者文档。 + icon: icon.svg diff --git a/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py new file mode 100644 index 0000000000..1a244c5db3 --- /dev/null +++ b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py @@ -0,0 +1,42 @@ +from typing import Any, Union + +import requests +from pydantic import BaseModel, Field + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + + +class SearchDevDocsInput(BaseModel): + doc: str = Field(..., description="The name of the documentation.") + topic: str = Field(..., description="The path of the section/topic.") + + +class SearchDevDocsTool(BuiltinTool): + def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + """ + Invokes the DevDocs search tool with the given user ID and tool parameters. + + Args: + user_id (str): The ID of the user invoking the tool. + tool_parameters (dict[str, Any]): The parameters for the tool, including 'doc' and 'topic'. + + Returns: + ToolInvokeMessage | list[ToolInvokeMessage]: The result of the tool invocation, which can be a single message or a list of messages. + """ + doc = tool_parameters.get('doc', '') + topic = tool_parameters.get('topic', '') + + if not doc: + return self.create_text_message('Please provide the documentation name.') + if not topic: + return self.create_text_message('Please provide the topic path.') + + url = f"https://documents.devdocs.io/{doc}/{topic}.html" + response = requests.get(url) + + if response.status_code == 200: + content = response.text + return self.create_text_message(self.summary(user_id=user_id, content=content)) + else: + return self.create_text_message(f"Failed to retrieve the documentation. 
Status code: {response.status_code}") \ No newline at end of file diff --git a/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml new file mode 100644 index 0000000000..2476db9da4 --- /dev/null +++ b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml @@ -0,0 +1,34 @@ +identity: + name: searchDevDocs + author: Richards Tu + label: + en_US: Search Developer Docs + zh_Hans: 搜索开发者文档 +description: + human: + en_US: A tool for searching for a specific topic and path in DevDocs based on the provided documentation name and topic. Don't forget to add some shots in the system prompt; for example, the documentation name should be like \"vuex~4\", \"css\", or \"python~3.12\", while the topic should be like \"guide/actions\" for Vuex 4, \"display-box\" for CSS, or \"library/code\" for Python 3.12. + zh_Hans: 一个用于根据提供的文档名称和主题,在DevDocs中搜索特定主题和路径的工具。不要忘记在系统提示词中添加一些示例;例如,文档名称应该是\"vuex~4\"、\"css\"或\"python~3.12\",而主题应该是\"guide/actions\"用于Vuex 4,\"display-box\"用于CSS,或\"library/code\"用于Python 3.12。 + llm: A tool for searching for specific developer documentation in DevDocs based on the provided documentation name and topic. +parameters: + - name: doc + type: string + required: true + label: + en_US: Documentation name + zh_Hans: 文档名称 + human_description: + en_US: The name of the documentation. + zh_Hans: 文档名称。 + llm_description: The name of the documentation, such as \"vuex~4\", \"css\", or \"python~3.12\". The exact value should be identified by the user. + form: llm + - name: topic + type: string + required: true + label: + en_US: Topic name + zh_Hans: 主题名称 + human_description: + en_US: The path of the section/topic. + zh_Hans: 文档主题的路径。 + llm_description: The path of the section/topic, such as \"guide/actions\" for Vuex 4, \"display-box\" for CSS, or \"library/code\" for Python 3.12. 
+ form: llm From a6cd0f0e73865be3ebbae0a5bdbb129a2f9d10f2 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Fri, 29 Mar 2024 13:06:00 +0800 Subject: [PATCH 5/8] fix add segment when dataset and document is empty (#3021) Co-authored-by: jyong --- api/core/rag/datasource/vdb/milvus/milvus_vector.py | 10 ++++++++++ api/core/rag/datasource/vdb/qdrant/qdrant_vector.py | 7 +++++++ api/core/rag/datasource/vdb/vector_factory.py | 4 ++-- .../rag/datasource/vdb/weaviate/weaviate_vector.py | 5 +++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py index 203b7eff37..f62d603d8d 100644 --- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py +++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py @@ -144,6 +144,16 @@ class MilvusVector(BaseVector): utility.drop_collection(self._collection_name, None, using=alias) def text_exists(self, id: str) -> bool: + alias = uuid4().hex + if self._client_config.secure: + uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port) + else: + uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port) + connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password) + + from pymilvus import utility + if not utility.has_collection(self._collection_name, using=alias): + return False result = self._client.query(collection_name=self._collection_name, filter=f'metadata["doc_id"] == "{id}"', diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py index 6bd4b5c340..436e6b5f6a 100644 --- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py +++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py @@ -275,6 +275,13 @@ class QdrantVector(BaseVector): ) def text_exists(self, id: str) -> bool: + all_collection_name = [] + 
collections_response = self._client.get_collections() + collection_list = collections_response.collections + for collection in collection_list: + all_collection_name.append(collection.name) + if self._collection_name not in all_collection_name: + return False response = self._client.retrieve( collection_name=self._collection_name, ids=[id] diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 27ae15a025..71fc07967c 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -128,8 +128,8 @@ class Vector: if kwargs.get('duplicate_check', False): documents = self._filter_duplicate_texts(documents) embeddings = self._embeddings.embed_documents([document.page_content for document in documents]) - self._vector_processor.add_texts( - documents=documents, + self._vector_processor.create( + texts=documents, embeddings=embeddings, **kwargs ) diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index 6e317115b8..5d24ee9fd2 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -134,6 +134,11 @@ class WeaviateVector(BaseVector): def text_exists(self, id: str) -> bool: collection_name = self._collection_name + schema = self._default_schema(self._collection_name) + + # check whether the index already exists + if not self._client.schema.contains(schema): + return False result = self._client.query.get(collection_name).with_additional(["id"]).with_where({ "path": ["doc_id"], "operator": "Equal", From 59909b5ca767d7c0e88191dc9462cff486393f16 Mon Sep 17 00:00:00 2001 From: kun321 <124553455+kun321@users.noreply.github.com> Date: Fri, 29 Mar 2024 13:16:52 +0800 Subject: [PATCH 6/8] update the discord Invalid invite (#3028) --- CONTRIBUTING.md | 2 +- CONTRIBUTING_CN.md | 2 +- README.md | 2 +- 3 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e1c087a6cd..992126551c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -155,4 +155,4 @@ And that's it! Once your PR is merged, you will be featured as a contributor in ## Getting Help -If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/AhzKf7dNgk) for a quick chat. +If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat. diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index 6adfed6b6c..08c5a0a4bd 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -152,4 +152,4 @@ Dify的后端使用Python编写,使用[Flask](https://flask.palletsprojects.co ## 获取帮助 -如果你在贡献过程中遇到困难或者有任何问题,可以通过相关的 GitHub 问题提出你的疑问,或者加入我们的 [Discord](https://discord.gg/AhzKf7dNgk) 进行快速交流。 +如果你在贡献过程中遇到困难或者有任何问题,可以通过相关的 GitHub 问题提出你的疑问,或者加入我们的 [Discord](https://discord.gg/8Tpq4AcN9c) 进行快速交流。 diff --git a/README.md b/README.md index 80a60e9cad..154fdd8adb 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ At the same time, please consider supporting Dify by sharing it on social media ### Translations -We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/AhzKf7dNgk). +We are looking for contributors to help with translating Dify to languages other than Mandarin or English. 
If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c). ## Community & Support From 1294ce40410d77bfdc7c7a8c98e658c62461e592 Mon Sep 17 00:00:00 2001 From: chenhe Date: Mon, 5 Feb 2024 16:59:18 +0800 Subject: [PATCH 7/8] create launch.json config --- .vscode/launch.json | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000000..181ae6f1b4 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,30 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + // set root directory to api/ folder + "cwd": "${workspaceFolder}/api", + "name": "Python: Flask", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "app.py", + "FLASK_DEBUG": "1", + "GEVENT_SUPPORT": "True" + }, + "args": [ + "run", + "--no-debugger", + "--no-reload", + "--host=0.0.0.0", + "--port=5001" + ], + "jinja": true, + "justMyCode": true + } + ] +} \ No newline at end of file From 0f94e4cd011111dbbb716dfb633a9b48b74d3039 Mon Sep 17 00:00:00 2001 From: chenhe Date: Sat, 16 Mar 2024 07:44:49 -0700 Subject: [PATCH 8/8] optionally specify available bedrock model used in validation --- .vscode/launch.json | 30 ------------------- .../model_providers/bedrock/bedrock.py | 6 ++-- .../model_providers/bedrock/bedrock.yaml | 3 +- 3 files changed, 6 insertions(+), 33 deletions(-) delete mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 181ae6f1b4..0000000000 --- 
a/.vscode/launch.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - // set root directory to api/ folder - "cwd": "${workspaceFolder}/api", - "name": "Python: Flask", - "type": "python", - "request": "launch", - "module": "flask", - "env": { - "FLASK_APP": "app.py", - "FLASK_DEBUG": "1", - "GEVENT_SUPPORT": "True" - }, - "args": [ - "run", - "--no-debugger", - "--no-reload", - "--host=0.0.0.0", - "--port=5001" - ], - "jinja": true, - "justMyCode": true - } - ] -} \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.py b/api/core/model_runtime/model_providers/bedrock/bedrock.py index 96cb90280e..e99bc52ff8 100644 --- a/api/core/model_runtime/model_providers/bedrock/bedrock.py +++ b/api/core/model_runtime/model_providers/bedrock/bedrock.py @@ -17,9 +17,11 @@ class BedrockProvider(ModelProvider): """ try: model_instance = self.get_model_instance(ModelType.LLM) - bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1') + + # Use `amazon.titan-text-lite-v1` model by default for validating credentials + model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1') model_instance.validate_credentials( - model=bedrock_validate_model_name, + model=model_for_validation, credentials=credentials ) except CredentialsValidateFailedError as ex: diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml index e1923f8f8a..19ce51ddcd 100644 --- a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml +++ b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml @@ -74,7 +74,8 @@ provider_credential_schema: label: en_US: Available Model Name zh_Hans: 可用模型名称 - 
type: text-input + type: secret-input placeholder: en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation. zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1) +