From 20d16d7b310aa8e3bf5f865a805841fb2cbb8293 Mon Sep 17 00:00:00 2001
From: crazywoola <100913391+crazywoola@users.noreply.github.com>
Date: Thu, 28 Mar 2024 13:02:41 +0800
Subject: [PATCH 1/8] doc: update helm charts (#3012)

---
 README.md    | 9 ++++++---
 README_CN.md | 8 +++++---
 2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 207f312946..80a60e9cad 100644
--- a/README.md
+++ b/README.md
@@ -100,10 +100,12 @@ docker compose up -d
 
 After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization installation process.
 
-### Helm Chart
+#### Deploy with Helm Chart
 
-Big thanks to @BorisPolonsky for providing us with a [Helm Chart](https://helm.sh/) version, which allows Dify to be deployed on Kubernetes.
-You can go to https://github.com/BorisPolonsky/dify-helm for deployment information.
+Dify can be deployed on Kubernetes using a [Helm Chart](https://helm.sh/). Community-contributed charts:
+
+- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
+- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 
 ### Configuration
 
@@ -120,6 +122,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi
 
 At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.
 
+
 ### Contributors
 
 <a href="https://github.com/langgenius/dify/graphs/contributors">
diff --git a/README_CN.md b/README_CN.md
index 6d33095d9c..81ba87f70e 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -94,10 +94,12 @@ docker compose up -d
 
 运行后，可以在浏览器上访问 [http://localhost/install](http://localhost/install) 进入 Dify 控制台并开始初始化安装操作。
 
-### Helm Chart
+#### 使用 Helm Chart 部署
 
-非常感谢 @BorisPolonsky 为我们提供了一个 [Helm Chart](https://helm.sh/) 版本，可以在 Kubernetes 上部署 Dify。
-您可以前往 https://github.com/BorisPolonsky/dify-helm 来获取部署信息。
+使用 [Helm Chart](https://helm.sh/) 版本，可以在 Kubernetes 上部署 Dify。
+
+- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
+- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 
 ### 配置
 

From b0b0cc045f637893629efded805d465277ff1b8a Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Thu, 28 Mar 2024 17:02:35 +0800
Subject: [PATCH 2/8] add multi-thread document embedding (#3016)

Co-authored-by: jyong <jyong@dify.ai>
---
 api/core/indexing_runner.py                   | 49 +++++++++++++------
 .../unstructured_doc_extractor.py             |  2 +-
 .../unstructured_eml_extractor.py             |  2 +-
 .../unstructured_markdown_extractor.py        |  2 +-
 .../unstructured_msg_extractor.py             |  2 +-
 .../unstructured_text_extractor.py            |  2 +-
 .../unstructured_xml_extractor.py             |  2 +-
 7 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py
index dd46aa27dc..94c7d18c55 100644
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -1,3 +1,4 @@
+import concurrent.futures
 import datetime
 import json
 import logging
@@ -650,17 +651,44 @@ class IndexingRunner:
         # chunk nodes by chunk size
         indexing_start_at = time.perf_counter()
         tokens = 0
-        chunk_size = 100
+        chunk_size = 10
 
         embedding_model_type_instance = None
         if embedding_model_instance:
             embedding_model_type_instance = embedding_model_instance.model_type_instance
             embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            futures = []
+            for i in range(0, len(documents), chunk_size):
+                chunk_documents = documents[i:i + chunk_size]
+                futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor,
+                                               chunk_documents, dataset,
+                                               dataset_document, embedding_model_instance,
+                                               embedding_model_type_instance))
 
-        for i in range(0, len(documents), chunk_size):
+            for future in futures:
+                tokens += future.result()
+
+        indexing_end_at = time.perf_counter()
+
+        # update document status to completed
+        self._update_document_index_status(
+            document_id=dataset_document.id,
+            after_indexing_status="completed",
+            extra_update_params={
+                DatasetDocument.tokens: tokens,
+                DatasetDocument.completed_at: datetime.datetime.utcnow(),
+                DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
+            }
+        )
+
+    def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document,
+                       embedding_model_instance, embedding_model_type_instance):
+        with flask_app.app_context():
             # check document is paused
             self._check_document_paused_status(dataset_document.id)
-            chunk_documents = documents[i:i + chunk_size]
+
+            tokens = 0
             if dataset.indexing_technique == 'high_quality' or embedding_model_type_instance:
                 tokens += sum(
                     embedding_model_type_instance.get_num_tokens(
@@ -670,9 +698,9 @@ class IndexingRunner:
                     )
                     for document in chunk_documents
                 )
+
             # load index
             index_processor.load(dataset, chunk_documents)
-            db.session.add(dataset)
 
             document_ids = [document.metadata['doc_id'] for document in chunk_documents]
             db.session.query(DocumentSegment).filter(
@@ -687,18 +715,7 @@ class IndexingRunner:
 
             db.session.commit()
 
-        indexing_end_at = time.perf_counter()
-
-        # update document status to completed
-        self._update_document_index_status(
-            document_id=dataset_document.id,
-            after_indexing_status="completed",
-            extra_update_params={
-                DatasetDocument.tokens: tokens,
-                DatasetDocument.completed_at: datetime.datetime.utcnow(),
-                DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
-            }
-        )
+            return tokens
 
     def _check_document_paused_status(self, document_id: str):
         indexing_cache_key = 'document_{}_is_paused'.format(document_id)
diff --git a/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py b/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py
index b37981a30d..34a4e85e97 100644
--- a/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_doc_extractor.py
@@ -53,7 +53,7 @@ class UnstructuredWordExtractor(BaseExtractor):
             elements = partition_docx(filename=self._file_path)
 
         from unstructured.chunking.title import chunk_by_title
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
+        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
         documents = []
         for chunk in chunks:
             text = chunk.text.strip()
diff --git a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py
index 1d92bbbee6..f6ae8fad53 100644
--- a/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_eml_extractor.py
@@ -43,7 +43,7 @@ class UnstructuredEmailExtractor(BaseExtractor):
             pass
 
         from unstructured.chunking.title import chunk_by_title
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
+        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
         documents = []
         for chunk in chunks:
             text = chunk.text.strip()
diff --git a/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py b/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py
index 3ac04ddc17..3d63446fef 100644
--- a/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_markdown_extractor.py
@@ -38,7 +38,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor):
 
         elements = partition_md(filename=self._file_path, api_url=self._api_url)
         from unstructured.chunking.title import chunk_by_title
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
+        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
         documents = []
         for chunk in chunks:
             text = chunk.text.strip()
diff --git a/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py b/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py
index d4b72e37eb..34d3e8021a 100644
--- a/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_msg_extractor.py
@@ -28,7 +28,7 @@ class UnstructuredMsgExtractor(BaseExtractor):
 
         elements = partition_msg(filename=self._file_path, api_url=self._api_url)
         from unstructured.chunking.title import chunk_by_title
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
+        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
         documents = []
         for chunk in chunks:
             text = chunk.text.strip()
diff --git a/api/core/rag/extractor/unstructured/unstructured_text_extractor.py b/api/core/rag/extractor/unstructured/unstructured_text_extractor.py
index 5af21b2b1d..cc67f2b866 100644
--- a/api/core/rag/extractor/unstructured/unstructured_text_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_text_extractor.py
@@ -28,7 +28,7 @@ class UnstructuredTextExtractor(BaseExtractor):
 
         elements = partition_text(filename=self._file_path, api_url=self._api_url)
         from unstructured.chunking.title import chunk_by_title
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
+        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
         documents = []
         for chunk in chunks:
             text = chunk.text.strip()
diff --git a/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py b/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py
index b08ff63a1c..5600fb075d 100644
--- a/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py
+++ b/api/core/rag/extractor/unstructured/unstructured_xml_extractor.py
@@ -28,7 +28,7 @@ class UnstructuredXmlExtractor(BaseExtractor):
 
         elements = partition_xml(filename=self._file_path, xml_keep_tags=True, api_url=self._api_url)
         from unstructured.chunking.title import chunk_by_title
-        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
+        chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
         documents = []
         for chunk in chunks:
             text = chunk.text.strip()

From 669c8c3cca76a01384eae7010db510a03f3894d5 Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Thu, 28 Mar 2024 17:02:52 +0800
Subject: [PATCH 3/8] some optimization for admin api key, create tenant and
 reset-encrypt-key-pair command (#3013)

Co-authored-by: jyong <jyong@dify.ai>
---
 api/commands.py                 | 21 +++++++++++----------
 api/libs/login.py               |  2 +-
 api/services/account_service.py | 11 +++++------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/api/commands.py b/api/commands.py
index b82d4d5d5d..9f1dc95281 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -109,19 +109,20 @@ def reset_encrypt_key_pair():
         click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
         return
 
-    tenant = db.session.query(Tenant).first()
-    if not tenant:
-        click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
-        return
+    tenants = db.session.query(Tenant).all()
+    for tenant in tenants:
+        if not tenant:
+            click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
+            return
 
-    tenant.encrypt_public_key = generate_key_pair(tenant.id)
+        tenant.encrypt_public_key = generate_key_pair(tenant.id)
 
-    db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
-    db.session.query(ProviderModel).delete()
-    db.session.commit()
+        db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete()
+        db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete()
+        db.session.commit()
 
-    click.echo(click.style('Congratulations! '
-                           'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
+        click.echo(click.style('Congratulations! '
+                               'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
 
 
 @click.command('vdb-migrate', help='migrate vector db.')
diff --git a/api/libs/login.py b/api/libs/login.py
index 5c03cfe957..14085fe603 100644
--- a/api/libs/login.py
+++ b/api/libs/login.py
@@ -53,7 +53,7 @@ def login_required(func):
     def decorated_view(*args, **kwargs):
         auth_header = request.headers.get('Authorization')
         admin_api_key_enable = os.getenv('ADMIN_API_KEY_ENABLE', default='False')
-        if admin_api_key_enable:
+        if admin_api_key_enable.lower() == 'true':
             if auth_header:
                 if ' ' not in auth_header:
                     raise Unauthorized('Invalid Authorization header format. Expected \'Bearer <api-key>\' format.')
diff --git a/api/services/account_service.py b/api/services/account_service.py
index 103af7f79c..7fc61e40e3 100644
--- a/api/services/account_service.py
+++ b/api/services/account_service.py
@@ -435,11 +435,13 @@ class RegisterService:
 
             if open_id is not None or provider is not None:
                 AccountService.link_account_integrate(provider, open_id, account)
+            if current_app.config['EDITION'] != 'SELF_HOSTED':
+                tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
 
-            tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
+                TenantService.create_tenant_member(tenant, account, role='owner')
+                account.current_tenant = tenant
 
-            TenantService.create_tenant_member(tenant, account, role='owner')
-            account.current_tenant = tenant
+                tenant_was_created.send(tenant)
 
             db.session.commit()
         except Exception as e:
@@ -447,8 +449,6 @@ class RegisterService:
             logging.error(f'Register failed: {e}')
             raise AccountRegisterError(f'Registration failed: {e}') from e
 
-        tenant_was_created.send(tenant)
-
         return account
 
     @classmethod
@@ -461,7 +461,6 @@ class RegisterService:
             name = email.split('@')[0]
 
             account = cls.register(email=email, name=name, language=language, status=AccountStatus.PENDING)
-
             # Create new tenant member for invited tenant
             TenantService.create_tenant_member(tenant, account, role)
             TenantService.switch_tenant(account, tenant.id)

From 2c43393bf1a44999eaa862f7f5ba9efacc01d7b8 Mon Sep 17 00:00:00 2001
From: Richards Tu <142148415+richards199999@users.noreply.github.com>
Date: Fri, 29 Mar 2024 11:21:02 +0800
Subject: [PATCH 4/8] Add New Tool: DevDocs (#2993)

---
 .../provider/builtin/devdocs/_assets/icon.svg |  4 ++
 .../tools/provider/builtin/devdocs/devdocs.py | 21 ++++++++++
 .../provider/builtin/devdocs/devdocs.yaml     | 10 +++++
 .../builtin/devdocs/tools/searchDevDocs.py    | 42 +++++++++++++++++++
 .../builtin/devdocs/tools/searchDevDocs.yaml  | 34 +++++++++++++++
 5 files changed, 111 insertions(+)
 create mode 100644 api/core/tools/provider/builtin/devdocs/_assets/icon.svg
 create mode 100644 api/core/tools/provider/builtin/devdocs/devdocs.py
 create mode 100644 api/core/tools/provider/builtin/devdocs/devdocs.yaml
 create mode 100644 api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py
 create mode 100644 api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml

diff --git a/api/core/tools/provider/builtin/devdocs/_assets/icon.svg b/api/core/tools/provider/builtin/devdocs/_assets/icon.svg
new file mode 100644
index 0000000000..c7a19fabfb
--- /dev/null
+++ b/api/core/tools/provider/builtin/devdocs/_assets/icon.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path fill-rule="evenodd" clip-rule="evenodd" d="M15.6111 1.5837C17.2678 1.34703 18.75 2.63255 18.75 4.30606V5.68256C19.9395 6.31131 20.75 7.56102 20.75 9.00004V19C20.75 21.0711 19.0711 22.75 17 22.75H7C4.92893 22.75 3.25 21.0711 3.25 19V5.00004C3.25 4.99074 3.25017 4.98148 3.2505 4.97227C3.25017 4.95788 3.25 4.94344 3.25 4.92897C3.25 4.02272 3.91638 3.25437 4.81353 3.12621L15.6111 1.5837ZM4.75 6.75004V19C4.75 20.2427 5.75736 21.25 7 21.25H17C18.2426 21.25 19.25 20.2427 19.25 19V9.00004C19.25 7.7574 18.2426 6.75004 17 6.75004H4.75ZM5.07107 5.25004H17.25V4.30606C17.25 3.54537 16.5763 2.96104 15.8232 3.06862L5.02566 4.61113C4.86749 4.63373 4.75 4.76919 4.75 4.92897C4.75 5.10629 4.89375 5.25004 5.07107 5.25004ZM7.25 12C7.25 11.5858 7.58579 11.25 8 11.25H16C16.4142 11.25 16.75 11.5858 16.75 12C16.75 12.4143 16.4142 12.75 16 12.75H8C7.58579 12.75 7.25 12.4143 7.25 12ZM7.25 15.5C7.25 15.0858 7.58579 14.75 8 14.75H13.5C13.9142 14.75 14.25 15.0858 14.25 15.5C14.25 15.9143 13.9142 16.25 13.5 16.25H8C7.58579 16.25 7.25 15.9143 7.25 15.5Z" fill="#1C274D"/>
+</svg>
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/devdocs/devdocs.py b/api/core/tools/provider/builtin/devdocs/devdocs.py
new file mode 100644
index 0000000000..25cbe4d053
--- /dev/null
+++ b/api/core/tools/provider/builtin/devdocs/devdocs.py
@@ -0,0 +1,29 @@
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin.devdocs.tools.searchDevDocs import SearchDevDocsTool
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class DevDocsProvider(BuiltinToolProviderController):
+    """Built-in tool provider for DevDocs."""
+
+    def _validate_credentials(self, credentials: dict) -> None:
+        """Validate the provider by running one sample DevDocs lookup.
+
+        Raises:
+            ToolProviderCredentialValidationError: If the sample invocation raises.
+        """
+        try:
+            SearchDevDocsTool().fork_tool_runtime(
+                meta={
+                    "credentials": credentials,
+                }
+            ).invoke(
+                user_id='',
+                tool_parameters={
+                    "doc": "python~3.12",
+                    "topic": "library/code",
+                },
+            )
+        except Exception as e:
+            # Chain the cause explicitly so the original traceback is preserved.
+            raise ToolProviderCredentialValidationError(str(e)) from e
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/devdocs/devdocs.yaml b/api/core/tools/provider/builtin/devdocs/devdocs.yaml
new file mode 100644
index 0000000000..1db226fc4b
--- /dev/null
+++ b/api/core/tools/provider/builtin/devdocs/devdocs.yaml
@@ -0,0 +1,10 @@
+identity:
+  author: Richards Tu
+  name: devdocs
+  label:
+    en_US: DevDocs
+    zh_Hans: DevDocs
+  description:
+    en_US: Get official developer documentation from DevDocs.
+    zh_Hans: 从DevDocs获取官方开发者文档。
+  icon: icon.svg
diff --git a/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py
new file mode 100644
index 0000000000..1a244c5db3
--- /dev/null
+++ b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.py
@@ -0,0 +1,54 @@
+from typing import Any, Union
+
+import requests
+from pydantic import BaseModel, Field
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+# (connect, read) timeout in seconds; requests waits indefinitely when none is set.
+_REQUEST_TIMEOUT = (5, 30)
+
+
+class SearchDevDocsInput(BaseModel):
+    """Input schema: the documentation name and the topic path to look up."""
+
+    doc: str = Field(..., description="The name of the documentation.")
+    topic: str = Field(..., description="The path of the section/topic.")
+
+
+class SearchDevDocsTool(BuiltinTool):
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+        Fetch a DevDocs page and return a summary of its content.
+
+        The page is requested from https://documents.devdocs.io/{doc}/{topic}.html.
+
+        Args:
+            user_id (str): The ID of the user invoking the tool.
+            tool_parameters (dict[str, Any]): The parameters for the tool, including 'doc' and 'topic'.
+
+        Returns:
+            ToolInvokeMessage | list[ToolInvokeMessage]: A text message holding the page summary on
+            HTTP 200; otherwise a text message naming the missing parameter or the failing status code.
+
+        Raises:
+            requests.RequestException: If the HTTP request fails or times out.
+        """
+        doc = tool_parameters.get('doc', '')
+        topic = tool_parameters.get('topic', '')
+
+        if not doc:
+            return self.create_text_message('Please provide the documentation name.')
+        if not topic:
+            return self.create_text_message('Please provide the topic path.')
+
+        url = f"https://documents.devdocs.io/{doc}/{topic}.html"
+        # Bound the request so an unresponsive server cannot block the worker forever.
+        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
+
+        if response.status_code == 200:
+            content = response.text
+            return self.create_text_message(self.summary(user_id=user_id, content=content))
+        else:
+            return self.create_text_message(f"Failed to retrieve the documentation. Status code: {response.status_code}")
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml
new file mode 100644
index 0000000000..2476db9da4
--- /dev/null
+++ b/api/core/tools/provider/builtin/devdocs/tools/searchDevDocs.yaml
@@ -0,0 +1,34 @@
+identity:
+  name: searchDevDocs
+  author: Richards Tu
+  label:
+    en_US: Search Developer Docs
+    zh_Hans: 搜索开发者文档
+description:
+  human:
+    en_US: A tool for searching for a specific topic and path in DevDocs based on the provided documentation name and topic. Don't forget to add some few-shot examples to the system prompt; for example, the documentation name should be like \"vuex~4\", \"css\", or \"python~3.12\", while the topic should be like \"guide/actions\" for Vuex 4, \"display-box\" for CSS, or \"library/code\" for Python 3.12.
+    zh_Hans: 一个用于根据提供的文档名称和主题，在DevDocs中搜索特定主题和路径的工具。不要忘记在系统提示词中添加一些示例；例如，文档名称应该是\"vuex~4\"、\"css\"或\"python~3.12\"，而主题应该是\"guide/actions\"用于Vuex 4，\"display-box\"用于CSS，或\"library/code\"用于Python 3.12。
+  llm: A tool for searching for specific developer documentation in DevDocs based on the provided documentation name and topic.
+parameters:
+  - name: doc
+    type: string
+    required: true
+    label:
+      en_US: Documentation name
+      zh_Hans: 文档名称
+    human_description:
+      en_US: The name of the documentation.
+      zh_Hans: 文档名称。
+    llm_description: The name of the documentation, such as \"vuex~4\", \"css\", or \"python~3.12\". The exact value should be identified by the user.
+    form: llm
+  - name: topic
+    type: string
+    required: true
+    label:
+      en_US: Topic name
+      zh_Hans: 主题名称
+    human_description:
+      en_US: The path of the section/topic.
+      zh_Hans: 文档主题的路径。
+    llm_description: The path of the section/topic, such as \"guide/actions\" for Vuex 4, \"display-box\" for CSS, or \"library/code\" for Python 3.12.
+    form: llm

From a6cd0f0e73865be3ebbae0a5bdbb129a2f9d10f2 Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Fri, 29 Mar 2024 13:06:00 +0800
Subject: [PATCH 5/8] fix add segment when dataset and document is empty
 (#3021)

Co-authored-by: jyong <jyong@dify.ai>
---
 api/core/rag/datasource/vdb/milvus/milvus_vector.py    | 10 ++++++++++
 api/core/rag/datasource/vdb/qdrant/qdrant_vector.py    |  7 +++++++
 api/core/rag/datasource/vdb/vector_factory.py          |  4 ++--
 .../rag/datasource/vdb/weaviate/weaviate_vector.py     |  5 +++++
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py
index 203b7eff37..f62d603d8d 100644
--- a/api/core/rag/datasource/vdb/milvus/milvus_vector.py
+++ b/api/core/rag/datasource/vdb/milvus/milvus_vector.py
@@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
             utility.drop_collection(self._collection_name, None, using=alias)
 
     def text_exists(self, id: str) -> bool:
+        alias = uuid4().hex
+        if self._client_config.secure:
+            uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
+        else:
+            uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
+        connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
+
+        from pymilvus import utility
+        if not utility.has_collection(self._collection_name, using=alias):
+            return False
 
         result = self._client.query(collection_name=self._collection_name,
                                     filter=f'metadata["doc_id"] == "{id}"',
diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
index 6bd4b5c340..436e6b5f6a 100644
--- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
+++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
@@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
             )
 
     def text_exists(self, id: str) -> bool:
+        all_collection_name = []
+        collections_response = self._client.get_collections()
+        collection_list = collections_response.collections
+        for collection in collection_list:
+            all_collection_name.append(collection.name)
+        if self._collection_name not in all_collection_name:
+            return False
         response = self._client.retrieve(
             collection_name=self._collection_name,
             ids=[id]
diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py
index 27ae15a025..71fc07967c 100644
--- a/api/core/rag/datasource/vdb/vector_factory.py
+++ b/api/core/rag/datasource/vdb/vector_factory.py
@@ -128,8 +128,8 @@ class Vector:
         if kwargs.get('duplicate_check', False):
             documents = self._filter_duplicate_texts(documents)
         embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
-        self._vector_processor.add_texts(
-            documents=documents,
+        self._vector_processor.create(
+            texts=documents,
             embeddings=embeddings,
             **kwargs
         )
diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
index 6e317115b8..5d24ee9fd2 100644
--- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
+++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py
@@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
 
     def text_exists(self, id: str) -> bool:
         collection_name = self._collection_name
+        schema = self._default_schema(self._collection_name)
+
+        # check whether the index already exists
+        if not self._client.schema.contains(schema):
+            return False
         result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
             "path": ["doc_id"],
             "operator": "Equal",

From 59909b5ca767d7c0e88191dc9462cff486393f16 Mon Sep 17 00:00:00 2001
From: kun321 <124553455+kun321@users.noreply.github.com>
Date: Fri, 29 Mar 2024 13:16:52 +0800
Subject: [PATCH 6/8] update the discord Invalid invite (#3028)

---
 CONTRIBUTING.md    | 2 +-
 CONTRIBUTING_CN.md | 2 +-
 README.md          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e1c087a6cd..992126551c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -155,4 +155,4 @@ And that's it! Once your PR is merged, you will be featured as a contributor in
 
 ## Getting Help
 
-If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/AhzKf7dNgk) for a quick chat. 
+If you ever get stuck or have a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat. 
diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md
index 6adfed6b6c..08c5a0a4bd 100644
--- a/CONTRIBUTING_CN.md
+++ b/CONTRIBUTING_CN.md
@@ -152,4 +152,4 @@ Dify的后端使用Python编写，使用[Flask](https://flask.palletsprojects.co
 
 ## 获取帮助
 
-如果你在贡献过程中遇到困难或者有任何问题，可以通过相关的 GitHub 问题提出你的疑问，或者加入我们的 [Discord](https://discord.gg/AhzKf7dNgk) 进行快速交流。
+如果你在贡献过程中遇到困难或者有任何问题，可以通过相关的 GitHub 问题提出你的疑问，或者加入我们的 [Discord](https://discord.gg/8Tpq4AcN9c) 进行快速交流。
diff --git a/README.md b/README.md
index 80a60e9cad..154fdd8adb 100644
--- a/README.md
+++ b/README.md
@@ -131,7 +131,7 @@ At the same time, please consider supporting Dify by sharing it on social media
 
 ### Translations
 
-We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/AhzKf7dNgk).
+We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
 
 ## Community & Support
 

From 1294ce40410d77bfdc7c7a8c98e658c62461e592 Mon Sep 17 00:00:00 2001
From: chenhe <guchenhe@gmail.com>
Date: Mon, 5 Feb 2024 16:59:18 +0800
Subject: [PATCH 7/8] create launch.json config

---
 .vscode/launch.json | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .vscode/launch.json

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000000..181ae6f1b4
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,30 @@
+{
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      // set root directory to api/ folder
+      "cwd": "${workspaceFolder}/api",
+      "name": "Python: Flask",
+      "type": "python",
+      "request": "launch",
+      "module": "flask",
+      "env": {
+        "FLASK_APP": "app.py",
+        "FLASK_DEBUG": "1",
+        "GEVENT_SUPPORT": "True"
+      },
+      "args": [
+        "run",
+        "--no-debugger",
+        "--no-reload",
+        "--host=0.0.0.0",
+        "--port=5001"
+      ],
+      "jinja": true,
+      "justMyCode": true
+    }
+  ]
+}
\ No newline at end of file

From 0f94e4cd011111dbbb716dfb633a9b48b74d3039 Mon Sep 17 00:00:00 2001
From: chenhe <guchenhe@gmail.com>
Date: Sat, 16 Mar 2024 07:44:49 -0700
Subject: [PATCH 8/8] optionally specify available bedrock model used in
 validation

---
 .vscode/launch.json                           | 30 -------------------
 .../model_providers/bedrock/bedrock.py        |  6 ++--
 .../model_providers/bedrock/bedrock.yaml      |  3 +-
 3 files changed, 6 insertions(+), 33 deletions(-)
 delete mode 100644 .vscode/launch.json

diff --git a/.vscode/launch.json b/.vscode/launch.json
deleted file mode 100644
index 181ae6f1b4..0000000000
--- a/.vscode/launch.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
-  // Use IntelliSense to learn about possible attributes.
-  // Hover to view descriptions of existing attributes.
-  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-  "version": "0.2.0",
-  "configurations": [
-    {
-      // set root directory to api/ folder
-      "cwd": "${workspaceFolder}/api",
-      "name": "Python: Flask",
-      "type": "python",
-      "request": "launch",
-      "module": "flask",
-      "env": {
-        "FLASK_APP": "app.py",
-        "FLASK_DEBUG": "1",
-        "GEVENT_SUPPORT": "True"
-      },
-      "args": [
-        "run",
-        "--no-debugger",
-        "--no-reload",
-        "--host=0.0.0.0",
-        "--port=5001"
-      ],
-      "jinja": true,
-      "justMyCode": true
-    }
-  ]
-}
\ No newline at end of file
diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.py b/api/core/model_runtime/model_providers/bedrock/bedrock.py
index 96cb90280e..e99bc52ff8 100644
--- a/api/core/model_runtime/model_providers/bedrock/bedrock.py
+++ b/api/core/model_runtime/model_providers/bedrock/bedrock.py
@@ -17,9 +17,11 @@ class BedrockProvider(ModelProvider):
         """
         try:
             model_instance = self.get_model_instance(ModelType.LLM)
-            bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
+
+            # Use `amazon.titan-text-lite-v1` model by default for validating credentials
+            model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
             model_instance.validate_credentials(
-                model=bedrock_validate_model_name,
+                model=model_for_validation,
                 credentials=credentials
             )
         except CredentialsValidateFailedError as ex:
diff --git a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml
index e1923f8f8a..19ce51ddcd 100644
--- a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml
@@ -74,7 +74,8 @@ provider_credential_schema:
       label:
         en_US: Available Model Name
         zh_Hans: 可用模型名称
-      type: text-input
+      type: secret-input
       placeholder:
         en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
         zh_Hans: 为了进行验证，请输入一个您可用的模型名称 (例如：amazon.titan-text-lite-v1)
+