From d135677c25aa632d13b6a4db8eca68365cd4b660 Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Thu, 20 Mar 2025 01:38:15 +0800
Subject: [PATCH 1/7] add vdb document id  index (#16244)

Co-authored-by: crazywoola <427733928@qq.com>
---
 .../vdb/elasticsearch/elasticsearch_vector.py |  3 +-
 api/core/rag/datasource/vdb/field.py          |  1 +
 .../datasource/vdb/qdrant/qdrant_vector.py    |  4 +++
 .../tidb_on_qdrant/tidb_on_qdrant_vector.py   | 36 ++++++++-----------
 .../datasource/vdb/tidb_vector/tidb_vector.py |  2 ++
 5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
index 093368b0cc..033d05a077 100644
--- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
+++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py
@@ -196,7 +196,8 @@ class ElasticSearchVector(BaseVector):
                         Field.METADATA_KEY.value: {
                             "type": "object",
                             "properties": {
-                                "doc_id": {"type": "keyword"}  # Map doc_id to keyword type
+                                "doc_id": {"type": "keyword"},  # Map doc_id to keyword type
+                                "document_id": {"type": "keyword"},  # Map document_id to keyword type
                             },
                         },
                     }
diff --git a/api/core/rag/datasource/vdb/field.py b/api/core/rag/datasource/vdb/field.py
index a64407bce1..9887e21b7c 100644
--- a/api/core/rag/datasource/vdb/field.py
+++ b/api/core/rag/datasource/vdb/field.py
@@ -11,3 +11,4 @@ class Field(Enum):
     TEXT_KEY = "text"
     PRIMARY_KEY = "id"
     DOC_ID = "metadata.doc_id"
+    DOCUMENT_ID = "metadata.document_id"
diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
index 73ce8201fd..4efd90667a 100644
--- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
+++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
@@ -134,6 +134,10 @@ class QdrantVector(BaseVector):
                 self._client.create_payload_index(
                     collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD
                 )
+                # create document_id payload index
+                self._client.create_payload_index(
+                    collection_name, Field.DOCUMENT_ID.value, field_schema=PayloadSchemaType.KEYWORD
+                )
                 # create full text index
                 text_index_params = TextIndexParams(
                     type=TextIndexType.TEXT,
diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py
index ae4baeb17e..6a61fe9496 100644
--- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py
+++ b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py
@@ -144,6 +144,10 @@ class TidbOnQdrantVector(BaseVector):
                 self._client.create_payload_index(
                     collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD
                 )
+                # create document_id payload index
+                self._client.create_payload_index(
+                    collection_name, Field.DOCUMENT_ID.value, field_schema=PayloadSchemaType.KEYWORD
+                )
                 # create full text index
                 text_index_params = TextIndexParams(
                     type=TextIndexType.TEXT,
@@ -318,23 +322,17 @@ class TidbOnQdrantVector(BaseVector):
     def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
         from qdrant_client.http import models
 
-        filter = models.Filter(
-            must=[
-                models.FieldCondition(
-                    key="group_id",
-                    match=models.MatchValue(value=self._group_id),
-                ),
-            ],
-        )
+        filter = None
         document_ids_filter = kwargs.get("document_ids_filter")
         if document_ids_filter:
-            if filter.must:
-                filter.must.append(
+            filter = models.Filter(
+                must=[
                     models.FieldCondition(
                         key="metadata.document_id",
                         match=models.MatchAny(any=document_ids_filter),
                     )
-                )
+                ],
+            )
         results = self._client.search(
             collection_name=self._collection_name,
             query_vector=query_vector,
@@ -369,23 +367,17 @@ class TidbOnQdrantVector(BaseVector):
         """
         from qdrant_client.http import models
 
-        scroll_filter = models.Filter(
-            must=[
-                models.FieldCondition(
-                    key="page_content",
-                    match=models.MatchText(text=query),
-                )
-            ]
-        )
+        scroll_filter = None
         document_ids_filter = kwargs.get("document_ids_filter")
         if document_ids_filter:
-            if scroll_filter.must:
-                scroll_filter.must.append(
+            scroll_filter = models.Filter(
+                must=[
                     models.FieldCondition(
                         key="metadata.document_id",
                         match=models.MatchAny(any=document_ids_filter),
                     )
-                )
+                ]
+            )
         response = self._client.scroll(
             collection_name=self._collection_name,
             scroll_filter=scroll_filter,
diff --git a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py
index 77c5786042..efa68059e5 100644
--- a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py
+++ b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py
@@ -105,10 +105,12 @@ class TiDBVector(BaseVector):
                         text TEXT NOT NULL,
                         meta JSON NOT NULL,
                         doc_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.doc_id'))) STORED,
+                        document_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.document_id'))) STORED,
                         vector VECTOR<FLOAT>({dimension}) NOT NULL,
                         create_time DATETIME DEFAULT CURRENT_TIMESTAMP,
                         update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                         KEY (doc_id),
+                        KEY (document_id),
                         VECTOR INDEX idx_vector (({tidb_dist_func}(vector))) USING HNSW
                     );
                 """)

From daad5824bf6c23aec1c2169b3866c647af96393a Mon Sep 17 00:00:00 2001
From: wyy-holding <59436937+wyy-holding@users.noreply.github.com>
Date: Thu, 20 Mar 2025 09:28:09 +0800
Subject: [PATCH 2/7] add kubernetes yaml for dify by docker-compose.yaml
 (#16246)

---
 README.md    | 1 +
 README_AR.md | 1 +
 README_BN.md | 1 +
 README_CN.md | 1 +
 README_DE.md | 1 +
 README_ES.md | 1 +
 README_FR.md | 1 +
 README_JA.md | 1 +
 README_KL.md | 1 +
 README_KR.md | 1 +
 README_PT.md | 1 +
 README_SI.md | 1 +
 README_TR.md | 1 +
 README_TW.md | 1 +
 README_VI.md | 1 +
 15 files changed, 15 insertions(+)

diff --git a/README.md b/README.md
index c97bf9cf3a..87ebc9bafc 100644
--- a/README.md
+++ b/README.md
@@ -206,6 +206,7 @@ If you'd like to configure a highly-available setup, there are community-contrib
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Using Terraform for Deployment
 
diff --git a/README_AR.md b/README_AR.md
index 7f167a5a2d..e58f59da5d 100644
--- a/README_AR.md
+++ b/README_AR.md
@@ -189,6 +189,7 @@ docker compose up -d
 - [رسم بياني Helm من قبل @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [رسم بياني Helm من قبل @magicsong](https://github.com/magicsong/ai-charts)
 - [ملف YAML من قبل @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [ملف YAML من قبل @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### استخدام Terraform للتوزيع
 
diff --git a/README_BN.md b/README_BN.md
index 1329ddf1ed..3ebc81af5d 100644
--- a/README_BN.md
+++ b/README_BN.md
@@ -205,6 +205,7 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন 
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### টেরাফর্ম ব্যবহার করে ডিপ্লয়
 
diff --git a/README_CN.md b/README_CN.md
index d4fa930c2f..33e34423ff 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -207,6 +207,7 @@ docker compose up -d
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML 文件 by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML 文件 by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### 使用 Terraform 部署
 
diff --git a/README_DE.md b/README_DE.md
index d260d17136..b3b9bf3221 100644
--- a/README_DE.md
+++ b/README_DE.md
@@ -207,6 +207,7 @@ Falls Sie eine hochverfügbare Konfiguration einrichten möchten, gibt es von de
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Terraform für die Bereitstellung verwenden
 
diff --git a/README_ES.md b/README_ES.md
index 1547ce8271..d14afdd2eb 100644
--- a/README_ES.md
+++ b/README_ES.md
@@ -207,6 +207,7 @@ Si desea configurar una configuración de alta disponibilidad, la comunidad prop
 - [Gráfico Helm por @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Gráfico Helm por @magicsong](https://github.com/magicsong/ai-charts)
 - [Ficheros YAML por @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [Ficheros YAML por @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Uso de Terraform para el despliegue
 
diff --git a/README_FR.md b/README_FR.md
index 5ff9fe2ded..031196303e 100644
--- a/README_FR.md
+++ b/README_FR.md
@@ -205,6 +205,7 @@ Si vous souhaitez configurer une configuration haute disponibilité, la communau
 - [Helm Chart par @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart par @magicsong](https://github.com/magicsong/ai-charts)
 - [Fichier YAML par @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [Fichier YAML par @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Utilisation de Terraform pour le déploiement
 
diff --git a/README_JA.md b/README_JA.md
index 6575c5f113..3b7a6f50db 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -206,6 +206,7 @@ docker compose up -d
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Terraformを使用したデプロイ
 
diff --git a/README_KL.md b/README_KL.md
index 2ad3744f15..ccadb77274 100644
--- a/README_KL.md
+++ b/README_KL.md
@@ -205,6 +205,7 @@ If you'd like to configure a highly-available setup, there are community-contrib
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Terraform atorlugu pilersitsineq
 
diff --git a/README_KR.md b/README_KR.md
index 3f9ea20099..c1a98f8b68 100644
--- a/README_KR.md
+++ b/README_KR.md
@@ -199,6 +199,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Terraform을 사용한 배포
 
diff --git a/README_PT.md b/README_PT.md
index 90b508c8f6..5b3c782645 100644
--- a/README_PT.md
+++ b/README_PT.md
@@ -205,6 +205,7 @@ Se deseja configurar uma instalação de alta disponibilidade, há [Helm Charts]
 - [Helm Chart de @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Helm Chart de @magicsong](https://github.com/magicsong/ai-charts)
 - [Arquivo YAML por @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [Arquivo YAML por @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Usando o Terraform para Implantação
 
diff --git a/README_SI.md b/README_SI.md
index 5b7c9611f9..7c0867c776 100644
--- a/README_SI.md
+++ b/README_SI.md
@@ -205,6 +205,7 @@ Star Dify on GitHub and be instantly notified of new releases.
 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Uporaba Terraform za uvajanje
 
diff --git a/README_TR.md b/README_TR.md
index 7af8582b7d..f8890b00ef 100644
--- a/README_TR.md
+++ b/README_TR.md
@@ -198,6 +198,7 @@ Yüksek kullanılabilirliğe sahip bir kurulum yapılandırmak isterseniz, Dify'
 - [@LeoQuote tarafından Helm Chart](https://github.com/douban/charts/tree/master/charts/dify)
 - [@BorisPolonsky tarafından Helm Chart](https://github.com/BorisPolonsky/dify-helm)
 - [@Winson-030 tarafından YAML dosyası](https://github.com/Winson-030/dify-kubernetes)
+- [@wyy-holding tarafından YAML dosyası](https://github.com/wyy-holding/dify-k8s)
 
 #### Dağıtım için Terraform Kullanımı
 
diff --git a/README_TW.md b/README_TW.md
index 4bfc81a25e..260f1e80ac 100644
--- a/README_TW.md
+++ b/README_TW.md
@@ -204,6 +204,7 @@ Dify 的所有功能都提供相應的 API，因此您可以輕鬆地將 Dify 
 - [由 @LeoQuote 提供的 Helm Chart](https://github.com/douban/charts/tree/master/charts/dify)
 - [由 @BorisPolonsky 提供的 Helm Chart](https://github.com/BorisPolonsky/dify-helm)
 - [由 @Winson-030 提供的 YAML 文件](https://github.com/Winson-030/dify-kubernetes)
+- [由 @wyy-holding 提供的 YAML 文件](https://github.com/wyy-holding/dify-k8s)
 
 ### 使用 Terraform 進行部署
 
diff --git a/README_VI.md b/README_VI.md
index 2f64541285..15d2d5ae80 100644
--- a/README_VI.md
+++ b/README_VI.md
@@ -200,6 +200,7 @@ Nếu bạn muốn cấu hình một cài đặt có độ sẵn sàng cao, có
 - [Helm Chart bởi @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart bởi @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Tệp YAML bởi @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+- [Tệp YAML bởi @wyy-holding](https://github.com/wyy-holding/dify-k8s)
 
 #### Sử dụng Terraform để Triển khai
 

From 285314da1c35edcf08aa97e73101457a24f6bce6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=83=AA=E3=82=A4=E3=83=8E=20Lin?= <sorphwer@gmail.com>
Date: Thu, 20 Mar 2025 09:28:42 +0800
Subject: [PATCH 3/7] fix: update workflow doc (#16251)

---
 .../develop/template/template_workflow.en.mdx | 37 ++++++++++---------
 .../develop/template/template_workflow.ja.mdx | 36 ++++++++++--------
 .../develop/template/template_workflow.zh.mdx | 36 +++++++++---------
 3 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/web/app/components/develop/template/template_workflow.en.mdx b/web/app/components/develop/template/template_workflow.en.mdx
index 27c0d26505..c8b4b614c8 100644
--- a/web/app/components/develop/template/template_workflow.en.mdx
+++ b/web/app/components/develop/template/template_workflow.en.mdx
@@ -43,18 +43,9 @@ Workflow applications offers non-session support and is ideal for translation, a
       - `inputs` (object) Required
         Allows the entry of various variable values defined by the App.
         The `inputs` parameter contains multiple key/value pairs, with each key corresponding to a specific variable and each value being the specific value for that variable.
-        The workflow application requires at least one key/value pair to be inputted.
-        If the variable is of File type, specify an object that has the keys described in `files` below.
-      - `response_mode` (string) Required
-        The mode of response return, supporting:
-        - `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)).
-        - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long)
-        <i>Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds.</i>
-      - `user` (string) Required
-        User identifier, used to define the identity of the end-user for retrieval and statistics.
-        Should be uniquely defined by the developer within the application.
-      - `files` (array[object]) Optional
-        File list, suitable for inputting files combined with text understanding and answering questions, available only when the model supports file parsing and understanding capability.
+        The workflow application requires at least one key/value pair to be provided. A variable can be of the File Array type.
+        A File Array type variable is suitable for inputting files combined with text understanding and answering questions; it is available only when the model supports file parsing and understanding capabilities.
+        If the variable is of the File Array type, the corresponding value should be a list whose elements contain the following attributes:
           - `type` (string) Supported type: 
             - `document` ('TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB')
             - `image` ('JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG')
@@ -65,6 +56,15 @@ Workflow applications offers non-session support and is ideal for translation, a
           - `url` (string) Image URL (when the transfer method is `remote_url`)
           - `upload_file_id` (string) Uploaded file ID, which must be obtained by uploading through the File Upload API in advance (when the transfer method is `local_file`)
 
+      - `response_mode` (string) Required
+        The mode of response return, supporting:
+        - `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)).
+        - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long)
+        <i>Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds.</i>
+      - `user` (string) Required
+        User identifier, used to define the identity of the end-user for retrieval and statistics.
+        Should be uniquely defined by the developer within the application.
+
     ### Response
     When `response_mode` is `blocking`, return a CompletionResponse object.
     When `response_mode` is `streaming`, return a ChunkCompletionResponse stream.
@@ -190,15 +190,18 @@ Workflow applications offers non-session support and is ideal for translation, a
     ```
 
     </CodeGroup>
-    <CodeGroup title="File variable example">
+    <CodeGroup title="Example: file array as an input variable">
       ```json {{ title: 'File variable example' }}
       {
         "inputs": {
-          "{variable_name}": {
+          "{variable_name}": 
+          [
+            {
             "transfer_method": "local_file",
             "upload_file_id": "{upload_file_id}",
             "type": "{document_type}"
-          }
+            }
+          ]
         }
       }
       ```
@@ -279,11 +282,11 @@ Workflow applications offers non-session support and is ideal for translation, a
 
           data = {
               "inputs": {
-                  "orig_mail": {
+                  "orig_mail": [{
                       "transfer_method": "local_file",
                       "upload_file_id": file_id,
                       "type": "document"
-                  }
+                  }]
               },
               "response_mode": response_mode,
               "user": user
diff --git a/web/app/components/develop/template/template_workflow.ja.mdx b/web/app/components/develop/template/template_workflow.ja.mdx
index 9e66973db7..3bd56807ae 100644
--- a/web/app/components/develop/template/template_workflow.ja.mdx
+++ b/web/app/components/develop/template/template_workflow.ja.mdx
@@ -43,18 +43,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
       - `inputs` (object) 必須
         アプリで定義されたさまざまな変数値の入力を許可します。
         `inputs`パラメータには複数のキー/値ペアが含まれ、各キーは特定の変数に対応し、各値はその変数の特定の値です。
-        ワークフローアプリケーションは少なくとも1つのキー/値ペアの入力を必要とします。
-        変数がファイルタイプの場合、以下の`files`で説明されているキーを持つオブジェクトを指定してください。
-      - `response_mode` (string) 必須
-        応答の返却モードを指定します。サポートされているモード：
-        - `streaming` ストリーミングモード（推奨）、SSE（[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)）を通じてタイプライターのような出力を実装します。
-        - `blocking` ブロッキングモード、実行完了後に結果を返します。（プロセスが長い場合、リクエストが中断される可能性があります）
-        <i>Cloudflareの制限により、100秒後に応答がない場合、リクエストは中断されます。</i>
-      - `user` (string) 必須
-        ユーザー識別子、エンドユーザーのアイデンティティを定義するために使用されます。
-        アプリケーション内で開発者によって一意に定義される必要があります。
-      - `files` (array[object]) オプション
+        ワークフローアプリケーションは少なくとも1つのキー/値ペアの入力を必要とします。値はファイルリストである場合もあります。
         ファイルリストは、テキスト理解と質問への回答を組み合わせたファイルの入力に適しています。モデルがファイルの解析と理解機能をサポートしている場合にのみ使用できます。
+
+        変数がファイルリストの場合、リストの各要素は以下の属性を持つ必要があります。
           - `type` (string) サポートされているタイプ: 
             - `document` ('TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB')
             - `image` ('JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG')
@@ -65,6 +57,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - `url` (string) 画像URL（転送方法が`remote_url`の場合）
           - `upload_file_id` (string) アップロードされたファイルID、事前にファイルアップロードAPIを通じて取得する必要があります（転送方法が`local_file`の場合）
 
+      - `response_mode` (string) 必須
+        応答の返却モードを指定します。サポートされているモード：
+        - `streaming` ストリーミングモード（推奨）、SSE（[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)）を通じてタイプライターのような出力を実装します。
+        - `blocking` ブロッキングモード、実行完了後に結果を返します。（プロセスが長い場合、リクエストが中断される可能性があります）
+        <i>Cloudflareの制限により、100秒後に応答がない場合、リクエストは中断されます。</i>
+      - `user` (string) 必須
+        ユーザー識別子、エンドユーザーのアイデンティティを定義するために使用されます。
+        アプリケーション内で開発者によって一意に定義される必要があります。
+      - `files` (array[object]) オプション
+        
+
     ### 応答
     `response_mode`が`blocking`の場合、CompletionResponseオブジェクトを返します。
     `response_mode`が`streaming`の場合、ChunkCompletionResponseストリームを返します。
@@ -194,11 +197,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
       ```json {{ title: 'ファイル変数の例' }}
       {
         "inputs": {
-          "{variable_name}": {
+          "{variable_name}": 
+          [
+            {
             "transfer_method": "local_file",
             "upload_file_id": "{upload_file_id}",
             "type": "{document_type}"
-          }
+            }
+          ]
         }
       }
       ```
@@ -279,11 +285,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 
           data = {
               "inputs": {
-                  "orig_mail": {
+                  "orig_mail": [{
                       "transfer_method": "local_file",
                       "upload_file_id": file_id,
                       "type": "document"
-                  }
+                  }]
               },
               "response_mode": response_mode,
               "user": user
diff --git a/web/app/components/develop/template/template_workflow.zh.mdx b/web/app/components/develop/template/template_workflow.zh.mdx
index 40dfb863a0..c687fa1a51 100644
--- a/web/app/components/develop/template/template_workflow.zh.mdx
+++ b/web/app/components/develop/template/template_workflow.zh.mdx
@@ -41,18 +41,8 @@ Workflow 应用无会话支持，适合用于翻译/文章写作/总结 AI 等
     ### Request Body
       - `inputs` (object) Required
         允许传入 App 定义的各变量值。
-        inputs 参数包含了多组键值对（Key/Value pairs），每组的键对应一个特定变量，每组的值则是该变量的具体值。
-        如果变量是文件类型，请指定一个包含以下 `files` 中所述键的对象。
-      - `response_mode` (string) Required
-        返回响应模式，支持：
-        - `streaming` 流式模式（推荐）。基于 SSE（**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**）实现类似打字机输出方式的流式返回。
-        - `blocking` 阻塞模式，等待执行完毕后返回结果。（请求若流程较长可能会被中断）。
-        <i>由于 Cloudflare 限制，请求会在 100 秒超时无返回后中断。</i>
-      - `user` (string) Required
-        用户标识，用于定义终端用户的身份，方便检索、统计。
-        由开发者定义规则，需保证用户标识在应用内唯一。
-      - `files` (array[object]) Optional
-          文件列表，适用于传入文件结合文本理解并回答问题，仅当模型支持该类型文件解析能力时可用。
+        inputs 参数包含了多组键值对（Key/Value pairs），每组的键对应一个特定变量，每组的值则是该变量的具体值。变量可以是文件列表类型。
+        文件列表类型变量适用于传入文件结合文本理解并回答问题，仅当模型支持该类型文件解析能力时可用。如果该变量是文件列表类型，该变量对应的值应是列表格式，其中每个元素应包含以下内容：
           - `type` (string) 支持类型：
             - `document` 具体类型包含：'TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'
             - `image` 具体类型包含：'JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'
@@ -62,6 +52,15 @@ Workflow 应用无会话支持，适合用于翻译/文章写作/总结 AI 等
           - `transfer_method` (string) 传递方式，`remote_url` 图片地址 / `local_file` 上传文件
           - `url` (string) 图片地址（仅当传递方式为 `remote_url` 时）
           - `upload_file_id` (string) (string) 上传文件 ID（仅当传递方式为 `local_file` 时）
+      - `response_mode` (string) Required
+        返回响应模式，支持：
+        - `streaming` 流式模式（推荐）。基于 SSE（**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**）实现类似打字机输出方式的流式返回。
+        - `blocking` 阻塞模式，等待执行完毕后返回结果。（请求若流程较长可能会被中断）。
+        <i>由于 Cloudflare 限制，请求会在 100 秒超时无返回后中断。</i>
+      - `user` (string) Required
+        用户标识，用于定义终端用户的身份，方便检索、统计。
+        由开发者定义规则，需保证用户标识在应用内唯一。
+
 
     ### Response
     当 `response_mode` 为 `blocking` 时，返回 CompletionResponse object。
@@ -184,15 +183,18 @@ Workflow 应用无会话支持，适合用于翻译/文章写作/总结 AI 等
     }'
     ```
     </CodeGroup>
-    <CodeGroup title="File variable example">
+    <CodeGroup title="Example: file array as an input variable">
       ```json {{ title: 'File variable example' }}
       {
         "inputs": {
-          "{variable_name}": {
+          "{variable_name}": 
+          [
+            {
             "transfer_method": "local_file",
             "upload_file_id": "{upload_file_id}",
             "type": "{document_type}"
-          }
+            }
+          ]
         }
       }
       ```
@@ -273,11 +275,11 @@ Workflow 应用无会话支持，适合用于翻译/文章写作/总结 AI 等
 
           data = {
               "inputs": {
-                  "orig_mail": {
+                  "orig_mail": [{
                       "transfer_method": "local_file",
                       "upload_file_id": file_id,
                       "type": "document"
-                  }
+                  }]
               },
               "response_mode": response_mode,
               "user": user

From 79118f51c2522bf518becbee74b02e96eb09685f Mon Sep 17 00:00:00 2001
From: Ning <accelerator314@gmail.com>
Date: Thu, 20 Mar 2025 09:38:46 +0800
Subject: [PATCH 4/7] fix: dify-web docker MAX_TOOLS_NUM environment value not
 work (#16241)

---
 web/docker/entrypoint.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/web/docker/entrypoint.sh b/web/docker/entrypoint.sh
index d0ee56b889..797b61081a 100755
--- a/web/docker/entrypoint.sh
+++ b/web/docker/entrypoint.sh
@@ -27,5 +27,6 @@ export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=${TEXT_GENERATION_TIMEOUT_MS}
 export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST}
 export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE}
 export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH}
+export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM}
 
 pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon

From c1f3d968bfa36f44ccf23860fd7f8d80f2423407 Mon Sep 17 00:00:00 2001
From: GuanMu <ballmanjq@gmail.com>
Date: Thu, 20 Mar 2025 10:55:37 +0800
Subject: [PATCH 5/7] fix: enhance React imports in LLM panel component #16282
 (#16283)

---
 web/app/components/workflow/nodes/llm/panel.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/app/components/workflow/nodes/llm/panel.tsx b/web/app/components/workflow/nodes/llm/panel.tsx
index c655188536..e1264ad89e 100644
--- a/web/app/components/workflow/nodes/llm/panel.tsx
+++ b/web/app/components/workflow/nodes/llm/panel.tsx
@@ -1,5 +1,5 @@
 import type { FC } from 'react'
-import React from 'react'
+import React, { useCallback } from 'react'
 import { useTranslation } from 'react-i18next'
 import MemoryConfig from '../_base/components/memory-config'
 import VarReferencePicker from '../_base/components/variable/var-reference-picker'

From 2c9af712a2936df8b020c3825d4e85be09801c4f Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Thu, 20 Mar 2025 14:33:32 +0800
Subject: [PATCH 6/7] Fix/create document by api with metadata (#16307)

Co-authored-by: zxhlyh <jasonapring2015@outlook.com>
---
 api/commands.py                               |  79 ++++++-
 .../service_api/dataset/document.py           |  96 --------
 api/extensions/ext_commands.py                |   6 +-
 api/services/dataset_service.py               |  15 --
 .../knowledge_entities/knowledge_entities.py  |   1 -
 .../datasets/template/template.en.mdx         | 200 -----------------
 .../datasets/template/template.zh.mdx         | 205 ------------------
 7 files changed, 75 insertions(+), 527 deletions(-)

diff --git a/api/commands.py b/api/commands.py
index df67f29aff..94e7e74e36 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -20,7 +20,7 @@ from libs.helper import email as email_validate
 from libs.password import hash_password, password_pattern, valid_password
 from libs.rsa import generate_key_pair
 from models import Tenant
-from models.dataset import Dataset, DatasetCollectionBinding, DocumentSegment
+from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from models.model import Account, App, AppAnnotationSetting, AppMode, Conversation, MessageAnnotation
 from models.provider import Provider, ProviderModel
@@ -483,14 +483,11 @@ def convert_to_agent_apps():
     click.echo(click.style("Conversion complete. Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green"))
 
 
-@click.command("add-qdrant-doc-id-index", help="Add Qdrant doc_id index.")
+@click.command("add-qdrant-index", help="Add Qdrant index.")
 @click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.")
-def add_qdrant_doc_id_index(field: str):
-    click.echo(click.style("Starting Qdrant doc_id index creation.", fg="green"))
-    vector_type = dify_config.VECTOR_STORE
-    if vector_type != "qdrant":
-        click.echo(click.style("This command only supports Qdrant vector store.", fg="red"))
-        return
+def add_qdrant_index(field: str):
+    click.echo(click.style("Starting Qdrant index creation.", fg="green"))
+
     create_count = 0
 
     try:
@@ -539,6 +536,72 @@ def add_qdrant_doc_id_index(field: str):
     click.echo(click.style(f"Index creation complete. Created {create_count} collection indexes.", fg="green"))
 
 
+@click.command("old-metadata-migration", help="Old metadata migration.")
+def old_metadata_migration():
+    """
+    Backfill DatasetMetadata / DatasetMetadataBinding rows from each document's doc_metadata keys.
+
+    Documents are read 50 per page, newest first. For every key in a document's doc_metadata, a
+    "string"-typed DatasetMetadata named after the key is created for the dataset if missing, and
+    the document is bound to it unless such a binding already exists.
+    """
+    click.echo(click.style("Starting old metadata migration.", fg="green"))
+
+    page = 1
+    while True:
+        try:
+            documents = (
+                # `is not None` on a column is evaluated by Python (always True); is_not() emits IS NOT NULL.
+                DatasetDocument.query.filter(DatasetDocument.doc_metadata.is_not(None))
+                .order_by(DatasetDocument.created_at.desc())
+                .paginate(page=page, per_page=50)
+            )
+        except NotFound:
+            # NOTE(review): presumably paginate() raises NotFound past the last page - confirm.
+            break
+        if not documents:
+            break
+        for document in documents:
+            if document.doc_metadata:
+                for key in document.doc_metadata:
+                    dataset_metadata = (
+                        db.session.query(DatasetMetadata)
+                        .filter(DatasetMetadata.dataset_id == document.dataset_id, DatasetMetadata.name == key)
+                        .first()
+                    )
+                    if dataset_metadata:
+                        dataset_metadata_binding = DatasetMetadataBinding.query.filter(
+                            DatasetMetadataBinding.dataset_id == document.dataset_id,
+                            DatasetMetadataBinding.document_id == document.id,
+                            DatasetMetadataBinding.metadata_id == dataset_metadata.id,
+                        ).first()
+                    else:
+                        dataset_metadata = DatasetMetadata(
+                            tenant_id=document.tenant_id,
+                            dataset_id=document.dataset_id,
+                            name=key,
+                            type="string",
+                            created_by=document.created_by,
+                        )
+                        db.session.add(dataset_metadata)
+                        # Flush before dataset_metadata.id is read below (presumably assigned on insert).
+                        db.session.flush()
+                        dataset_metadata_binding = None
+                    if not dataset_metadata_binding:
+                        dataset_metadata_binding = DatasetMetadataBinding(
+                            tenant_id=document.tenant_id,
+                            dataset_id=document.dataset_id,
+                            metadata_id=dataset_metadata.id,
+                            document_id=document.id,
+                            created_by=document.created_by,
+                        )
+                        db.session.add(dataset_metadata_binding)
+                # One commit per document that carries metadata.
+                db.session.commit()
+        page += 1
+    click.echo(click.style("Old metadata migration completed.", fg="green"))
+
+
 @click.command("create-tenant", help="Create account and tenant.")
 @click.option("--email", prompt=True, help="Tenant account email.")
 @click.option("--name", prompt=True, help="Workspace name.")
diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py
index d4e67b6596..995444ee48 100644
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@@ -18,7 +18,6 @@ from controllers.service_api.app.error import (
 from controllers.service_api.dataset.error import (
     ArchivedDocumentImmutableError,
     DocumentIndexingError,
-    InvalidMetadataError,
 )
 from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check
 from core.errors.error import ProviderTokenNotInitError
@@ -51,8 +50,6 @@ class DocumentAddByTextApi(DatasetApiResource):
             "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json"
         )
         parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
-        parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
-        parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
 
         args = parser.parse_args()
         dataset_id = str(dataset_id)
@@ -65,28 +62,6 @@ class DocumentAddByTextApi(DatasetApiResource):
         if not dataset.indexing_technique and not args["indexing_technique"]:
             raise ValueError("indexing_technique is required.")
 
-        # Validate metadata if provided
-        if args.get("doc_type") or args.get("doc_metadata"):
-            if not args.get("doc_type") or not args.get("doc_metadata"):
-                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
-
-            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
-                raise InvalidMetadataError(
-                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
-                )
-
-            if not isinstance(args["doc_metadata"], dict):
-                raise InvalidMetadataError("doc_metadata must be a dictionary")
-
-            # Validate metadata schema based on doc_type
-            if args["doc_type"] != "others":
-                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
-                for key, value in args["doc_metadata"].items():
-                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
-                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
-            # set to MetaDataConfig
-            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
-
         text = args.get("text")
         name = args.get("name")
         if text is None or name is None:
@@ -133,8 +108,6 @@ class DocumentUpdateByTextApi(DatasetApiResource):
             "doc_language", type=str, default="English", required=False, nullable=False, location="json"
         )
         parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
-        parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
-        parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
         args = parser.parse_args()
         dataset_id = str(dataset_id)
         tenant_id = str(tenant_id)
@@ -146,29 +119,6 @@ class DocumentUpdateByTextApi(DatasetApiResource):
         # indexing_technique is already set in dataset since this is an update
         args["indexing_technique"] = dataset.indexing_technique
 
-        # Validate metadata if provided
-        if args.get("doc_type") or args.get("doc_metadata"):
-            if not args.get("doc_type") or not args.get("doc_metadata"):
-                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
-
-            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
-                raise InvalidMetadataError(
-                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
-                )
-
-            if not isinstance(args["doc_metadata"], dict):
-                raise InvalidMetadataError("doc_metadata must be a dictionary")
-
-            # Validate metadata schema based on doc_type
-            if args["doc_type"] != "others":
-                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
-                for key, value in args["doc_metadata"].items():
-                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
-                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
-
-            # set to MetaDataConfig
-            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
-
         if args["text"]:
             text = args.get("text")
             name = args.get("name")
@@ -216,29 +166,6 @@ class DocumentAddByFileApi(DatasetApiResource):
         if "doc_language" not in args:
             args["doc_language"] = "English"
 
-        # Validate metadata if provided
-        if args.get("doc_type") or args.get("doc_metadata"):
-            if not args.get("doc_type") or not args.get("doc_metadata"):
-                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
-
-            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
-                raise InvalidMetadataError(
-                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
-                )
-
-            if not isinstance(args["doc_metadata"], dict):
-                raise InvalidMetadataError("doc_metadata must be a dictionary")
-
-            # Validate metadata schema based on doc_type
-            if args["doc_type"] != "others":
-                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
-                for key, value in args["doc_metadata"].items():
-                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
-                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
-
-            # set to MetaDataConfig
-            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
-
         # get dataset info
         dataset_id = str(dataset_id)
         tenant_id = str(tenant_id)
@@ -306,29 +233,6 @@ class DocumentUpdateByFileApi(DatasetApiResource):
         if "doc_language" not in args:
             args["doc_language"] = "English"
 
-        # Validate metadata if provided
-        if args.get("doc_type") or args.get("doc_metadata"):
-            if not args.get("doc_type") or not args.get("doc_metadata"):
-                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
-
-            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
-                raise InvalidMetadataError(
-                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
-                )
-
-            if not isinstance(args["doc_metadata"], dict):
-                raise InvalidMetadataError("doc_metadata must be a dictionary")
-
-            # Validate metadata schema based on doc_type
-            if args["doc_type"] != "others":
-                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
-                for key, value in args["doc_metadata"].items():
-                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
-                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
-
-            # set to MetaDataConfig
-            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
-
         # get dataset info
         dataset_id = str(dataset_id)
         tenant_id = str(tenant_id)
diff --git a/api/extensions/ext_commands.py b/api/extensions/ext_commands.py
index 3f5ae539c5..92996f75e5 100644
--- a/api/extensions/ext_commands.py
+++ b/api/extensions/ext_commands.py
@@ -3,7 +3,7 @@ from dify_app import DifyApp
 
 def init_app(app: DifyApp):
     from commands import (
-        add_qdrant_doc_id_index,
+        add_qdrant_index,
         convert_to_agent_apps,
         create_tenant,
         extract_plugins,
@@ -11,6 +11,7 @@ def init_app(app: DifyApp):
         fix_app_site_missing,
         install_plugins,
         migrate_data_for_plugin,
+        old_metadata_migration,
         reset_email,
         reset_encrypt_key_pair,
         reset_password,
@@ -24,7 +25,7 @@ def init_app(app: DifyApp):
         reset_encrypt_key_pair,
         vdb_migrate,
         convert_to_agent_apps,
-        add_qdrant_doc_id_index,
+        add_qdrant_index,
         create_tenant,
         upgrade_db,
         fix_app_site_missing,
@@ -32,6 +33,7 @@ def init_app(app: DifyApp):
         extract_plugins,
         extract_unique_plugins,
         install_plugins,
+        old_metadata_migration,
     ]
     for cmd in cmds_to_register:
         app.cli.add_command(cmd)
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index 7ce4e4af22..d3654a3d48 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -46,7 +46,6 @@ from models.source import DataSourceOauthBinding
 from services.entities.knowledge_entities.knowledge_entities import (
     ChildChunkUpdateArgs,
     KnowledgeConfig,
-    MetaDataConfig,
     RerankingModel,
     RetrievalModel,
     SegmentUpdateArgs,
@@ -999,9 +998,6 @@ class DocumentService:
                                 document.data_source_info = json.dumps(data_source_info)
                                 document.batch = batch
                                 document.indexing_status = "waiting"
-                                if knowledge_config.metadata:
-                                    document.doc_type = knowledge_config.metadata.doc_type
-                                    document.metadata = knowledge_config.metadata.doc_metadata
                                 db.session.add(document)
                                 documents.append(document)
                                 duplicate_document_ids.append(document.id)
@@ -1018,7 +1014,6 @@ class DocumentService:
                             account,
                             file_name,
                             batch,
-                            knowledge_config.metadata,
                         )
                         db.session.add(document)
                         db.session.flush()
@@ -1076,7 +1071,6 @@ class DocumentService:
                                     account,
                                     truncated_page_name,
                                     batch,
-                                    knowledge_config.metadata,
                                 )
                                 db.session.add(document)
                                 db.session.flush()
@@ -1117,7 +1111,6 @@ class DocumentService:
                             account,
                             document_name,
                             batch,
-                            knowledge_config.metadata,
                         )
                         db.session.add(document)
                         db.session.flush()
@@ -1155,7 +1148,6 @@ class DocumentService:
         account: Account,
         name: str,
         batch: str,
-        metadata: Optional[MetaDataConfig] = None,
     ):
         document = Document(
             tenant_id=dataset.tenant_id,
@@ -1180,9 +1172,6 @@ class DocumentService:
                 BuiltInField.last_update_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
                 BuiltInField.source: data_source_type,
             }
-        if metadata is not None:
-            doc_metadata.update(metadata.doc_metadata)
-            document.doc_type = metadata.doc_type
         if doc_metadata:
             document.doc_metadata = doc_metadata
         return document
@@ -1297,10 +1286,6 @@ class DocumentService:
         # update document name
         if document_data.name:
             document.name = document_data.name
-        # update doc_type and doc_metadata if provided
-        if document_data.metadata is not None:
-            document.doc_metadata = document_data.metadata.doc_metadata
-            document.doc_type = document_data.metadata.doc_type
         # update document to be waiting
         document.indexing_status = "waiting"
         document.completed_at = None
diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py
index 37c0fb49e5..51ce596e5c 100644
--- a/api/services/entities/knowledge_entities/knowledge_entities.py
+++ b/api/services/entities/knowledge_entities/knowledge_entities.py
@@ -128,7 +128,6 @@ class KnowledgeConfig(BaseModel):
     embedding_model: Optional[str] = None
     embedding_model_provider: Optional[str] = None
     name: Optional[str] = None
-    metadata: Optional[MetaDataConfig] = None
 
 
 class SegmentUpdateArgs(BaseModel):
diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx
index 7d32f8cebe..a5f4c40ef6 100644
--- a/web/app/(commonLayout)/datasets/template/template.en.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.en.mdx
@@ -47,44 +47,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
       <Property name='text' type='string' key='text'>
         Document content
       </Property>
-      <Property name='doc_type' type='string' key='doc_type'>
-        Type of document (optional):
-          - <code>book</code> Book
-          - <code>web_page</code> Web page
-          - <code>paper</code> Academic paper/article 
-          - <code>social_media_post</code> Social media post
-          - <code>wikipedia_entry</code> Wikipedia entry
-          - <code>personal_document</code> Personal document
-          - <code>business_document</code> Business document
-          - <code>im_chat_log</code> Chat log
-          - <code>synced_from_notion</code> Notion document
-          - <code>synced_from_github</code> GitHub document
-          - <code>others</code> Other document types
-      </Property>
-      <Property name='doc_metadata' type='object' key='doc_metadata'>
-        Document metadata (required if doc_type is provided). Fields vary by doc_type:
-          For <code>book</code>:
-          - <code>title</code> Book title 
-          - <code>language</code> Book language
-          - <code>author</code> Book author
-          - <code>publisher</code> Publisher name
-          - <code>publication_date</code> Publication date
-          - <code>isbn</code> ISBN number
-          - <code>category</code> Book category
-
-          For <code>web_page</code>:
-          - <code>title</code> Page title
-          - <code>url</code> Page URL
-          - <code>language</code> Page language
-          - <code>publish_date</code> Publish date
-          - <code>author/publisher</code> Author or publisher
-          - <code>topic/keywords</code> Topic or keywords
-          - <code>description</code> Page description
-
-          Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
-
-          For doc_type "others", any valid JSON object is accepted
-      </Property>
       <Property name='indexing_technique' type='string' key='indexing_technique'>
         Index mode
           - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
@@ -233,68 +195,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
           - <code>hierarchical_model</code> Parent-child mode
           - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
 
-        - <code>doc_type</code> Type of document (optional)
-          - <code>book</code> Book
-            Document records a book or publication
-          - <code>web_page</code> Web page 
-            Document records web page content
-          - <code>paper</code> Academic paper/article
-            Document records academic paper or research article
-          - <code>social_media_post</code> Social media post
-            Content from social media posts
-          - <code>wikipedia_entry</code> Wikipedia entry
-            Content from Wikipedia entries
-          - <code>personal_document</code> Personal document
-            Documents related to personal content
-          - <code>business_document</code> Business document
-            Documents related to business content
-          - <code>im_chat_log</code> Chat log
-            Records of instant messaging chats
-          - <code>synced_from_notion</code> Notion document
-            Documents synchronized from Notion
-          - <code>synced_from_github</code> GitHub document
-            Documents synchronized from GitHub
-          - <code>others</code> Other document types
-            Other document types not listed above
-
-        - <code>doc_metadata</code> Document metadata (required if doc_type is provided)
-          Fields vary by doc_type:
-
-          For <code>book</code>:
-          - <code>title</code> Book title
-            Title of the book
-          - <code>language</code> Book language
-            Language of the book
-          - <code>author</code> Book author
-            Author of the book
-          - <code>publisher</code> Publisher name
-            Name of the publishing house
-          - <code>publication_date</code> Publication date
-            Date when the book was published
-          - <code>isbn</code> ISBN number
-            International Standard Book Number
-          - <code>category</code> Book category
-            Category or genre of the book
-
-          For <code>web_page</code>:
-          - <code>title</code> Page title
-            Title of the web page
-          - <code>url</code> Page URL
-            URL address of the web page
-          - <code>language</code> Page language
-            Language of the web page
-          - <code>publish_date</code> Publish date
-            Date when the web page was published
-          - <code>author/publisher</code> Author or publisher
-            Author or publisher of the web page
-          - <code>topic/keywords</code> Topic or keywords
-            Topics or keywords of the web page
-          - <code>description</code> Page description
-            Description of the web page content
-
-          Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
-          For doc_type "others", any valid JSON object is accepted
-
         - <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
 
         - <code>process_rule</code> Processing rules
@@ -407,44 +307,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
       <Property name='description' type='string' key='description'>
         Knowledge description (optional)
       </Property>
-      <Property name='doc_type' type='string' key='doc_type'>
-        Type of document (optional):
-          - <code>book</code> Book
-          - <code>web_page</code> Web page
-          - <code>paper</code> Academic paper/article 
-          - <code>social_media_post</code> Social media post
-          - <code>wikipedia_entry</code> Wikipedia entry
-          - <code>personal_document</code> Personal document
-          - <code>business_document</code> Business document
-          - <code>im_chat_log</code> Chat log
-          - <code>synced_from_notion</code> Notion document
-          - <code>synced_from_github</code> GitHub document
-          - <code>others</code> Other document types
-      </Property>
-      <Property name='doc_metadata' type='object' key='doc_metadata'>
-        Document metadata (required if doc_type is provided). Fields vary by doc_type:
-          For <code>book</code>:
-          - <code>title</code> Book title 
-          - <code>language</code> Book language
-          - <code>author</code> Book author
-          - <code>publisher</code> Publisher name
-          - <code>publication_date</code> Publication date
-          - <code>isbn</code> ISBN number
-          - <code>category</code> Book category
-
-          For <code>web_page</code>:
-          - <code>title</code> Page title
-          - <code>url</code> Page URL
-          - <code>language</code> Page language
-          - <code>publish_date</code> Publish date
-          - <code>author/publisher</code> Author or publisher
-          - <code>topic/keywords</code> Topic or keywords
-          - <code>description</code> Page description
-
-          Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
-
-          For doc_type "others", any valid JSON object is accepted
-      </Property>
       <Property name='indexing_technique' type='string' key='indexing_technique'>
         Index technique (optional)
           - <code>high_quality</code> High quality
@@ -762,67 +624,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
               - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
               - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
               - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
-            - <code>doc_type</code> Type of document (optional)
-              - <code>book</code> Book
-                Document records a book or publication
-              - <code>web_page</code> Web page 
-                Document records web page content
-              - <code>paper</code> Academic paper/article
-                Document records academic paper or research article
-              - <code>social_media_post</code> Social media post
-                Content from social media posts
-              - <code>wikipedia_entry</code> Wikipedia entry
-                Content from Wikipedia entries
-              - <code>personal_document</code> Personal document
-                Documents related to personal content
-              - <code>business_document</code> Business document
-                Documents related to business content
-              - <code>im_chat_log</code> Chat log
-                Records of instant messaging chats
-              - <code>synced_from_notion</code> Notion document
-                Documents synchronized from Notion
-              - <code>synced_from_github</code> GitHub document
-                Documents synchronized from GitHub
-              - <code>others</code> Other document types
-                Other document types not listed above
-
-            - <code>doc_metadata</code> Document metadata (required if doc_type is provided)
-              Fields vary by doc_type:
-
-              For <code>book</code>:
-              - <code>title</code> Book title
-                Title of the book
-              - <code>language</code> Book language
-                Language of the book
-              - <code>author</code> Book author
-                Author of the book
-              - <code>publisher</code> Publisher name
-                Name of the publishing house
-              - <code>publication_date</code> Publication date
-                Date when the book was published
-              - <code>isbn</code> ISBN number
-                International Standard Book Number
-              - <code>category</code> Book category
-                Category or genre of the book
-
-              For <code>web_page</code>:
-              - <code>title</code> Page title
-                Title of the web page
-              - <code>url</code> Page URL
-                URL address of the web page
-              - <code>language</code> Page language
-                Language of the web page
-              - <code>publish_date</code> Publish date
-                Date when the web page was published
-              - <code>author/publisher</code> Author or publisher
-                Author or publisher of the web page
-              - <code>topic/keywords</code> Topic or keywords
-                Topics or keywords of the web page
-              - <code>description</code> Page description
-                Description of the web page content
-
-              Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
-              For doc_type "others", any valid JSON object is accepted
       </Property>
     </Properties>
   </Col>
@@ -1528,7 +1329,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
               "id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
               "data_source_type": "upload_file",
               "name": "readme.txt",
-              "doc_type": null
             }
           },
           "score": 3.730463140527718e-05,
diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx
index 8bd3d8d5eb..282849f3db 100644
--- a/web/app/(commonLayout)/datasets/template/template.zh.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx
@@ -47,46 +47,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
       <Property name='text' type='string' key='text'>
         文档内容
       </Property>
-      <Property name='doc_type' type='string' key='doc_type'>
-        文档类型（选填）
-          - <code>book</code> 图书 Book
-          - <code>web_page</code> 网页 Web page
-          - <code>paper</code> 学术论文/文章 Academic paper/article 
-          - <code>social_media_post</code> 社交媒体帖子 Social media post
-          - <code>wikipedia_entry</code> 维基百科条目 Wikipedia entry
-          - <code>personal_document</code> 个人文档 Personal document
-          - <code>business_document</code> 商业文档 Business document
-          - <code>im_chat_log</code> 即时通讯记录 Chat log
-          - <code>synced_from_notion</code> Notion同步文档 Notion document
-          - <code>synced_from_github</code> GitHub同步文档 GitHub document
-          - <code>others</code> 其他文档类型 Other document types
-      </Property>
-      <Property name='doc_metadata' type='object' key='doc_metadata'>
-      
-        文档元数据（如提供文档类型则必填）。字段因文档类型而异：
-          
-          针对图书 For <code>book</code>:
-          - <code>title</code> 书名 Book title 
-          - <code>language</code> 图书语言 Book language
-          - <code>author</code> 作者 Book author
-          - <code>publisher</code> 出版社 Publisher name
-          - <code>publication_date</code> 出版日期 Publication date
-          - <code>isbn</code> ISBN号码 ISBN number
-          - <code>category</code> 图书分类 Book category
-
-          针对网页 For <code>web_page</code>:
-          - <code>title</code> 页面标题 Page title
-          - <code>url</code> 页面网址 Page URL
-          - <code>language</code> 页面语言 Page language
-          - <code>publish_date</code> 发布日期 Publish date
-          - <code>author/publisher</code> 作者/发布者 Author or publisher
-          - <code>topic/keywords</code> 主题/关键词 Topic or keywords
-          - <code>description</code> 页面描述 Page description
-
-          请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
-
-          针对"其他"类型文档，接受任何有效的JSON对象
-      </Property>
       <Property name='indexing_technique' type='string' key='indexing_technique'>
         索引方式
           - <code>high_quality</code> 高质量：使用  embedding 模型进行嵌入，构建为向量数据库索引
@@ -234,68 +194,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
           - <code>text_model</code> text 文档直接 embedding，经济模式默认为该模式
           - <code>hierarchical_model</code> parent-child 模式
           - <code>qa_model</code> Q&A 模式：为分片文档生成 Q&A 对，然后对问题进行 embedding
-        - <code>doc_type</code> 文档类型（选填）Type of document (optional)
-          - <code>book</code> 图书
-            文档记录一本书籍或出版物
-          - <code>web_page</code> 网页
-            网页内容的文档记录
-          - <code>paper</code> 学术论文/文章
-            学术论文或研究文章的记录
-          - <code>social_media_post</code> 社交媒体帖子
-            社交媒体上的帖子内容
-          - <code>wikipedia_entry</code> 维基百科条目
-            维基百科的词条内容
-          - <code>personal_document</code> 个人文档
-            个人相关的文档记录
-          - <code>business_document</code> 商业文档
-            商业相关的文档记录
-          - <code>im_chat_log</code> 即时通讯记录
-            即时通讯的聊天记录
-          - <code>synced_from_notion</code> Notion同步文档
-            从Notion同步的文档内容
-          - <code>synced_from_github</code> GitHub同步文档
-            从GitHub同步的文档内容
-          - <code>others</code> 其他文档类型
-            其他未列出的文档类型
-
-        - <code>doc_metadata</code> 文档元数据（如提供文档类型则必填
-          字段因文档类型而异
-
-          针对图书类型 For <code>book</code>:
-          - <code>title</code> 书名
-            书籍的标题
-          - <code>language</code> 图书语言
-            书籍的语言
-          - <code>author</code> 作者
-            书籍的作者
-          - <code>publisher</code> 出版社
-            出版社的名称
-          - <code>publication_date</code> 出版日期
-            书籍的出版日期
-          - <code>isbn</code> ISBN号码
-            书籍的ISBN编号
-          - <code>category</code> 图书分类
-            书籍的分类类别
-
-          针对网页类型 For <code>web_page</code>:
-          - <code>title</code> 页面标题
-            网页的标题
-          - <code>url</code> 页面网址
-            网页的URL地址
-          - <code>language</code> 页面语言
-            网页的语言
-          - <code>publish_date</code> 发布日期
-            网页的发布日期
-          - <code>author/publisher</code> 作者/发布者
-            网页的作者或发布者
-          - <code>topic/keywords</code> 主题/关键词
-            网页的主题或关键词
-          - <code>description</code> 页面描述
-            网页的描述信息
-
-          请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
-
-          针对"其他"类型文档，接受任何有效的JSON对象
 
         - <code>doc_language</code> 在 Q&A 模式下，指定文档的语言，例如：<code>English</code>、<code>Chinese</code>
 
@@ -606,46 +504,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
       <Property name='text' type='string' key='text'>
         文档内容（选填）
       </Property>
-      <Property name='doc_type' type='string' key='doc_type'>
-        文档类型（选填）
-          - <code>book</code> 图书 Book
-          - <code>web_page</code> 网页 Web page
-          - <code>paper</code> 学术论文/文章 Academic paper/article 
-          - <code>social_media_post</code> 社交媒体帖子 Social media post
-          - <code>wikipedia_entry</code> 维基百科条目 Wikipedia entry
-          - <code>personal_document</code> 个人文档 Personal document
-          - <code>business_document</code> 商业文档 Business document
-          - <code>im_chat_log</code> 即时通讯记录 Chat log
-          - <code>synced_from_notion</code> Notion同步文档 Notion document
-          - <code>synced_from_github</code> GitHub同步文档 GitHub document
-          - <code>others</code> 其他文档类型 Other document types
-      </Property>
-      <Property name='doc_metadata' type='object' key='doc_metadata'>
-      
-        文档元数据（如提供文档类型则必填）。字段因文档类型而异：
-          
-          针对图书 For <code>book</code>:
-          - <code>title</code> 书名 Book title 
-          - <code>language</code> 图书语言 Book language
-          - <code>author</code> 作者 Book author
-          - <code>publisher</code> 出版社 Publisher name
-          - <code>publication_date</code> 出版日期 Publication date
-          - <code>isbn</code> ISBN号码 ISBN number
-          - <code>category</code> 图书分类 Book category
-
-          针对网页 For <code>web_page</code>:
-          - <code>title</code> 页面标题 Page title
-          - <code>url</code> 页面网址 Page URL
-          - <code>language</code> 页面语言 Page language
-          - <code>publish_date</code> 发布日期 Publish date
-          - <code>author/publisher</code> 作者/发布者 Author or publisher
-          - <code>topic/keywords</code> 主题/关键词 Topic or keywords
-          - <code>description</code> 页面描述 Page description
-
-          请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
-
-          针对"其他"类型文档，接受任何有效的JSON对象
-      </Property>
       <Property name='process_rule' type='object' key='process_rule'>
         处理规则（选填）
           - <code>mode</code> (string) 清洗、分段模式 ，automatic 自动 / custom 自定义
@@ -766,68 +624,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
               - <code>separator</code> 分段标识符，目前仅允许设置一个分隔符。默认为 <code>***</code>
               - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
               - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时，段与段之间存在一定的重叠部分（选填）
-            - <code>doc_type</code> 文档类型（选填）Type of document (optional)
-              - <code>book</code> 图书
-                文档记录一本书籍或出版物
-              - <code>web_page</code> 网页
-                网页内容的文档记录
-              - <code>paper</code> 学术论文/文章
-                学术论文或研究文章的记录
-              - <code>social_media_post</code> 社交媒体帖子
-                社交媒体上的帖子内容
-              - <code>wikipedia_entry</code> 维基百科条目
-                维基百科的词条内容
-              - <code>personal_document</code> 个人文档
-                个人相关的文档记录
-              - <code>business_document</code> 商业文档
-                商业相关的文档记录
-              - <code>im_chat_log</code> 即时通讯记录
-                即时通讯的聊天记录
-              - <code>synced_from_notion</code> Notion同步文档
-                从Notion同步的文档内容
-              - <code>synced_from_github</code> GitHub同步文档
-                从GitHub同步的文档内容
-              - <code>others</code> 其他文档类型
-                其他未列出的文档类型
-
-            - <code>doc_metadata</code> 文档元数据（如提供文档类型则必填
-              字段因文档类型而异
-
-              针对图书类型 For <code>book</code>:
-              - <code>title</code> 书名
-                书籍的标题
-              - <code>language</code> 图书语言
-                书籍的语言
-              - <code>author</code> 作者
-                书籍的作者
-              - <code>publisher</code> 出版社
-                出版社的名称
-              - <code>publication_date</code> 出版日期
-                书籍的出版日期
-              - <code>isbn</code> ISBN号码
-                书籍的ISBN编号
-              - <code>category</code> 图书分类
-                书籍的分类类别
-
-              针对网页类型 For <code>web_page</code>:
-              - <code>title</code> 页面标题
-                网页的标题
-              - <code>url</code> 页面网址
-                网页的URL地址
-              - <code>language</code> 页面语言
-                网页的语言
-              - <code>publish_date</code> 发布日期
-                网页的发布日期
-              - <code>author/publisher</code> 作者/发布者
-                网页的作者或发布者
-              - <code>topic/keywords</code> 主题/关键词
-                网页的主题或关键词
-              - <code>description</code> 页面描述
-                网页的描述信息
-
-              请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
-
-              针对"其他"类型文档，接受任何有效的JSON对象
       </Property>
     </Properties>
   </Col>
@@ -1534,7 +1330,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
               "id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
               "data_source_type": "upload_file",
               "name": "readme.txt",
-              "doc_type": null
             }
           },
           "score": 3.730463140527718e-05,

From 3e84c77bbb361dbcea7e9d54ec583c23a236580a Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Thu, 20 Mar 2025 14:38:32 +0800
Subject: [PATCH 7/7] fix enable dataset metadata built-in field when dataset
 is empty (#16290)

---
 api/services/metadata_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/services/metadata_service.py b/api/services/metadata_service.py
index a43b970a39..4cd2f9e8cb 100644
--- a/api/services/metadata_service.py
+++ b/api/services/metadata_service.py
@@ -137,7 +137,7 @@ class MetadataService:
                     doc_metadata[BuiltInField.source.value] = MetadataDataSource[document.data_source_type].value
                     document.doc_metadata = doc_metadata
                     db.session.add(document)
-                db.session.commit()
+            db.session.commit()
         except Exception:
             logging.exception("Enable built-in field failed")
         finally: