Compare commits
3 commits: main ... feat/struc

Commits:
- abe34a71f7
- e544541926
- 0dcbdfcb8d
.github/DISCUSSION_TEMPLATE/general.yml (vendored, 2 changes)

@@ -9,7 +9,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
.github/DISCUSSION_TEMPLATE/help.yml (vendored, 2 changes)

@@ -9,7 +9,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
.github/DISCUSSION_TEMPLATE/suggestion.yml (vendored, 2 changes)

@@ -9,7 +9,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
.github/ISSUE_TEMPLATE/bug_report.yml (vendored, 2 changes)

@@ -14,7 +14,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
.github/ISSUE_TEMPLATE/document_issue.yml (vendored, 2 changes)

@@ -12,7 +12,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
.github/ISSUE_TEMPLATE/feature_request.yml (vendored, 2 changes)

@@ -12,7 +12,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
.github/ISSUE_TEMPLATE/tracker.yml (vendored, 2 changes)

@@ -1,5 +1,5 @@
 name: "👾 Tracker"
-description: For inner usages, please do not use this template.
+description: For inner usages, please donot use this template.
 title: "[Tracker] "
 labels:
   - tracker
.github/ISSUE_TEMPLATE/translation_issue.yml (vendored, 4 changes)

@@ -1,5 +1,5 @@
 name: "🌐 Localization/Translation issue"
-description: Report incorrect translations. [please use English :)]
+description: Report incorrect translations. [please use English :)]
 labels:
   - translation
 body:

@@ -12,7 +12,7 @@ body:
         required: true
     - label: I confirm that I am using English to submit this report (我已阅读并同意 [Language Policy](https://github.com/langgenius/dify/issues/1542)).
         required: true
-    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
+    - label: "[FOR CHINESE USERS] 请务必使用英文提交 Issue,否则会被关闭。谢谢!:)"
         required: true
     - label: "Please do not modify this template :) and fill in all the required fields."
         required: true
@@ -204,9 +204,7 @@ If you'd like to configure a highly-available setup, there are community-contrib

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Using Terraform for Deployment
@@ -187,9 +187,7 @@ docker compose up -d

 - [رسم بياني Helm من قبل @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [رسم بياني Helm من قبل @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [رسم بياني Helm من قبل @magicsong](https://github.com/magicsong/ai-charts)
 - [ملف YAML من قبل @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [ملف YAML من قبل @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### استخدام Terraform للتوزيع
@@ -203,9 +203,7 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### টেরাফর্ম ব্যবহার করে ডিপ্লয়
@@ -205,9 +205,7 @@ docker compose up -d

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML 文件 by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### 使用 Terraform 部署
@@ -205,9 +205,7 @@ Falls Sie eine hochverfügbare Konfiguration einrichten möchten, gibt es von de

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Terraform für die Bereitstellung verwenden
@@ -77,7 +77,9 @@ Dify es una plataforma de desarrollo de aplicaciones de LLM de código abierto.
 Amplias capacidades de RAG que cubren todo, desde la ingestión de documentos hasta la recuperación, con soporte listo para usar para la extracción de texto de PDF, PPT y otros formatos de documento comunes.

 **5. Capacidades de agente**:
-Puedes definir agentes basados en LLM Function Calling o ReAct, y agregar herramientas preconstruidas o personalizadas para el agente. Dify proporciona más de 50 herramientas integradas para agentes de IA, como Búsqueda de Google, DALL·E, Difusión Estable y WolframAlpha.
+Puedes definir agent
+
+es basados en LLM Function Calling o ReAct, y agregar herramientas preconstruidas o personalizadas para el agente. Dify proporciona más de 50 herramientas integradas para agentes de IA, como Búsqueda de Google, DALL·E, Difusión Estable y WolframAlpha.

 **6. LLMOps**:
 Supervisa y analiza registros de aplicaciones y rendimiento a lo largo del tiempo. Podrías mejorar continuamente prompts, conjuntos de datos y modelos basados en datos de producción y anotaciones.

@@ -205,9 +207,7 @@ Si desea configurar una configuración de alta disponibilidad, la comunidad prop

 - [Gráfico Helm por @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Gráfico Helm por @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Gráfico Helm por @magicsong](https://github.com/magicsong/ai-charts)
 - [Ficheros YAML por @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [Ficheros YAML por @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Uso de Terraform para el despliegue
@@ -203,9 +203,7 @@ Si vous souhaitez configurer une configuration haute disponibilité, la communau

 - [Helm Chart par @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart par @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart par @magicsong](https://github.com/magicsong/ai-charts)
 - [Fichier YAML par @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [Fichier YAML par @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Utilisation de Terraform pour le déploiement
@@ -204,9 +204,7 @@ docker compose up -d

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Terraformを使用したデプロイ
@@ -203,9 +203,7 @@ If you'd like to configure a highly-available setup, there are community-contrib

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Terraform atorlugu pilersitsineq
@@ -197,9 +197,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했

 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Terraform을 사용한 배포
@@ -203,9 +203,7 @@ Se deseja configurar uma instalação de alta disponibilidade, há [Helm Charts]

 - [Helm Chart de @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart de @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
-- [Helm Chart de @magicsong](https://github.com/magicsong/ai-charts)
-- [Arquivo YAML por @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [Arquivo YAML por @wyy-holding](https://github.com/wyy-holding/dify-k8s)
+- [Arquivo YAML de @Winson-030](https://github.com/Winson-030/dify-kubernetes)

 #### Usando o Terraform para Implantação
@@ -205,7 +205,6 @@ Star Dify on GitHub and be instantly notified of new releases.
 - [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Uporaba Terraform za uvajanje
@@ -198,7 +198,6 @@ Yüksek kullanılabilirliğe sahip bir kurulum yapılandırmak isterseniz, Dify'
 - [@LeoQuote tarafından Helm Chart](https://github.com/douban/charts/tree/master/charts/dify)
 - [@BorisPolonsky tarafından Helm Chart](https://github.com/BorisPolonsky/dify-helm)
 - [@Winson-030 tarafından YAML dosyası](https://github.com/Winson-030/dify-kubernetes)
-- [@wyy-holding tarafından YAML dosyası](https://github.com/wyy-holding/dify-k8s)

 #### Dağıtım için Terraform Kullanımı
@@ -204,7 +204,6 @@ Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify
 - [由 @LeoQuote 提供的 Helm Chart](https://github.com/douban/charts/tree/master/charts/dify)
 - [由 @BorisPolonsky 提供的 Helm Chart](https://github.com/BorisPolonsky/dify-helm)
 - [由 @Winson-030 提供的 YAML 文件](https://github.com/Winson-030/dify-kubernetes)
-- [由 @wyy-holding 提供的 YAML 文件](https://github.com/wyy-holding/dify-k8s)

 ### 使用 Terraform 進行部署
@@ -200,7 +200,6 @@ Nếu bạn muốn cấu hình một cài đặt có độ sẵn sàng cao, có
 - [Helm Chart bởi @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
 - [Helm Chart bởi @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
 - [Tệp YAML bởi @Winson-030](https://github.com/Winson-030/dify-kubernetes)
-- [Tệp YAML bởi @wyy-holding](https://github.com/wyy-holding/dify-k8s)

 #### Sử dụng Terraform để Triển khai
@@ -318,6 +318,7 @@ UPLOAD_AUDIO_FILE_SIZE_LIMIT=50
 MULTIMODAL_SEND_FORMAT=base64
 PROMPT_GENERATION_MAX_TOKENS=512
 CODE_GENERATION_MAX_TOKENS=1024
+STRUCTURED_OUTPUT_MAX_TOKENS=1024

 # Mail configuration, support: resend, smtp
 MAIL_TYPE=
@@ -12,7 +12,6 @@ from configs import dify_config
 from constants.languages import languages
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.datasource.vdb.vector_type import VectorType
-from core.rag.index_processor.constant.built_in_field import BuiltInField
 from core.rag.models.document import Document
 from events.app_event import app_was_created
 from extensions.ext_database import db
@@ -21,7 +20,7 @@ from libs.helper import email as email_validate
 from libs.password import hash_password, password_pattern, valid_password
 from libs.rsa import generate_key_pair
 from models import Tenant
-from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
+from models.dataset import Dataset, DatasetCollectionBinding, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from models.model import Account, App, AppAnnotationSetting, AppMode, Conversation, MessageAnnotation
 from models.provider import Provider, ProviderModel
@@ -484,11 +483,14 @@ def convert_to_agent_apps():
     click.echo(click.style("Conversion complete. Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green"))


-@click.command("add-qdrant-index", help="Add Qdrant index.")
+@click.command("add-qdrant-doc-id-index", help="Add Qdrant doc_id index.")
 @click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.")
-def add_qdrant_index(field: str):
-    click.echo(click.style("Starting Qdrant index creation.", fg="green"))
+def add_qdrant_doc_id_index(field: str):
+    click.echo(click.style("Starting Qdrant doc_id index creation.", fg="green"))
     vector_type = dify_config.VECTOR_STORE
     if vector_type != "qdrant":
         click.echo(click.style("This command only supports Qdrant vector store.", fg="red"))
         return
     create_count = 0

     try:
@@ -537,76 +539,6 @@ def add_qdrant_index(field: str):
     click.echo(click.style(f"Index creation complete. Created {create_count} collection indexes.", fg="green"))


-@click.command("old-metadata-migration", help="Old metadata migration.")
-def old_metadata_migration():
-    """
-    Old metadata migration.
-    """
-    click.echo(click.style("Starting old metadata migration.", fg="green"))
-
-    page = 1
-    while True:
-        try:
-            documents = (
-                DatasetDocument.query.filter(DatasetDocument.doc_metadata is not None)
-                .order_by(DatasetDocument.created_at.desc())
-                .paginate(page=page, per_page=50)
-            )
-        except NotFound:
-            break
-        if not documents:
-            break
-        for document in documents:
-            if document.doc_metadata:
-                doc_metadata = document.doc_metadata
-                for key, value in doc_metadata.items():
-                    for field in BuiltInField:
-                        if field.value == key:
-                            break
-                    else:
-                        dataset_metadata = (
-                            db.session.query(DatasetMetadata)
-                            .filter(DatasetMetadata.dataset_id == document.dataset_id, DatasetMetadata.name == key)
-                            .first()
-                        )
-                        if not dataset_metadata:
-                            dataset_metadata = DatasetMetadata(
-                                tenant_id=document.tenant_id,
-                                dataset_id=document.dataset_id,
-                                name=key,
-                                type="string",
-                                created_by=document.created_by,
-                            )
-                            db.session.add(dataset_metadata)
-                            db.session.flush()
-                            dataset_metadata_binding = DatasetMetadataBinding(
-                                tenant_id=document.tenant_id,
-                                dataset_id=document.dataset_id,
-                                metadata_id=dataset_metadata.id,
-                                document_id=document.id,
-                                created_by=document.created_by,
-                            )
-                            db.session.add(dataset_metadata_binding)
-                        else:
-                            dataset_metadata_binding = DatasetMetadataBinding.query.filter(
-                                DatasetMetadataBinding.dataset_id == document.dataset_id,
-                                DatasetMetadataBinding.document_id == document.id,
-                                DatasetMetadataBinding.metadata_id == dataset_metadata.id,
-                            ).first()
-                            if not dataset_metadata_binding:
-                                dataset_metadata_binding = DatasetMetadataBinding(
-                                    tenant_id=document.tenant_id,
-                                    dataset_id=document.dataset_id,
-                                    metadata_id=dataset_metadata.id,
-                                    document_id=document.id,
-                                    created_by=document.created_by,
-                                )
-                                db.session.add(dataset_metadata_binding)
-        db.session.commit()
-        page += 1
-    click.echo(click.style("Old metadata migration completed.", fg="green"))


 @click.command("create-tenant", help="Create account and tenant.")
 @click.option("--email", prompt=True, help="Tenant account email.")
 @click.option("--name", prompt=True, help="Workspace name.")
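The removed migration above leans on Python's for/else idiom: the else branch runs only when the loop over BuiltInField finishes without hitting break, i.e. when the key is not a built-in field. A minimal self-contained sketch of that control flow (the enum values here are illustrative, not Dify's actual list):

```python
from enum import Enum


class BuiltInField(Enum):  # illustrative values, for demonstration only
    DOCUMENT_NAME = "document_name"
    UPLOADER = "uploader"


def is_custom_key(key: str) -> bool:
    for field in BuiltInField:
        if field.value == key:
            break  # built-in field: no custom-metadata handling needed
    else:
        # no break occurred: the key is not built-in, treat it as custom metadata
        return True
    return False


print(is_custom_key("uploader"))  # False
print(is_custom_key("genre"))     # True
```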
@@ -61,10 +61,6 @@ class AppExecutionConfig(BaseSettings):
         description="Maximum number of concurrent active requests per app (0 for unlimited)",
         default=0,
     )
-    APP_DAILY_RATE_LIMIT: NonNegativeInt = Field(
-        description="Maximum number of requests per app per day",
-        default=5000,
-    )


 class CodeExecutionSandboxConfig(BaseSettings):
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):

     CURRENT_VERSION: str = Field(
         description="Dify version",
-        default="1.1.1",
+        default="1.1.0",
     )

     COMMIT_SHA: str = Field(
@@ -85,5 +85,37 @@ class RuleCodeGenerateApi(Resource):
         return code_result


+class RuleStructuredOutputGenerateApi(Resource):
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("instruction", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("model_config", type=dict, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+
+        account = current_user
+        structured_output_max_tokens = int(os.getenv("STRUCTURED_OUTPUT_MAX_TOKENS", "1024"))
+        try:
+            structured_output = LLMGenerator.generate_structured_output(
+                tenant_id=account.current_tenant_id,
+                instruction=args["instruction"],
+                model_config=args["model_config"],
+                max_tokens=structured_output_max_tokens,
+            )
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+
+        return structured_output
+
+
 api.add_resource(RuleGenerateApi, "/rule-generate")
 api.add_resource(RuleCodeGenerateApi, "/rule-code-generate")
+api.add_resource(RuleStructuredOutputGenerateApi, "/rule-structured-output-generate")
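A hedged sketch of how the new endpoint could be exercised. The base URL, console prefix, and auth header are assumptions; the JSON fields mirror the parser arguments above, and the provider/name keys match what generate_structured_output reads from model_config:

```python
import requests

resp = requests.post(
    "http://localhost:5001/console/api/rule-structured-output-generate",  # assumed host and route prefix
    headers={"Authorization": "Bearer <console-access-token>"},  # assumed auth scheme
    json={
        "instruction": "I need name and age",
        "model_config": {"provider": "openai", "name": "gpt-4o-mini"},  # illustrative model
    },
)
# Expected response shape per the generator: {"output": "<json schema>", "error": ""}
print(resp.json())
```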
@@ -10,14 +10,9 @@ from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
 import services
 from configs import dify_config
 from controllers.console import api
-from controllers.console.app.error import (
-    ConversationCompletedError,
-    DraftWorkflowNotExist,
-    DraftWorkflowNotSync,
-)
+from controllers.console.app.error import ConversationCompletedError, DraftWorkflowNotExist, DraftWorkflowNotSync
 from controllers.console.app.wraps import get_app_model
 from controllers.console.wraps import account_initialization_required, setup_required
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
 from extensions.ext_database import db
@@ -32,7 +27,6 @@ from models.account import Account
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
 from services.errors.app import WorkflowHashNotEqualError
-from services.errors.llm import InvokeRateLimitError
 from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService

 logger = logging.getLogger(__name__)
@@ -174,8 +168,6 @@ class AdvancedChatDraftWorkflowRunApi(Resource):
             raise NotFound("Conversation Not Exists.")
         except services.errors.conversation.ConversationCompletedError:
             raise ConversationCompletedError()
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
         except ValueError as e:
             raise e
         except Exception:
@@ -352,7 +344,6 @@ class DraftWorkflowRunApi(Resource):
         parser.add_argument("files", type=list, required=False, location="json")
         args = parser.parse_args()

-        try:
         response = AppGenerateService.generate(
             app_model=app_model,
             user=current_user,
@@ -362,8 +353,6 @@ class DraftWorkflowRunApi(Resource):
         )

         return helper.compact_generate_response(response)
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)


 class WorkflowTaskStopApi(Resource):
@@ -79,7 +79,7 @@ class DatasetListApi(Resource):
         data = marshal(datasets, dataset_detail_fields)
         for item in data:
             # convert embedding_model_provider to plugin standard format
-            if item["indexing_technique"] == "high_quality" and item["embedding_model_provider"]:
+            if item["indexing_technique"] == "high_quality":
                 item["embedding_model_provider"] = str(ModelProviderID(item["embedding_model_provider"]))
                 item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"
                 if item_model in model_names:
@@ -16,7 +16,6 @@ from controllers.console.app.error import (
 )
 from controllers.console.explore.error import NotChatAppError, NotCompletionAppError
 from controllers.console.explore.wraps import InstalledAppResource
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.errors.error import (
@@ -30,7 +29,6 @@ from libs import helper
 from libs.helper import uuid_value
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
-from services.errors.llm import InvokeRateLimitError


 # define completion api for user
@@ -77,7 +75,7 @@ class CompletionApi(InstalledAppResource):
             raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()

@@ -135,11 +133,9 @@ class ChatApi(InstalledAppResource):
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
             raise CompletionRequestError(e.description)
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -11,7 +11,6 @@ from controllers.console.app.error import (
 )
 from controllers.console.explore.error import NotWorkflowAppError
 from controllers.console.explore.wraps import InstalledAppResource
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.errors.error import (
@@ -24,7 +23,6 @@ from libs import helper
 from libs.login import current_user
 from models.model import AppMode, InstalledApp
 from services.app_generate_service import AppGenerateService
-from services.errors.llm import InvokeRateLimitError

 logger = logging.getLogger(__name__)

@@ -58,11 +56,9 @@ class InstalledAppWorkflowRunApi(InstalledAppResource):
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
             raise CompletionRequestError(e.description)
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -7,4 +7,4 @@ api = ExternalApi(bp)

 from . import index
 from .app import app, audio, completion, conversation, file, message, workflow
-from .dataset import dataset, document, hit_testing, metadata, segment, upload_file
+from .dataset import dataset, document, hit_testing, segment, upload_file
@@ -15,7 +15,6 @@ from controllers.service_api.app.error import (
     ProviderQuotaExceededError,
 )
 from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.errors.error import (
@@ -28,7 +27,6 @@ from libs import helper
 from libs.helper import uuid_value
 from models.model import App, AppMode, EndUser
 from services.app_generate_service import AppGenerateService
-from services.errors.llm import InvokeRateLimitError


 class CompletionApi(Resource):
@@ -77,7 +75,7 @@ class CompletionApi(Resource):
             raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()

@@ -132,13 +130,11 @@ class ChatApi(Resource):
             raise ProviderQuotaExceededError()
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
         except InvokeError as e:
             raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -15,7 +15,6 @@ from controllers.service_api.app.error import (
     ProviderQuotaExceededError,
 )
 from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.errors.error import (
@@ -30,7 +29,6 @@ from libs import helper
 from models.model import App, AppMode, EndUser
 from models.workflow import WorkflowRun, WorkflowRunStatus
 from services.app_generate_service import AppGenerateService
-from services.errors.llm import InvokeRateLimitError
 from services.workflow_app_service import WorkflowAppService

 logger = logging.getLogger(__name__)
@@ -95,13 +93,11 @@ class WorkflowRunApi(Resource):
             raise ProviderQuotaExceededError()
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
         except InvokeError as e:
             raise CompletionRequestError(e.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -7,7 +7,6 @@ from controllers.service_api import api
 from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError
 from controllers.service_api.wraps import DatasetApiResource
 from core.model_runtime.entities.model_entities import ModelType
-from core.plugin.entities.plugin import ModelProviderID
 from core.provider_manager import ProviderManager
 from fields.dataset_fields import dataset_detail_fields
 from libs.login import current_user
@@ -49,8 +48,7 @@ class DatasetListApi(DatasetApiResource):

         data = marshal(datasets, dataset_detail_fields)
         for item in data:
-            if item["indexing_technique"] == "high_quality" and item["embedding_model_provider"]:
-                item["embedding_model_provider"] = str(ModelProviderID(item["embedding_model_provider"]))
+            if item["indexing_technique"] == "high_quality":
                 item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"
                 if item_model in model_names:
                     item["embedding_available"] = True
@@ -18,6 +18,7 @@ from controllers.service_api.app.error import (
 from controllers.service_api.dataset.error import (
     ArchivedDocumentImmutableError,
     DocumentIndexingError,
+    InvalidMetadataError,
 )
 from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check
 from core.errors.error import ProviderTokenNotInitError
@@ -50,6 +51,8 @@ class DocumentAddByTextApi(DatasetApiResource):
             "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json"
         )
         parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
+        parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
+        parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")

         args = parser.parse_args()
         dataset_id = str(dataset_id)
@@ -62,6 +65,28 @@ class DocumentAddByTextApi(DatasetApiResource):
         if not dataset.indexing_technique and not args["indexing_technique"]:
             raise ValueError("indexing_technique is required.")

+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
         text = args.get("text")
         name = args.get("name")
         if text is None or name is None:
@@ -108,6 +133,8 @@ class DocumentUpdateByTextApi(DatasetApiResource):
             "doc_language", type=str, default="English", required=False, nullable=False, location="json"
         )
         parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
+        parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
+        parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
         args = parser.parse_args()
         dataset_id = str(dataset_id)
         tenant_id = str(tenant_id)
@@ -119,6 +146,29 @@ class DocumentUpdateByTextApi(DatasetApiResource):
         # indexing_technique is already set in dataset since this is an update
         args["indexing_technique"] = dataset.indexing_technique

+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
         if args["text"]:
             text = args.get("text")
             name = args.get("name")
@@ -166,6 +216,29 @@ class DocumentAddByFileApi(DatasetApiResource):
         if "doc_language" not in args:
             args["doc_language"] = "English"

+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
         # get dataset info
         dataset_id = str(dataset_id)
         tenant_id = str(tenant_id)
@@ -233,6 +306,29 @@ class DocumentUpdateByFileApi(DatasetApiResource):
         if "doc_language" not in args:
             args["doc_language"] = "English"

+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
         # get dataset info
         dataset_id = str(dataset_id)
         tenant_id = str(tenant_id)
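The validation block repeated above is self-contained enough to sketch in isolation. A minimal sketch of the rule it enforces, with a hypothetical schema (the real keys and field types live in DocumentService.DOCUMENT_METADATA_SCHEMA; "book" and its fields here are illustrative):

```python
# Hypothetical stand-in for DocumentService.DOCUMENT_METADATA_SCHEMA
DOCUMENT_METADATA_SCHEMA = {"book": {"title": str, "author": str}, "others": {}}


def validate_metadata(doc_type, doc_metadata):
    # Both fields must be present together, mirroring the endpoint's check.
    if not doc_type or not doc_metadata:
        raise ValueError("Both doc_type and doc_metadata must be provided when adding metadata")
    if doc_type not in DOCUMENT_METADATA_SCHEMA:
        raise ValueError("Invalid doc_type")
    if not isinstance(doc_metadata, dict):
        raise ValueError("doc_metadata must be a dictionary")
    # "others" skips per-field type checks; known doc_types are type-checked.
    if doc_type != "others":
        schema = DOCUMENT_METADATA_SCHEMA[doc_type]
        for key, value in doc_metadata.items():
            if key in schema and not isinstance(value, schema[key]):
                raise ValueError(f"Invalid type for metadata field {key}")


validate_metadata("book", {"title": "Dify 101", "author": "Jane"})  # passes silently
```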
|
@ -1,126 +0,0 @@
|
||||
from flask_login import current_user # type: ignore # type: ignore
|
||||
from flask_restful import marshal, reqparse # type: ignore
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.wraps import DatasetApiResource
|
||||
from fields.dataset_fields import dataset_metadata_fields
|
||||
from services.dataset_service import DatasetService
|
||||
from services.entities.knowledge_entities.knowledge_entities import (
|
||||
MetadataArgs,
|
||||
MetadataOperationData,
|
||||
)
|
||||
from services.metadata_service import MetadataService
|
||||
|
||||
|
||||
def _validate_name(name):
|
||||
if not name or len(name) < 1 or len(name) > 40:
|
||||
raise ValueError("Name must be between 1 to 40 characters.")
|
||||
return name
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if len(description) > 400:
|
||||
raise ValueError("Description cannot exceed 400 characters.")
|
||||
return description
|
||||
|
||||
|
||||
class DatasetMetadataCreateServiceApi(DatasetApiResource):
|
||||
def post(self, tenant_id, dataset_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataArgs(**args)
|
||||
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
metadata = MetadataService.create_metadata(dataset_id_str, metadata_args)
|
||||
return marshal(metadata, dataset_metadata_fields), 201
|
||||
|
||||
def get(self, tenant_id, dataset_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
return MetadataService.get_dataset_metadatas(dataset), 200
|
||||
|
||||
|
||||
class DatasetMetadataServiceApi(DatasetApiResource):
|
||||
def patch(self, tenant_id, dataset_id, metadata_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
dataset_id_str = str(dataset_id)
|
||||
metadata_id_str = str(metadata_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
metadata = MetadataService.update_metadata_name(dataset_id_str, metadata_id_str, args.get("name"))
|
||||
return marshal(metadata, dataset_metadata_fields), 200
|
||||
|
||||
def delete(self, tenant_id, dataset_id, metadata_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
metadata_id_str = str(metadata_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
|
||||
return 200
|
||||
|
||||
|
||||
class DatasetMetadataBuiltInFieldServiceApi(DatasetApiResource):
|
||||
def get(self, tenant_id):
|
||||
built_in_fields = MetadataService.get_built_in_fields()
|
||||
return {"fields": built_in_fields}, 200
|
||||
|
||||
|
||||
class DatasetMetadataBuiltInFieldActionServiceApi(DatasetApiResource):
|
||||
def post(self, tenant_id, dataset_id, action):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
if action == "enable":
|
||||
MetadataService.enable_built_in_field(dataset)
|
||||
elif action == "disable":
|
||||
MetadataService.disable_built_in_field(dataset)
|
||||
return 200
|
||||
|
||||
|
||||
class DocumentMetadataEditServiceApi(DatasetApiResource):
|
||||
def post(self, tenant_id, dataset_id):
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataOperationData(**args)
|
||||
|
||||
MetadataService.update_documents_metadata(dataset, metadata_args)
|
||||
|
||||
return 200
|
||||
|
||||
|
||||
api.add_resource(DatasetMetadataCreateServiceApi, "/datasets/<uuid:dataset_id>/metadata")
|
||||
api.add_resource(DatasetMetadataServiceApi, "/datasets/<uuid:dataset_id>/metadata/<uuid:metadata_id>")
|
||||
api.add_resource(DatasetMetadataBuiltInFieldServiceApi, "/datasets/metadata/built-in")
|
||||
api.add_resource(
|
||||
DatasetMetadataBuiltInFieldActionServiceApi, "/datasets/<uuid:dataset_id>/metadata/built-in/<string:action>"
|
||||
)
|
||||
api.add_resource(DocumentMetadataEditServiceApi, "/datasets/<uuid:dataset_id>/documents/metadata")
|
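For reference, a hedged sketch of how the routes in this removed file are addressed on main (base URL and API key are placeholders, and the service-API prefix is an assumption; the payload fields mirror the reqparse arguments above):

```python
import requests

BASE = "http://localhost:5001/v1"  # assumed service-API base URL
HEADERS = {"Authorization": "Bearer <dataset-api-key>"}  # assumed auth scheme

# Create a metadata field on a dataset (POST /datasets/<id>/metadata)
requests.post(
    f"{BASE}/datasets/<dataset_id>/metadata",
    headers=HEADERS,
    json={"type": "string", "name": "genre"},
)

# Rename it (PATCH /datasets/<id>/metadata/<metadata_id>)
requests.patch(
    f"{BASE}/datasets/<dataset_id>/metadata/<metadata_id>",
    headers=HEADERS,
    json={"name": "category"},
)
```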
@@ -11,7 +11,6 @@ from controllers.web.error import (
     ProviderNotInitializeError,
     ProviderQuotaExceededError,
 )
-from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from controllers.web.wraps import WebApiResource
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
@@ -24,7 +23,6 @@ from core.model_runtime.errors.invoke import InvokeError
 from libs import helper
 from models.model import App, AppMode, EndUser
 from services.app_generate_service import AppGenerateService
-from services.errors.llm import InvokeRateLimitError

 logger = logging.getLogger(__name__)

@@ -57,11 +55,9 @@ class WorkflowRunApi(WebApiResource):
             raise ProviderModelCurrentlyNotSupportError()
         except InvokeError as e:
             raise CompletionRequestError(e.description)
-        except InvokeRateLimitError as ex:
-            raise InvokeRateLimitHttpError(ex.description)
         except ValueError as e:
             raise e
-        except Exception:
+        except Exception as e:
             logging.exception("internal server error.")
             raise InternalServerError()
@@ -10,6 +10,7 @@ from core.llm_generator.prompts import (
     GENERATOR_QA_PROMPT,
     JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE,
     PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE,
+    STRUCTURED_OUTPUT_GENERATE_TEMPLATE,
     WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
 )
 from core.model_manager import ModelManager
@@ -340,3 +341,43 @@ class LLMGenerator:

         answer = cast(str, response.message.content)
         return answer.strip()
+
+    @classmethod
+    def generate_structured_output(cls, tenant_id: str, instruction: str, model_config: dict, max_tokens: int):
+        prompt_template = PromptTemplateParser(STRUCTURED_OUTPUT_GENERATE_TEMPLATE)
+
+        prompt = prompt_template.format(
+            inputs={
+                "INSTRUCTION": instruction,
+            },
+            remove_template_variables=False,
+        )
+
+        model_manager = ModelManager()
+        model_instance = model_manager.get_model_instance(
+            tenant_id=tenant_id,
+            model_type=ModelType.LLM,
+            provider=model_config.get("provider", ""),
+            model=model_config.get("name", ""),
+        )
+
+        prompt_messages = [UserPromptMessage(content=prompt)]
+        model_parameters = {"max_tokens": max_tokens, "temperature": 0.01}
+
+        try:
+            response = cast(
+                LLMResult,
+                model_instance.invoke_llm(
+                    prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
+                ),
+            )
+
+            generated_json_schema = cast(str, response.message.content)
+            return {"output": generated_json_schema, "error": ""}
+
+        except InvokeError as e:
+            error = str(e)
+            return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"}
+        except Exception as e:
+            logging.exception(f"Failed to invoke LLM model, model: {model_config.get('name')}")
+            return {"output": "", "error": f"An unexpected error occurred: {str(e)}"}
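Since generate_structured_output returns a plain dict rather than raising on model errors, callers are expected to branch on the error field. A minimal sketch (the tenant ID and model names are placeholders):

```python
result = LLMGenerator.generate_structured_output(
    tenant_id="<tenant-id>",
    instruction="I need name and age",
    model_config={"provider": "openai", "name": "gpt-4o-mini"},  # illustrative model
    max_tokens=1024,
)
if result["error"]:
    # invocation failed; "output" is empty in this case
    print("generation failed:", result["error"])
else:
    # "output" holds the raw JSON Schema string produced by the model
    print("raw JSON Schema:", result["output"])
```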
@@ -220,3 +220,112 @@ Here is the task description: {{INPUT_TEXT}}

 You just need to generate the output
 """  # noqa: E501
+
+STRUCTURED_OUTPUT_GENERATE_TEMPLATE = """
+Your task is to convert simple user descriptions into properly formatted JSON Schema definitions. When a user describes data fields they need, generate a complete, valid JSON Schema that accurately represents those fields with appropriate types and requirements.
+
+## Instructions:
+
+1. Analyze the user's description of their data needs
+2. Identify each property that should be included in the schema
+3. Determine the appropriate data type for each property
+4. Decide which properties should be required
+5. Generate a complete JSON Schema with proper syntax
+6. Include appropriate constraints when specified (min/max values, patterns, formats)
+7. Provide ONLY the JSON Schema without any additional explanations, comments, or markdown formatting.
+8. DO NOT use markdown code blocks (``` or ``` json). Return the raw JSON Schema directly.
+
+## Examples:
+
+### Example 1:
+**User Input:** I need name and age
+**JSON Schema Output:**
+{
+  "type": "object",
+  "properties": {
+    "name": { "type": "string" },
+    "age": { "type": "number" }
+  },
+  "required": ["name", "age"]
+}
+
+### Example 2:
+**User Input:** I want to store information about books including title, author, publication year and optional page count
+**JSON Schema Output:**
+{
+  "type": "object",
+  "properties": {
+    "title": { "type": "string" },
+    "author": { "type": "string" },
+    "publicationYear": { "type": "integer" },
+    "pageCount": { "type": "integer" }
+  },
+  "required": ["title", "author", "publicationYear"]
+}
+
+### Example 3:
+**User Input:** Create a schema for user profiles with email, password, and age (must be at least 18)
+**JSON Schema Output:**
+{
+  "type": "object",
+  "properties": {
+    "email": {
+      "type": "string",
+      "format": "email"
+    },
+    "password": {
+      "type": "string",
+      "minLength": 8
+    },
+    "age": {
+      "type": "integer",
+      "minimum": 18
+    }
+  },
+  "required": ["email", "password", "age"]
+}
+
+### Example 4:
+**User Input:** I need album schema, the ablum has songs, and each song has name, duration, and artist.
+**JSON Schema Output:**
+{
+  "type": "object",
+  "properties": {
+    "properties": {
+      "songs": {
+        "type": "array",
+        "items": {
+          "type": "object",
+          "properties": {
+            "name": {
+              "type": "string"
+            },
+            "id": {
+              "type": "string"
+            },
+            "duration": {
+              "type": "string"
+            },
+            "aritst": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "name",
+            "id",
+            "duration",
+            "aritst"
+          ]
+        }
+      }
+    }
+  },
+  "required": [
+    "songs"
+  ]
+}
+
+Now, generate a JSON Schema based on my description:
+**User Input:** {{INSTRUCTION}}
+**JSON Schema Output:**
+"""  # noqa: E501
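The template instructs the model to return a raw schema with no markdown fences, so downstream code can parse the output directly. A hedged sanity check one might run on that output (the shape requirements here are the minimal ones the template's examples promise, not an exhaustive JSON Schema validation):

```python
import json


def is_probably_valid_schema(raw: str) -> bool:
    # Parse the model output and check the minimal shape the template asks for.
    try:
        schema = json.loads(raw)
    except json.JSONDecodeError:
        return False
    return isinstance(schema, dict) and schema.get("type") == "object" and "properties" in schema


print(is_probably_valid_schema('{"type": "object", "properties": {"name": {"type": "string"}}}'))  # True
print(is_probably_valid_schema("```json not raw output```"))  # False
```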
@ -1,7 +1,11 @@
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import Callable, Generator, Iterable, Sequence
|
||||
from typing import IO, Any, Literal, Optional, Union, cast, overload
|
||||
|
||||
from packaging import version
|
||||
from packaging.version import Version
|
||||
|
||||
from configs import dify_config
|
||||
from core.entities.embedding_type import EmbeddingInputType
|
||||
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
|
||||
@ -9,8 +13,8 @@ from core.entities.provider_entities import ModelLoadBalancingConfiguration
|
||||
from core.errors.error import ProviderTokenNotInitError
|
||||
from core.model_runtime.callbacks.base_callback import Callback
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool, UserPromptMessage
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
|
||||
from core.model_runtime.entities.rerank_entities import RerankResult
|
||||
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeConnectionError, InvokeRateLimitError
|
||||
@ -20,7 +24,9 @@ from core.model_runtime.model_providers.__base.rerank_model import RerankModel
|
||||
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.__base.tts_model import TTSModel
|
||||
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
|
||||
from core.provider_manager import ProviderManager
|
||||
from core.workflow.utils.structured_output.prompt import STRUCTURED_OUTPUT_PROMPT
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.provider import ProviderType
|
||||
|
||||
@ -160,6 +166,13 @@ class ModelInstance:
|
||||
raise Exception("Model type instance is not LargeLanguageModel")
|
||||
|
||||
self.model_type_instance = cast(LargeLanguageModel, self.model_type_instance)
|
||||
if model_parameters and model_parameters.get("structured_output_schema"):
|
||||
result = self._handle_structured_output(
|
||||
model_parameters=model_parameters,
|
||||
prompt=prompt_messages,
|
||||
)
|
||||
prompt_messages = result["prompt"]
|
||||
model_parameters = result["parameters"]
|
||||
return cast(
|
||||
Union[LLMResult, Generator],
|
||||
self._round_robin_invoke(
|
||||
@ -410,6 +423,83 @@ class ModelInstance:
|
||||
model=self.model, credentials=self.credentials, language=language
|
||||
)
|
||||
|
||||
def _handle_structured_output(self, model_parameters: dict, prompt: Sequence[PromptMessage]) -> dict:
|
||||
"""
|
||||
Handle structured output
|
||||
|
||||
:param model_parameters: model parameters
|
||||
:param provider: provider name
|
||||
:return: updated model parameters
|
||||
"""
|
||||
structured_output_schema = model_parameters.pop("structured_output_schema")
|
||||
if not structured_output_schema:
|
||||
raise ValueError("Please provide a valid structured output schema")
|
||||
|
||||
try:
|
||||
schema = json.loads(structured_output_schema)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError("structured_output_schema is not valid JSON format")
|
||||
|
||||
model_schema = self._fetch_model_schema(self.provider, self.model_type_instance.model_type, self.model)
|
||||
if not model_schema:
|
||||
raise ValueError("Unable to fetch model schema")
|
||||
|
||||
supported_schema_keys = ["json_schema", "format"]
|
||||
rules = model_schema.parameter_rules
|
||||
schema_key = next((rule.name for rule in rules if rule.name in supported_schema_keys), None)
|
||||
|
||||
if schema_key == "json_schema":
|
||||
name = {"name": "llm_response"}
|
||||
if "gemini" in self.model:
|
||||
|
||||
def remove_additional_properties(schema):
|
||||
if isinstance(schema, dict):
|
||||
for key, value in list(schema.items()):
|
||||
if key == "additionalProperties":
|
||||
del schema[key]
|
||||
else:
|
||||
remove_additional_properties(value)
|
||||
|
||||
remove_additional_properties(schema)
|
||||
schema_json = schema
|
||||
else:
|
||||
schema_json = {"schema": schema, **name}
|
||||
|
||||
model_parameters["json_schema"] = json.dumps(schema_json, ensure_ascii=False)
|
||||
|
||||
elif schema_key == "format" and self.plugin_version > version.parse("0.0.3"):
|
||||
model_parameters["format"] = json.dumps(schema, ensure_ascii=False)
|
||||
else:
|
||||
content = prompt[-1].content if isinstance(prompt[-1].content, str) else ""
|
||||
structured_output_prompt = STRUCTURED_OUTPUT_PROMPT.replace("{{schema}}", structured_output_schema).replace(
|
||||
"{{question}}", content
|
||||
)
|
||||
structured_output_prompt_message = UserPromptMessage(content=structured_output_prompt)
|
||||
prompt = list(prompt[:-1]) + [structured_output_prompt_message]
|
||||
return {"prompt": prompt, "parameters": model_parameters}
|
||||
for rule in rules:
|
||||
if rule.name == "response_format":
|
||||
model_parameters["response_format"] = "JSON" if "JSON" in rule.options else "json_schema"
|
||||
return {"prompt": prompt, "parameters": model_parameters}
|
||||
|
||||
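For orientation, a minimal caller-side sketch (the schema and parameter values are hypothetical) of how the path above is triggered: `structured_output_schema` must arrive inside `model_parameters` as a JSON string.

import json

schema = {"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}
model_parameters = {
    "temperature": 0.1,
    "structured_output_schema": json.dumps(schema, ensure_ascii=False),
}
# invoke_llm pops the schema and, depending on the provider's parameter rules, sets
# model_parameters["json_schema"], model_parameters["format"], or falls back to
# injecting STRUCTURED_OUTPUT_PROMPT as the final user message.
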
def _fetch_model_schema(self, provider: str, model_type: ModelType, model: str) -> AIModelEntity | None:
"""
Fetch model schema
"""
model_provider = ModelProviderFactory(self.model_type_instance.tenant_id)
return model_provider.get_model_schema(
provider=provider, model_type=model_type, model=model, credentials=self.credentials
)

@property
def plugin_version(self) -> Version:
"""
Get the plugin version of the model's provider
"""
return version.parse(
self.model_type_instance.plugin_model_provider.plugin_unique_identifier.split(":")[1].split("@")[0]
)

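To make the parsing concrete, a small sketch with a made-up identifier of the form `<org>/<plugin>:<version>@<checksum>`:

from packaging import version

identifier = "langgenius/openai:0.0.5@3f2a"  # hypothetical plugin_unique_identifier
assert version.parse(identifier.split(":")[1].split("@")[0]) == version.parse("0.0.5")
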
class ModelManager:
def __init__(self) -> None:
@@ -10,7 +10,7 @@
- Supports capability invocation for 5 model types

- `LLM` - LLM text completion and chat, with token pre-computation
- `Text Embedding Model` - text embedding, with token pre-computation
- `Text Embedidng Model` - text embedding, with token pre-computation
- `Rerank Model` - segment reranking
- `Speech-to-text Model` - speech-to-text
- `Text-to-speech Model` - text-to-speech
@@ -196,8 +196,7 @@ class ElasticSearchVector(BaseVector):
Field.METADATA_KEY.value: {
"type": "object",
"properties": {
"doc_id": {"type": "keyword"},  # Map doc_id to keyword type
"document_id": {"type": "keyword"},  # Map doc_id to keyword type
"doc_id": {"type": "keyword"}  # Map doc_id to keyword type
},
},
}
@@ -11,4 +11,3 @@ class Field(Enum):
TEXT_KEY = "text"
PRIMARY_KEY = "id"
DOC_ID = "metadata.doc_id"
DOCUMENT_ID = "metadata.document_id"
@@ -119,7 +119,7 @@ class QdrantVector(BaseVector):
max_indexing_threads=0,
on_disk=False,
)
self._client.create_collection(
self._client.recreate_collection(
collection_name=collection_name,
vectors_config=vectors_config,
hnsw_config=hnsw_config,
@@ -134,10 +134,6 @@ class QdrantVector(BaseVector):
self._client.create_payload_index(
collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD
)
# create document_id payload index
self._client.create_payload_index(
collection_name, Field.DOCUMENT_ID.value, field_schema=PayloadSchemaType.KEYWORD
)
# create full text index
text_index_params = TextIndexParams(
type=TextIndexType.TEXT,
@@ -129,7 +129,7 @@ class TidbOnQdrantVector(BaseVector):
max_indexing_threads=0,
on_disk=False,
)
self._client.create_collection(
self._client.recreate_collection(
collection_name=collection_name,
vectors_config=vectors_config,
hnsw_config=hnsw_config,
@@ -144,10 +144,6 @@ class TidbOnQdrantVector(BaseVector):
self._client.create_payload_index(
collection_name, Field.DOC_ID.value, field_schema=PayloadSchemaType.KEYWORD
)
# create document_id payload index
self._client.create_payload_index(
collection_name, Field.DOCUMENT_ID.value, field_schema=PayloadSchemaType.KEYWORD
)
# create full text index
text_index_params = TextIndexParams(
type=TextIndexType.TEXT,
@@ -322,17 +318,26 @@ class TidbOnQdrantVector(BaseVector):
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
from qdrant_client.http import models

filter = None
document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter:
filter = models.Filter(
must=[
models.FieldCondition(
key="metadata.document_id",
match=models.MatchAny(any=document_ids_filter),
)
key="group_id",
match=models.MatchValue(value=self._group_id),
),
],
)
document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter:
should_conditions = []
for document_id_filter in document_ids_filter:
should_conditions.append(
models.FieldCondition(
key="metadata.document_id",
match=models.MatchValue(value=document_id_filter),
)
)
if should_conditions:
filter.should = should_conditions  # type: ignore
results = self._client.search(
collection_name=self._collection_name,
query_vector=query_vector,
@@ -367,17 +372,26 @@ class TidbOnQdrantVector(BaseVector):
"""
from qdrant_client.http import models

scroll_filter = None
document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter:
scroll_filter = models.Filter(
must=[
models.FieldCondition(
key="metadata.document_id",
match=models.MatchAny(any=document_ids_filter),
key="page_content",
match=models.MatchText(text=query),
)
]
)
document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter:
should_conditions = []
for document_id_filter in document_ids_filter:
should_conditions.append(
models.FieldCondition(
key="metadata.document_id",
match=models.MatchValue(value=document_id_filter),
)
)
if should_conditions:
scroll_filter.should = should_conditions  # type: ignore
response = self._client.scroll(
collection_name=self._collection_name,
scroll_filter=scroll_filter,
@@ -105,12 +105,10 @@ class TiDBVector(BaseVector):
text TEXT NOT NULL,
meta JSON NOT NULL,
doc_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.doc_id'))) STORED,
document_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.document_id'))) STORED,
vector VECTOR<FLOAT>({dimension}) NOT NULL,
create_time DATETIME DEFAULT CURRENT_TIMESTAMP,
update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
KEY (doc_id),
KEY (document_id),
VECTOR INDEX idx_vector (({tidb_dist_func}(vector))) USING HNSW
);
""")
@@ -189,10 +189,7 @@ class WeaviateVector(BaseVector):
vector = {"vector": query_vector}
document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter:
operands = []
for document_id_filter in document_ids_filter:
operands.append({"path": ["document_id"], "operator": "Equal", "valueText": document_id_filter})
where_filter = {"operator": "Or", "operands": operands}
where_filter = {"operator": "ContainsAny", "path": ["document_id"], "valueTextArray": document_ids_filter}
query_obj = query_obj.with_where(where_filter)
result = (
query_obj.with_near_vector(vector)
@@ -240,10 +237,7 @@ class WeaviateVector(BaseVector):
query_obj = self._client.query.get(collection_name, properties)
document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter:
operands = []
for document_id_filter in document_ids_filter:
operands.append({"path": ["document_id"], "operator": "Equal", "valueText": document_id_filter})
where_filter = {"operator": "Or", "operands": operands}
where_filter = {"operator": "ContainsAny", "path": ["document_id"], "valueTextArray": document_ids_filter}
query_obj = query_obj.with_where(where_filter)
query_obj = query_obj.with_additional(["vector"])
properties = ["text"]
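The hunk above switches between two ways of expressing the same document-id restriction; a sketch with hypothetical ids shows the two filter shapes side by side.

document_ids_filter = ["doc-1", "doc-2"]  # hypothetical ids

# one Equal operand per id, combined with Or
or_filter = {
    "operator": "Or",
    "operands": [
        {"path": ["document_id"], "operator": "Equal", "valueText": doc_id}
        for doc_id in document_ids_filter
    ],
}
# a single ContainsAny over the whole list
contains_any_filter = {"operator": "ContainsAny", "path": ["document_id"], "valueTextArray": document_ids_filter}
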
@@ -870,7 +870,7 @@ class DatasetRetrieval:
for condition in metadata_filtering_conditions.conditions:  # type: ignore
metadata_name = condition.name
expected_value = condition.value
if expected_value is not None or condition.comparison_operator in ("empty", "not empty"):
if expected_value or condition.comparison_operator in ("empty", "not empty"):
if isinstance(expected_value, str):
expected_value = self._replace_metadata_filter_value(expected_value, inputs)
filters = self._process_metadata_filter_func(
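The guard here flips between `is not None` and plain truthiness; the practical difference is whether empty values reach the filter logic. A one-line illustration:

expected_value = ""  # empty string coming from a metadata condition
assert (expected_value is not None) is True   # the "is not None" guard lets it through
assert bool(expected_value) is False          # the truthiness guard skips it unless the
                                              # operator is "empty" / "not empty"
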
@@ -1,4 +1,4 @@
import json
from ast import literal_eval
from collections.abc import Generator, Mapping, Sequence
from typing import Any, cast

@@ -143,23 +143,15 @@ class AgentNode(ToolNode):
raise ValueError(f"Variable {agent_input.value} does not exist")
parameter_value = variable.value
elif agent_input.type in {"mixed", "constant"}:
# variable_pool.convert_template expects a string template,
# but if passing a dict, convert to JSON string first before rendering
try:
parameter_value = json.dumps(agent_input.value, ensure_ascii=False)
except TypeError:
parameter_value = str(agent_input.value)
segment_group = variable_pool.convert_template(parameter_value)
segment_group = variable_pool.convert_template(str(agent_input.value))
parameter_value = segment_group.log if for_log else segment_group.text
# variable_pool.convert_template returns a string,
# so we need to convert it back to a dictionary
try:
parameter_value = json.loads(parameter_value)
except json.JSONDecodeError:
parameter_value = parameter_value
else:
raise ValueError(f"Unknown agent input type '{agent_input.type}'")
value = parameter_value
value = parameter_value.strip()
if (parameter_value.startswith("{") and parameter_value.endswith("}")) or (
parameter_value.startswith("[") and parameter_value.endswith("]")
):
value = literal_eval(parameter_value)  # transform string to python object
if parameter.type == "array[tools]":
value = cast(list[dict[str, Any]], value)
value = [tool for tool in value if tool.get("enabled", False)]
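A sketch of the dict round-trip performed by the first variant above (the input value is hypothetical): dict inputs are serialized before template rendering and deserialized afterwards.

import json

agent_input_value = {"city": "{{#start.city#}}"}  # hypothetical mixed-type input
as_text = json.dumps(agent_input_value, ensure_ascii=False)  # convert_template wants a string
rendered = variable_pool.convert_template(as_text).text
try:
    parameter_value = json.loads(rendered)  # back to a dict when the result is valid JSON
except json.JSONDecodeError:
    parameter_value = rendered
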
@@ -65,7 +65,7 @@ class StreamProcessor(ABC):
# Issues: #13626
if (
finished_node_id in self.graph.node_parallel_mapping
and edge.target_node_id not in self.graph.node_parallel_mapping
and edge.target_node_id not in self.graph.parallel_mapping
):
continue
unreachable_first_node_ids.append(edge.target_node_id)
@@ -356,7 +356,7 @@ class KnowledgeRetrievalNode(LLMNode):
for condition in node_data.metadata_filtering_conditions.conditions:  # type: ignore
metadata_name = condition.name
expected_value = condition.value
if expected_value is not None or condition.comparison_operator in ("empty", "not empty"):
if expected_value or condition.comparison_operator in ("empty", "not empty"):
if isinstance(expected_value, str):
expected_value = self.graph_runtime_state.variable_pool.convert_template(
expected_value
@@ -65,6 +65,8 @@ class LLMNodeData(BaseNodeData):
memory: Optional[MemoryConfig] = None
context: ContextConfig
vision: VisionConfig = Field(default_factory=VisionConfig)
structured_output: dict | None = None
structured_output_enabled: bool = False

@field_validator("prompt_config", mode="before")
@classmethod
@@ -1,5 +1,6 @@
import json
import logging
import re
from collections.abc import Generator, Mapping, Sequence
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any, Optional, cast
@@ -57,6 +58,7 @@ from core.workflow.nodes.event import (
RunRetrieverResourceEvent,
RunStreamChunkEvent,
)
from core.workflow.utils.structured_output.utils import parse_partial_json
from core.workflow.utils.variable_template_parser import VariableTemplateParser
from extensions.ext_database import db
from models.model import Conversation
@@ -192,7 +194,19 @@ class LLMNode(BaseNode[LLMNodeData]):
self.deduct_llm_quota(tenant_id=self.tenant_id, model_instance=model_instance, usage=usage)
break
outputs = {"text": result_text, "usage": jsonable_encoder(usage), "finish_reason": finish_reason}

if self.node_data.structured_output_enabled and self.node_data.structured_output:
structured_output = {}
try:
structured_output = parse_partial_json(result_text)
except json.JSONDecodeError:
# Try to find JSON string within triple backticks
_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)
match = _json_markdown_re.search(result_text)
# If no match found, assume the entire string is a JSON string
# Else, use the content within the backticks
json_str = result_text if match is None else match.group(2)
structured_output = parse_partial_json(json_str)
outputs["structured_output"] = structured_output
yield RunCompletedEvent(
run_result=NodeRunResult(
status=WorkflowNodeExecutionStatus.SUCCEEDED,
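To see the fallback in action, a sketch with a hypothetical model response: the regex keeps everything after the opening fence, and `parse_partial_json` (the new utility below) tolerates the dangling closing backticks.

import re

_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)
result_text = 'Sure, here you go:\n```json\n{"name": "John Doe", "age": 30}\n```'
match = _json_markdown_re.search(result_text)
json_str = result_text if match is None else match.group(2)
# parse_partial_json(json_str) -> {"name": "John Doe", "age": 30}
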
@@ -499,6 +513,14 @@ class LLMNode(BaseNode[LLMNodeData]):

# model config
completion_params = node_data_model.completion_params
if (
isinstance(self.node_data, LLMNodeData)
and self.node_data.structured_output_enabled
and self.node_data.structured_output
):
completion_params["structured_output_schema"] = json.dumps(
self.node_data.structured_output.get("schema", {}), ensure_ascii=False
)
stop = []
if "stop" in completion_params:
stop = completion_params["stop"]
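Note the nesting: `structured_output` stores a dict with a "schema" key, and only the inner schema is serialized into `completion_params`. A hypothetical node configuration:

structured_output = {
    "schema": {
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
        "required": ["name", "age"],
    }
}
# completion_params["structured_output_schema"] then holds json.dumps(structured_output["schema"])
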
api/core/workflow/utils/structured_output/prompt.py (new file, 21 lines)
@@ -0,0 +1,21 @@
STRUCTURED_OUTPUT_PROMPT = """
You're a helpful AI assistant. You can answer questions and output in JSON format.

eg1:
Here is the JSON schema:
{"additionalProperties": false, "properties": {"age": {"type": "number"}, "name": {"type": "string"}}, "required": ["name", "age"], "type": "object"}

Here is the user's question:
My name is John Doe and I am 30 years old.

output:
{"name": "John Doe", "age": 30}

Here is the JSON schema:
{{schema}}

Here is the user's question:
{{question}}
output:

"""  # noqa: E501
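A short sketch of how `_handle_structured_output` renders this template in its fallback path (schema and question values are hypothetical):

schema = '{"properties": {"name": {"type": "string"}}, "required": ["name"], "type": "object"}'
question = "My name is John Doe and I am 30 years old."
prompt = STRUCTURED_OUTPUT_PROMPT.replace("{{schema}}", schema).replace("{{question}}", question)
# the rendered prompt replaces the raw question as the final user message
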
api/core/workflow/utils/structured_output/utils.py (new file, 81 lines)
@@ -0,0 +1,81 @@
import json
from typing import Any


def parse_partial_json(s: str, *, strict: bool = False) -> Any:
"""Parse a JSON string that may be missing closing braces.

Args:
s: The JSON string to parse.
strict: Whether to use strict parsing. Defaults to False.

Returns:
The parsed JSON object as a Python dictionary.
"""
# Attempt to parse the string as-is.
try:
return json.loads(s, strict=strict)
except json.JSONDecodeError:
pass

# Initialize variables.
new_chars = []
stack = []
is_inside_string = False
escaped = False

# Process each character in the string one at a time.
for char in s:
if is_inside_string:
if char == '"' and not escaped:
is_inside_string = False
elif char == "\n" and not escaped:
char = "\\n"  # Replace the newline character with the escape sequence.
elif char == "\\":
escaped = not escaped
else:
escaped = False
else:
if char == '"':
is_inside_string = True
escaped = False
elif char == "{":
stack.append("}")
elif char == "[":
stack.append("]")
elif char in ("}", "]"):
if stack and stack[-1] == char:
stack.pop()
else:
# Mismatched closing character; the input is malformed.
return {}

# Append the processed character to the new string.
new_chars.append(char)

# If we're still inside a string at the end of processing,
# we need to close the string.
if is_inside_string:
if escaped:  # Remove unterminated escape character
new_chars.pop()
new_chars.append('"')

# Reverse the stack to get the closing characters.
stack.reverse()

# Try to parse modified copies of the string until we succeed or run out of characters.
while new_chars:
# Close any remaining open structures in the reverse
# order that they were opened.
# Attempt to parse the modified string as JSON.
try:
return json.loads("".join(new_chars + stack), strict=strict)
except json.JSONDecodeError:
# If we still can't parse the string as JSON,
# try removing the last character
new_chars.pop()

# If we got here, we ran out of characters to remove
# and still couldn't parse the string as JSON, so return the parse error
# for the original string.
return json.loads(s, strict=strict)
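A few illustrative calls showing what the recovery loop yields for truncated output:

parse_partial_json('{"name": "John Doe", "age": 3')  # -> {'name': 'John Doe', 'age': 3}
parse_partial_json('{"items": ["a", "b"')            # -> {'items': ['a', 'b']}
parse_partial_json('{"name": "Jo')                   # -> {'name': 'Jo'}
parse_partial_json('{"a": 1]')                       # -> {} (mismatched closing character)
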
@@ -3,7 +3,7 @@ from dify_app import DifyApp

def init_app(app: DifyApp):
from commands import (
add_qdrant_index,
add_qdrant_doc_id_index,
convert_to_agent_apps,
create_tenant,
extract_plugins,
@@ -11,7 +11,6 @@ def init_app(app: DifyApp):
fix_app_site_missing,
install_plugins,
migrate_data_for_plugin,
old_metadata_migration,
reset_email,
reset_encrypt_key_pair,
reset_password,
@@ -25,7 +24,7 @@ def init_app(app: DifyApp):
reset_encrypt_key_pair,
vdb_migrate,
convert_to_agent_apps,
add_qdrant_index,
add_qdrant_doc_id_index,
create_tenant,
upgrade_db,
fix_app_site_missing,
@@ -33,7 +32,6 @@ def init_app(app: DifyApp):
extract_plugins,
extract_unique_plugins,
install_plugins,
old_metadata_migration,
]
for cmd in cmds_to_register:
app.cli.add_command(cmd)
@@ -785,11 +785,9 @@ class TenantService:
@staticmethod
def remove_member_from_tenant(tenant: Tenant, account: Account, operator: Account) -> None:
"""Remove member from tenant"""
if operator.id == account.id:
if operator.id == account.id and TenantService.check_member_permission(tenant, operator, account, "remove"):
raise CannotOperateSelfError("Cannot operate self.")

TenantService.check_member_permission(tenant, operator, account, "remove")

ta = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=account.id).first()
if not ta:
raise MemberNotInTenantError("Member not in tenant.")
@@ -11,17 +11,13 @@ from core.app.apps.completion.app_generator import CompletionAppGenerator
from core.app.apps.workflow.app_generator import WorkflowAppGenerator
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.features.rate_limiting import RateLimit
from libs.helper import RateLimiter
from models.model import Account, App, AppMode, EndUser
from models.workflow import Workflow
from services.billing_service import BillingService
from services.errors.llm import InvokeRateLimitError
from services.workflow_service import WorkflowService


class AppGenerateService:
system_rate_limiter = RateLimiter("app_daily_rate_limiter", dify_config.APP_DAILY_RATE_LIMIT, 86400)

@classmethod
def generate(
cls,
@@ -40,19 +36,6 @@ class AppGenerateService:
:param streaming: streaming
:return:
"""
# system level rate limiter
if dify_config.BILLING_ENABLED:
# check if it's free plan
limit_info = BillingService.get_info(app_model.tenant_id)
if limit_info["subscription"]["plan"] == "sandbox":
if cls.system_rate_limiter.is_rate_limited(app_model.tenant_id):
raise InvokeRateLimitError(
"Rate limit exceeded, please upgrade your plan "
f"or your RPD was {dify_config.APP_DAILY_RATE_LIMIT} requests/day"
)
cls.system_rate_limiter.increment_rate_limit(app_model.tenant_id)

# app level rate limiter
max_active_request = AppGenerateService._get_max_active_requests(app_model)
rate_limit = RateLimit(app_model.id, max_active_request)
request_id = RateLimit.gen_request_key()
@@ -46,6 +46,7 @@ from models.source import DataSourceOauthBinding
from services.entities.knowledge_entities.knowledge_entities import (
ChildChunkUpdateArgs,
KnowledgeConfig,
MetaDataConfig,
RerankingModel,
RetrievalModel,
SegmentUpdateArgs,
@@ -998,6 +999,9 @@ class DocumentService:
document.data_source_info = json.dumps(data_source_info)
document.batch = batch
document.indexing_status = "waiting"
if knowledge_config.metadata:
document.doc_type = knowledge_config.metadata.doc_type
document.metadata = knowledge_config.metadata.doc_metadata
db.session.add(document)
documents.append(document)
duplicate_document_ids.append(document.id)
@@ -1014,6 +1018,7 @@ class DocumentService:
account,
file_name,
batch,
knowledge_config.metadata,
)
db.session.add(document)
db.session.flush()
@@ -1071,6 +1076,7 @@ class DocumentService:
account,
truncated_page_name,
batch,
knowledge_config.metadata,
)
db.session.add(document)
db.session.flush()
@@ -1111,6 +1117,7 @@ class DocumentService:
account,
document_name,
batch,
knowledge_config.metadata,
)
db.session.add(document)
db.session.flush()
@@ -1148,6 +1155,7 @@ class DocumentService:
account: Account,
name: str,
batch: str,
metadata: Optional[MetaDataConfig] = None,
):
document = Document(
tenant_id=dataset.tenant_id,
@@ -1172,6 +1180,9 @@ class DocumentService:
BuiltInField.last_update_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
BuiltInField.source: data_source_type,
}
if metadata is not None:
doc_metadata.update(metadata.doc_metadata)
document.doc_type = metadata.doc_type
if doc_metadata:
document.doc_metadata = doc_metadata
return document
@@ -1286,6 +1297,10 @@ class DocumentService:
# update document name
if document_data.name:
document.name = document_data.name
# update doc_type and doc_metadata if provided
if document_data.metadata is not None:
document.doc_metadata = document_data.metadata.doc_metadata
document.doc_type = document_data.metadata.doc_type
# update document to be waiting
document.indexing_status = "waiting"
document.completed_at = None
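Assuming `MetaDataConfig` exposes the `doc_type` / `doc_metadata` pair that the attribute accesses above suggest, a hypothetical payload deserializes like this:

from services.entities.knowledge_entities.knowledge_entities import MetaDataConfig

metadata = MetaDataConfig(doc_type="book", doc_metadata={"title": "Example Handbook", "author": "Jane Doe"})
assert metadata.doc_type == "book"
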
@@ -84,22 +84,6 @@ class RerankingModel(BaseModel):
reranking_model_name: Optional[str] = None


class WeightVectorSetting(BaseModel):
vector_weight: float
embedding_provider_name: str
embedding_model_name: str


class WeightKeywordSetting(BaseModel):
keyword_weight: float


class WeightModel(BaseModel):
weight_type: str
vector_setting: Optional[WeightVectorSetting] = None
keyword_setting: Optional[WeightKeywordSetting] = None


class RetrievalModel(BaseModel):
search_method: Literal["hybrid_search", "semantic_search", "full_text_search"]
reranking_enable: bool
@@ -108,7 +92,6 @@ class RetrievalModel(BaseModel):
top_k: int
score_threshold_enabled: bool
score_threshold: Optional[float] = None
weights: Optional[WeightModel] = None


class MetaDataConfig(BaseModel):
@@ -128,6 +111,7 @@ class KnowledgeConfig(BaseModel):
embedding_model: Optional[str] = None
embedding_model_provider: Optional[str] = None
name: Optional[str] = None
metadata: Optional[MetaDataConfig] = None


class SegmentUpdateArgs(BaseModel):
@@ -1,8 +1,5 @@
from unittest.mock import patch

import pytest
from flask import Flask

from core.app.entities.app_invoke_entities import InvokeFrom
from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult
from core.workflow.entities.variable_pool import VariablePool
@@ -20,20 +17,12 @@ from core.workflow.graph_engine.entities.event import (
from core.workflow.graph_engine.entities.graph import Graph
from core.workflow.graph_engine.entities.runtime_route_state import RouteNodeState
from core.workflow.graph_engine.graph_engine import GraphEngine
from core.workflow.nodes.code.code_node import CodeNode
from core.workflow.nodes.event import RunCompletedEvent, RunStreamChunkEvent
from core.workflow.nodes.llm.node import LLMNode
from core.workflow.nodes.question_classifier.question_classifier_node import QuestionClassifierNode
from models.enums import UserFrom
from models.workflow import WorkflowNodeExecutionStatus, WorkflowType


@pytest.fixture
def app():
app = Flask(__name__)
return app

@patch("extensions.ext_database.db.session.remove")
|
||||
@patch("extensions.ext_database.db.session.close")
|
||||
def test_run_parallel_in_workflow(mock_close, mock_remove):
|
||||
@ -513,361 +502,3 @@ def test_run_branch(mock_close, mock_remove):
|
||||
assert isinstance(items[9], GraphRunSucceededEvent)
|
||||
|
||||
# print(graph_engine.graph_runtime_state.model_dump_json(indent=2))
|
||||
|
||||
|
||||
@patch("extensions.ext_database.db.session.remove")
|
||||
@patch("extensions.ext_database.db.session.close")
|
||||
def test_condition_parallel_correct_output(mock_close, mock_remove, app):
|
||||
"""issue #16238, workflow got unexpected additional output"""
|
||||
|
||||
graph_config = {
|
||||
"edges": [
|
||||
{
|
||||
"data": {
|
||||
"isInIteration": False,
|
||||
"isInLoop": False,
|
||||
"sourceType": "question-classifier",
|
||||
"targetType": "question-classifier",
|
||||
},
|
||||
"id": "1742382406742-1-1742382480077-target",
|
||||
"source": "1742382406742",
|
||||
"sourceHandle": "1",
|
||||
"target": "1742382480077",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"isInIteration": False,
|
||||
"isInLoop": False,
|
||||
"sourceType": "question-classifier",
|
||||
"targetType": "answer",
|
||||
},
|
||||
"id": "1742382480077-1-1742382531085-target",
|
||||
"source": "1742382480077",
|
||||
"sourceHandle": "1",
|
||||
"target": "1742382531085",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"isInIteration": False,
|
||||
"isInLoop": False,
|
||||
"sourceType": "question-classifier",
|
||||
"targetType": "answer",
|
||||
},
|
||||
"id": "1742382480077-2-1742382534798-target",
|
||||
"source": "1742382480077",
|
||||
"sourceHandle": "2",
|
||||
"target": "1742382534798",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"isInIteration": False,
|
||||
"isInLoop": False,
|
||||
"sourceType": "question-classifier",
|
||||
"targetType": "answer",
|
||||
},
|
||||
"id": "1742382480077-1742382525856-1742382538517-target",
|
||||
"source": "1742382480077",
|
||||
"sourceHandle": "1742382525856",
|
||||
"target": "1742382538517",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
{
|
||||
"data": {"isInLoop": False, "sourceType": "start", "targetType": "question-classifier"},
|
||||
"id": "1742382361944-source-1742382406742-target",
|
||||
"source": "1742382361944",
|
||||
"sourceHandle": "source",
|
||||
"target": "1742382406742",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"isInIteration": False,
|
||||
"isInLoop": False,
|
||||
"sourceType": "question-classifier",
|
||||
"targetType": "code",
|
||||
},
|
||||
"id": "1742382406742-1-1742451801533-target",
|
||||
"source": "1742382406742",
|
||||
"sourceHandle": "1",
|
||||
"target": "1742451801533",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
{
|
||||
"data": {"isInLoop": False, "sourceType": "code", "targetType": "answer"},
|
||||
"id": "1742451801533-source-1742434464898-target",
|
||||
"source": "1742451801533",
|
||||
"sourceHandle": "source",
|
||||
"target": "1742434464898",
|
||||
"targetHandle": "target",
|
||||
"type": "custom",
|
||||
"zIndex": 0,
|
||||
},
|
||||
],
|
||||
"nodes": [
|
||||
{
|
||||
"data": {"desc": "", "selected": False, "title": "开始", "type": "start", "variables": []},
|
||||
"height": 54,
|
||||
"id": "1742382361944",
|
||||
"position": {"x": 30, "y": 286},
|
||||
"positionAbsolute": {"x": 30, "y": 286},
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"classes": [{"id": "1", "name": "financial"}, {"id": "2", "name": "other"}],
|
||||
"desc": "",
|
||||
"instruction": "",
|
||||
"instructions": "",
|
||||
"model": {
|
||||
"completion_params": {"temperature": 0.7},
|
||||
"mode": "chat",
|
||||
"name": "qwen-max-latest",
|
||||
"provider": "langgenius/tongyi/tongyi",
|
||||
},
|
||||
"query_variable_selector": ["1742382361944", "sys.query"],
|
||||
"selected": False,
|
||||
"title": "qc",
|
||||
"topics": [],
|
||||
"type": "question-classifier",
|
||||
"vision": {"enabled": False},
|
||||
},
|
||||
"height": 172,
|
||||
"id": "1742382406742",
|
||||
"position": {"x": 334, "y": 286},
|
||||
"positionAbsolute": {"x": 334, "y": 286},
|
||||
"selected": False,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"classes": [
|
||||
{"id": "1", "name": "VAT"},
|
||||
{"id": "2", "name": "Stamp Duty"},
|
||||
{"id": "1742382525856", "name": "other"},
|
||||
],
|
||||
"desc": "",
|
||||
"instruction": "",
|
||||
"instructions": "",
|
||||
"model": {
|
||||
"completion_params": {"temperature": 0.7},
|
||||
"mode": "chat",
|
||||
"name": "qwen-max-latest",
|
||||
"provider": "langgenius/tongyi/tongyi",
|
||||
},
|
||||
"query_variable_selector": ["1742382361944", "sys.query"],
|
||||
"selected": False,
|
||||
"title": "qc 2",
|
||||
"topics": [],
|
||||
"type": "question-classifier",
|
||||
"vision": {"enabled": False},
|
||||
},
|
||||
"height": 210,
|
||||
"id": "1742382480077",
|
||||
"position": {"x": 638, "y": 452},
|
||||
"positionAbsolute": {"x": 638, "y": 452},
|
||||
"selected": False,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"answer": "VAT:{{#sys.query#}}\n",
|
||||
"desc": "",
|
||||
"selected": False,
|
||||
"title": "answer 2",
|
||||
"type": "answer",
|
||||
"variables": [],
|
||||
},
|
||||
"height": 105,
|
||||
"id": "1742382531085",
|
||||
"position": {"x": 942, "y": 486.5},
|
||||
"positionAbsolute": {"x": 942, "y": 486.5},
|
||||
"selected": False,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"answer": "Stamp Duty:{{#sys.query#}}\n",
|
||||
"desc": "",
|
||||
"selected": False,
|
||||
"title": "answer 3",
|
||||
"type": "answer",
|
||||
"variables": [],
|
||||
},
|
||||
"height": 105,
|
||||
"id": "1742382534798",
|
||||
"position": {"x": 942, "y": 631.5},
|
||||
"positionAbsolute": {"x": 942, "y": 631.5},
|
||||
"selected": False,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"answer": "other:{{#sys.query#}}\n",
|
||||
"desc": "",
|
||||
"selected": False,
|
||||
"title": "answer 4",
|
||||
"type": "answer",
|
||||
"variables": [],
|
||||
},
|
||||
"height": 105,
|
||||
"id": "1742382538517",
|
||||
"position": {"x": 942, "y": 776.5},
|
||||
"positionAbsolute": {"x": 942, "y": 776.5},
|
||||
"selected": False,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"answer": "{{#1742451801533.result#}}",
|
||||
"desc": "",
|
||||
"selected": False,
|
||||
"title": "Answer 5",
|
||||
"type": "answer",
|
||||
"variables": [],
|
||||
},
|
||||
"height": 105,
|
||||
"id": "1742434464898",
|
||||
"position": {"x": 942, "y": 274.70425695336615},
|
||||
"positionAbsolute": {"x": 942, "y": 274.70425695336615},
|
||||
"selected": True,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"code": '\ndef main(arg1: str, arg2: str) -> dict:\n return {\n "result": arg1 + arg2,\n }\n', # noqa: E501
|
||||
"code_language": "python3",
|
||||
"desc": "",
|
||||
"outputs": {"result": {"children": None, "type": "string"}},
|
||||
"selected": False,
|
||||
"title": "Code",
|
||||
"type": "code",
|
||||
"variables": [
|
||||
{"value_selector": ["sys", "query"], "variable": "arg1"},
|
||||
{"value_selector": ["sys", "query"], "variable": "arg2"},
|
||||
],
|
||||
},
|
||||
"height": 54,
|
||||
"id": "1742451801533",
|
||||
"position": {"x": 627.8839285786928, "y": 286},
|
||||
"positionAbsolute": {"x": 627.8839285786928, "y": 286},
|
||||
"selected": False,
|
||||
"sourcePosition": "right",
|
||||
"targetPosition": "left",
|
||||
"type": "custom",
|
||||
"width": 244,
|
||||
},
|
||||
],
|
||||
}
|
||||
graph = Graph.init(graph_config)
|
||||
|
||||
# construct variable pool
|
||||
pool = VariablePool(
|
||||
system_variables={
|
||||
SystemVariableKey.QUERY: "dify",
|
||||
SystemVariableKey.FILES: [],
|
||||
SystemVariableKey.CONVERSATION_ID: "abababa",
|
||||
SystemVariableKey.USER_ID: "1",
|
||||
},
|
||||
user_inputs={},
|
||||
environment_variables=[],
|
||||
)
|
||||
pool.add(["pe", "list_output"], ["dify-1", "dify-2"])
|
||||
variable_pool = VariablePool(
|
||||
system_variables={SystemVariableKey.FILES: [], SystemVariableKey.USER_ID: "aaa"}, user_inputs={"query": "hi"}
|
||||
)
|
||||
|
||||
graph_engine = GraphEngine(
|
||||
tenant_id="111",
|
||||
app_id="222",
|
||||
workflow_type=WorkflowType.CHAT,
|
||||
workflow_id="333",
|
||||
graph_config=graph_config,
|
||||
user_id="444",
|
||||
user_from=UserFrom.ACCOUNT,
|
||||
invoke_from=InvokeFrom.WEB_APP,
|
||||
call_depth=0,
|
||||
graph=graph,
|
||||
variable_pool=variable_pool,
|
||||
max_execution_steps=500,
|
||||
max_execution_time=1200,
|
||||
)
|
||||
|
||||
def qc_generator(self):
|
||||
yield RunCompletedEvent(
|
||||
run_result=NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
inputs={},
|
||||
process_data={},
|
||||
outputs={"class_name": "financial", "class_id": "1"},
|
||||
metadata={
|
||||
NodeRunMetadataKey.TOTAL_TOKENS: 1,
|
||||
NodeRunMetadataKey.TOTAL_PRICE: 1,
|
||||
NodeRunMetadataKey.CURRENCY: "USD",
|
||||
},
|
||||
edge_source_handle="1",
|
||||
)
|
||||
)
|
||||
|
||||
def code_generator(self):
|
||||
yield RunCompletedEvent(
|
||||
run_result=NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
inputs={},
|
||||
process_data={},
|
||||
outputs={"result": "dify 123"},
|
||||
metadata={
|
||||
NodeRunMetadataKey.TOTAL_TOKENS: 1,
|
||||
NodeRunMetadataKey.TOTAL_PRICE: 1,
|
||||
NodeRunMetadataKey.CURRENCY: "USD",
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
with patch.object(QuestionClassifierNode, "_run", new=qc_generator):
|
||||
with app.app_context():
|
||||
with patch.object(CodeNode, "_run", new=code_generator):
|
||||
generator = graph_engine.run()
|
||||
stream_content = ""
|
||||
res_content = "VAT:\ndify 123"
|
||||
for item in generator:
|
||||
if isinstance(item, NodeRunStreamChunkEvent):
|
||||
stream_content += f"{item.chunk_content}\n"
|
||||
if isinstance(item, GraphRunSucceededEvent):
|
||||
assert item.outputs == {"answer": res_content}
|
||||
assert stream_content == res_content + "\n"
|
||||
|
@@ -68,7 +68,7 @@ DEBUG=false
# which is convenient for debugging.
FLASK_DEBUG=false

# A secret key that is used for securely signing the session cookie
# A secretkey that is used for securely signing the session cookie
# and encrypting sensitive information on the database.
# You can generate a strong key using `openssl rand -base64 42`.
SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
@@ -76,7 +76,7 @@ SECRET_KEY=sk-9f73s3ljTXVcMT3Blb3ljTqtsKiGHXVcMT3BlbkFJLK7U
# Password for admin user initialization.
# If left unset, admin user will not be prompted for a password
# when creating the initial admin account.
# The length of the password cannot exceed 30 characters.
# The length of the password cannot exceed 30 charactors.
INIT_PASSWORD=

# Deployment environment.
@@ -606,6 +606,12 @@ PROMPT_GENERATION_MAX_TOKENS=512
# Default: 1024 tokens.
CODE_GENERATION_MAX_TOKENS=1024

# The maximum number of tokens allowed for structured output.
# This setting controls the upper limit of tokens that can be used by the LLM
# when generating structured output in the structured output tool.
# Default: 1024 tokens.
STRUCTURED_OUTPUT_MAX_TOKENS=1024

# ------------------------------
# Multi-modal Configuration
# ------------------------------
@@ -27,7 +27,7 @@ Welcome to the new `docker` directory for deploying Dify using Docker Compose. T
- Execute `docker compose up` from the `docker` directory to start the services.
- To specify a vector database, set the `VECTOR_STORE` variable in your `.env` file to your desired vector database service, such as `milvus`, `weaviate`, or `opensearch`.
4. **SSL Certificate Setup**:
- Refer `docker/certbot/README.md` to set up SSL certificates using Certbot.
- Rrefer `docker/certbot/README.md` to set up SSL certificates using Certbot.

### How to Deploy Middleware for Developing Dify

@@ -54,7 +54,7 @@ For users migrating from the `docker-legacy` setup:

- **Vector Database Services**: Depending on the type of vector database used (`VECTOR_STORE`), users can set specific endpoints, ports, and authentication details.
- **Storage Services**: Depending on the storage type (`STORAGE_TYPE`), users can configure specific settings for S3, Azure Blob, Google Storage, etc.
- **API and Web Services**: Users can define URLs and other settings that affect how the API and web frontend operate.
- **API and Web Services**: Users can define URLs and other settings that affect how the API and web frontends operate.

#### Other notable variables

@@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
services:
# API service
api:
image: langgenius/dify-api:1.1.1
image: langgenius/dify-api:1.1.0
restart: always
environment:
# Use the shared environment variables.
@@ -29,7 +29,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:1.1.1
image: langgenius/dify-api:1.1.0
restart: always
environment:
# Use the shared environment variables.
@@ -53,7 +53,7 @@ services:

# Frontend web application.
web:
image: langgenius/dify-web:1.1.1
image: langgenius/dify-web:1.1.0
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -133,7 +133,7 @@ services:

# plugin daemon
plugin_daemon:
image: langgenius/dify-plugin-daemon:0.0.6-local
image: langgenius/dify-plugin-daemon:0.0.5-local
restart: always
environment:
# Use the shared environment variables.

@@ -66,7 +66,7 @@ services:

# plugin daemon
plugin_daemon:
image: langgenius/dify-plugin-daemon:0.0.6-local
image: langgenius/dify-plugin-daemon:0.0.5-local
restart: always
environment:
# Use the shared environment variables.

@@ -269,6 +269,7 @@ x-shared-env: &shared-api-worker-env
SCARF_NO_ANALYTICS: ${SCARF_NO_ANALYTICS:-true}
PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
STRUCTURED_OUTPUT_MAX_TOKENS: ${STRUCTURED_OUTPUT_MAX_TOKENS:-1024}
MULTIMODAL_SEND_FORMAT: ${MULTIMODAL_SEND_FORMAT:-base64}
UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}
@@ -432,7 +433,7 @@ x-shared-env: &shared-api-worker-env
services:
# API service
api:
image: langgenius/dify-api:1.1.1
image: langgenius/dify-api:1.1.0
restart: always
environment:
# Use the shared environment variables.
@@ -459,7 +460,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:1.1.1
image: langgenius/dify-api:1.1.0
restart: always
environment:
# Use the shared environment variables.
@@ -483,7 +484,7 @@ services:

# Frontend web application.
web:
image: langgenius/dify-web:1.1.1
image: langgenius/dify-web:1.1.0
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -563,7 +564,7 @@ services:

# plugin daemon
plugin_daemon:
image: langgenius/dify-plugin-daemon:0.0.6-local
image: langgenius/dify-plugin-daemon:0.0.5-local
restart: always
environment:
# Use the shared environment variables.
@@ -47,6 +47,44 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Property name='text' type='string' key='text'>
Document content
</Property>
<Property name='doc_type' type='string' key='doc_type'>
Type of document (optional):
- <code>book</code> Book
- <code>web_page</code> Web page
- <code>paper</code> Academic paper/article
- <code>social_media_post</code> Social media post
- <code>wikipedia_entry</code> Wikipedia entry
- <code>personal_document</code> Personal document
- <code>business_document</code> Business document
- <code>im_chat_log</code> Chat log
- <code>synced_from_notion</code> Notion document
- <code>synced_from_github</code> GitHub document
- <code>others</code> Other document types
</Property>
<Property name='doc_metadata' type='object' key='doc_metadata'>
Document metadata (required if doc_type is provided). Fields vary by doc_type:
For <code>book</code>:
- <code>title</code> Book title
- <code>language</code> Book language
- <code>author</code> Book author
- <code>publisher</code> Publisher name
- <code>publication_date</code> Publication date
- <code>isbn</code> ISBN number
- <code>category</code> Book category

For <code>web_page</code>:
- <code>title</code> Page title
- <code>url</code> Page URL
- <code>language</code> Page language
- <code>publish_date</code> Publish date
- <code>author/publisher</code> Author or publisher
- <code>topic/keywords</code> Topic or keywords
- <code>description</code> Page description

Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.

For doc_type "others", any valid JSON object is accepted
</Property>
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
@@ -195,6 +233,68 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
- <code>hierarchical_model</code> Parent-child mode
- <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions

- <code>doc_type</code> Type of document (optional)
- <code>book</code> Book
Document records a book or publication
- <code>web_page</code> Web page
Document records web page content
- <code>paper</code> Academic paper/article
Document records academic paper or research article
- <code>social_media_post</code> Social media post
Content from social media posts
- <code>wikipedia_entry</code> Wikipedia entry
Content from Wikipedia entries
- <code>personal_document</code> Personal document
Documents related to personal content
- <code>business_document</code> Business document
Documents related to business content
- <code>im_chat_log</code> Chat log
Records of instant messaging chats
- <code>synced_from_notion</code> Notion document
Documents synchronized from Notion
- <code>synced_from_github</code> GitHub document
Documents synchronized from GitHub
- <code>others</code> Other document types
Other document types not listed above

- <code>doc_metadata</code> Document metadata (required if doc_type is provided)
Fields vary by doc_type:

For <code>book</code>:
- <code>title</code> Book title
Title of the book
- <code>language</code> Book language
Language of the book
- <code>author</code> Book author
Author of the book
- <code>publisher</code> Publisher name
Name of the publishing house
- <code>publication_date</code> Publication date
Date when the book was published
- <code>isbn</code> ISBN number
International Standard Book Number
- <code>category</code> Book category
Category or genre of the book

For <code>web_page</code>:
- <code>title</code> Page title
Title of the web page
- <code>url</code> Page URL
URL address of the web page
- <code>language</code> Page language
Language of the web page
- <code>publish_date</code> Publish date
Date when the web page was published
- <code>author/publisher</code> Author or publisher
Author or publisher of the web page
- <code>topic/keywords</code> Topic or keywords
Topics or keywords of the web page
- <code>description</code> Page description
Description of the web page content

Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
For doc_type "others", any valid JSON object is accepted

- <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>

- <code>process_rule</code> Processing rules
@@ -307,6 +407,44 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Property name='description' type='string' key='description'>
Knowledge description (optional)
</Property>
<Property name='doc_type' type='string' key='doc_type'>
Type of document (optional):
- <code>book</code> Book
- <code>web_page</code> Web page
- <code>paper</code> Academic paper/article
- <code>social_media_post</code> Social media post
- <code>wikipedia_entry</code> Wikipedia entry
- <code>personal_document</code> Personal document
- <code>business_document</code> Business document
- <code>im_chat_log</code> Chat log
- <code>synced_from_notion</code> Notion document
- <code>synced_from_github</code> GitHub document
- <code>others</code> Other document types
</Property>
<Property name='doc_metadata' type='object' key='doc_metadata'>
Document metadata (required if doc_type is provided). Fields vary by doc_type:
For <code>book</code>:
- <code>title</code> Book title
- <code>language</code> Book language
- <code>author</code> Book author
- <code>publisher</code> Publisher name
- <code>publication_date</code> Publication date
- <code>isbn</code> ISBN number
- <code>category</code> Book category

For <code>web_page</code>:
- <code>title</code> Page title
- <code>url</code> Page URL
- <code>language</code> Page language
- <code>publish_date</code> Publish date
- <code>author/publisher</code> Author or publisher
- <code>topic/keywords</code> Topic or keywords
- <code>description</code> Page description

Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.

For doc_type "others", any valid JSON object is accepted
</Property>
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index technique (optional)
- <code>high_quality</code> High quality
@@ -624,6 +762,67 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
- <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
- <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk
- <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional)
- <code>doc_type</code> Type of document (optional)
- <code>book</code> Book
Document records a book or publication
- <code>web_page</code> Web page
Document records web page content
- <code>paper</code> Academic paper/article
Document records academic paper or research article
- <code>social_media_post</code> Social media post
Content from social media posts
- <code>wikipedia_entry</code> Wikipedia entry
Content from Wikipedia entries
- <code>personal_document</code> Personal document
Documents related to personal content
- <code>business_document</code> Business document
Documents related to business content
- <code>im_chat_log</code> Chat log
Records of instant messaging chats
- <code>synced_from_notion</code> Notion document
Documents synchronized from Notion
- <code>synced_from_github</code> GitHub document
Documents synchronized from GitHub
- <code>others</code> Other document types
Other document types not listed above

- <code>doc_metadata</code> Document metadata (required if doc_type is provided)
Fields vary by doc_type:

For <code>book</code>:
- <code>title</code> Book title
Title of the book
- <code>language</code> Book language
Language of the book
- <code>author</code> Book author
Author of the book
- <code>publisher</code> Publisher name
Name of the publishing house
- <code>publication_date</code> Publication date
Date when the book was published
- <code>isbn</code> ISBN number
International Standard Book Number
- <code>category</code> Book category
Category or genre of the book

For <code>web_page</code>:
- <code>title</code> Page title
Title of the web page
- <code>url</code> Page URL
URL address of the web page
- <code>language</code> Page language
Language of the web page
- <code>publish_date</code> Publish date
Date when the web page was published
- <code>author/publisher</code> Author or publisher
Author or publisher of the web page
- <code>topic/keywords</code> Topic or keywords
Topics or keywords of the web page
- <code>description</code> Page description
Description of the web page content

Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type.
For doc_type "others", any valid JSON object is accepted
</Property>
</Properties>
</Col>
@@ -1329,6 +1528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
"id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
"data_source_type": "upload_file",
"name": "readme.txt",
"doc_type": null
}
},
"score": 3.730463140527718e-05,
@ -47,6 +47,46 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
||||
<Property name='text' type='string' key='text'>
|
||||
文档内容
|
||||
</Property>
|
||||
<Property name='doc_type' type='string' key='doc_type'>
|
||||
文档类型(选填)
|
||||
- <code>book</code> 图书 Book
|
||||
- <code>web_page</code> 网页 Web page
|
||||
- <code>paper</code> 学术论文/文章 Academic paper/article
|
||||
- <code>social_media_post</code> 社交媒体帖子 Social media post
|
||||
- <code>wikipedia_entry</code> 维基百科条目 Wikipedia entry
|
||||
- <code>personal_document</code> 个人文档 Personal document
|
||||
- <code>business_document</code> 商业文档 Business document
|
||||
- <code>im_chat_log</code> 即时通讯记录 Chat log
|
||||
- <code>synced_from_notion</code> Notion同步文档 Notion document
|
||||
- <code>synced_from_github</code> GitHub同步文档 GitHub document
|
||||
- <code>others</code> 其他文档类型 Other document types
|
||||
</Property>
|
||||
<Property name='doc_metadata' type='object' key='doc_metadata'>
|
||||
|
||||
文档元数据(如提供文档类型则必填)。字段因文档类型而异:
|
||||
|
||||
针对图书 For <code>book</code>:
|
||||
- <code>title</code> 书名 Book title
|
||||
- <code>language</code> 图书语言 Book language
|
||||
- <code>author</code> 作者 Book author
|
||||
- <code>publisher</code> 出版社 Publisher name
|
||||
- <code>publication_date</code> 出版日期 Publication date
|
||||
- <code>isbn</code> ISBN号码 ISBN number
|
||||
- <code>category</code> 图书分类 Book category
|
||||
|
||||
针对网页 For <code>web_page</code>:
|
||||
- <code>title</code> 页面标题 Page title
|
||||
- <code>url</code> 页面网址 Page URL
|
||||
- <code>language</code> 页面语言 Page language
|
||||
- <code>publish_date</code> 发布日期 Publish date
|
||||
- <code>author/publisher</code> 作者/发布者 Author or publisher
|
||||
- <code>topic/keywords</code> 主题/关键词 Topic or keywords
|
||||
- <code>description</code> 页面描述 Page description
|
||||
|
||||
请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。
|
||||
|
||||
针对"其他"类型文档,接受任何有效的JSON对象
|
||||
</Property>
|
||||
<Property name='indexing_technique' type='string' key='indexing_technique'>
|
||||
索引方式
|
||||
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
|
||||
@@ -194,6 +234,68 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
  - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
  - <code>hierarchical_model</code> parent-child 模式
  - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
- <code>doc_type</code> 文档类型(选填)Type of document (optional)
  - <code>book</code> 图书
    文档记录一本书籍或出版物
  - <code>web_page</code> 网页
    网页内容的文档记录
  - <code>paper</code> 学术论文/文章
    学术论文或研究文章的记录
  - <code>social_media_post</code> 社交媒体帖子
    社交媒体上的帖子内容
  - <code>wikipedia_entry</code> 维基百科条目
    维基百科的词条内容
  - <code>personal_document</code> 个人文档
    个人相关的文档记录
  - <code>business_document</code> 商业文档
    商业相关的文档记录
  - <code>im_chat_log</code> 即时通讯记录
    即时通讯的聊天记录
  - <code>synced_from_notion</code> Notion同步文档
    从Notion同步的文档内容
  - <code>synced_from_github</code> GitHub同步文档
    从GitHub同步的文档内容
  - <code>others</code> 其他文档类型
    其他未列出的文档类型

- <code>doc_metadata</code> 文档元数据(如提供文档类型则必填)
  字段因文档类型而异

  针对图书类型 For <code>book</code>:
  - <code>title</code> 书名
    书籍的标题
  - <code>language</code> 图书语言
    书籍的语言
  - <code>author</code> 作者
    书籍的作者
  - <code>publisher</code> 出版社
    出版社的名称
  - <code>publication_date</code> 出版日期
    书籍的出版日期
  - <code>isbn</code> ISBN号码
    书籍的ISBN编号
  - <code>category</code> 图书分类
    书籍的分类类别

  针对网页类型 For <code>web_page</code>:
  - <code>title</code> 页面标题
    网页的标题
  - <code>url</code> 页面网址
    网页的URL地址
  - <code>language</code> 页面语言
    网页的语言
  - <code>publish_date</code> 发布日期
    网页的发布日期
  - <code>author/publisher</code> 作者/发布者
    网页的作者或发布者
  - <code>topic/keywords</code> 主题/关键词
    网页的主题或关键词
  - <code>description</code> 页面描述
    网页的描述信息

  请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。

  针对"其他"类型文档,接受任何有效的JSON对象

- <code>doc_language</code> 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
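As a quick, assumed illustration of how `doc_form`, `doc_language`, `doc_type`, and `doc_metadata` combine in a single create-by-text payload (all values below are placeholders, and the exact accepted combinations should be verified against the reference above):

```ts
// Illustrative sketch only; every value is a placeholder, and the
// doc_form / doc_language pairing follows the Q&A-mode note above.
const createByTextPayload = {
  name: 'clean-code-notes',
  text: '这里是要写入知识库的正文……',
  indexing_technique: 'high_quality',
  doc_form: 'qa_model',
  doc_language: 'Chinese', // Q&A mode requires the document language
  doc_type: 'book',
  doc_metadata: {
    title: 'Clean Code',
    language: 'en',
    author: 'Robert C. Martin',
    publisher: 'Prentice Hall',
    publication_date: '2008-08-01',
    isbn: '9780132350884',
    category: 'Software Engineering',
  },
}
```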

@@ -504,6 +606,46 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Property name='text' type='string' key='text'>
  文档内容(选填)
</Property>
<Property name='doc_type' type='string' key='doc_type'>
  文档类型(选填)
  - <code>book</code> 图书 Book
  - <code>web_page</code> 网页 Web page
  - <code>paper</code> 学术论文/文章 Academic paper/article
  - <code>social_media_post</code> 社交媒体帖子 Social media post
  - <code>wikipedia_entry</code> 维基百科条目 Wikipedia entry
  - <code>personal_document</code> 个人文档 Personal document
  - <code>business_document</code> 商业文档 Business document
  - <code>im_chat_log</code> 即时通讯记录 Chat log
  - <code>synced_from_notion</code> Notion同步文档 Notion document
  - <code>synced_from_github</code> GitHub同步文档 GitHub document
  - <code>others</code> 其他文档类型 Other document types
</Property>
<Property name='doc_metadata' type='object' key='doc_metadata'>

  文档元数据(如提供文档类型则必填)。字段因文档类型而异:

  针对图书 For <code>book</code>:
  - <code>title</code> 书名 Book title
  - <code>language</code> 图书语言 Book language
  - <code>author</code> 作者 Book author
  - <code>publisher</code> 出版社 Publisher name
  - <code>publication_date</code> 出版日期 Publication date
  - <code>isbn</code> ISBN号码 ISBN number
  - <code>category</code> 图书分类 Book category

  针对网页 For <code>web_page</code>:
  - <code>title</code> 页面标题 Page title
  - <code>url</code> 页面网址 Page URL
  - <code>language</code> 页面语言 Page language
  - <code>publish_date</code> 发布日期 Publish date
  - <code>author/publisher</code> 作者/发布者 Author or publisher
  - <code>topic/keywords</code> 主题/关键词 Topic or keywords
  - <code>description</code> 页面描述 Page description

  请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。

  针对"其他"类型文档,接受任何有效的JSON对象
</Property>
<Property name='process_rule' type='object' key='process_rule'>
  处理规则(选填)
  - <code>mode</code> (string) 清洗、分段模式,automatic 自动 / custom 自定义
@@ -624,6 +766,68 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
  - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
  - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
  - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填)
- <code>doc_type</code> 文档类型(选填)Type of document (optional)
  - <code>book</code> 图书
    文档记录一本书籍或出版物
  - <code>web_page</code> 网页
    网页内容的文档记录
  - <code>paper</code> 学术论文/文章
    学术论文或研究文章的记录
  - <code>social_media_post</code> 社交媒体帖子
    社交媒体上的帖子内容
  - <code>wikipedia_entry</code> 维基百科条目
    维基百科的词条内容
  - <code>personal_document</code> 个人文档
    个人相关的文档记录
  - <code>business_document</code> 商业文档
    商业相关的文档记录
  - <code>im_chat_log</code> 即时通讯记录
    即时通讯的聊天记录
  - <code>synced_from_notion</code> Notion同步文档
    从Notion同步的文档内容
  - <code>synced_from_github</code> GitHub同步文档
    从GitHub同步的文档内容
  - <code>others</code> 其他文档类型
    其他未列出的文档类型

- <code>doc_metadata</code> 文档元数据(如提供文档类型则必填)
  字段因文档类型而异

  针对图书类型 For <code>book</code>:
  - <code>title</code> 书名
    书籍的标题
  - <code>language</code> 图书语言
    书籍的语言
  - <code>author</code> 作者
    书籍的作者
  - <code>publisher</code> 出版社
    出版社的名称
  - <code>publication_date</code> 出版日期
    书籍的出版日期
  - <code>isbn</code> ISBN号码
    书籍的ISBN编号
  - <code>category</code> 图书分类
    书籍的分类类别

  针对网页类型 For <code>web_page</code>:
  - <code>title</code> 页面标题
    网页的标题
  - <code>url</code> 页面网址
    网页的URL地址
  - <code>language</code> 页面语言
    网页的语言
  - <code>publish_date</code> 发布日期
    网页的发布日期
  - <code>author/publisher</code> 作者/发布者
    网页的作者或发布者
  - <code>topic/keywords</code> 主题/关键词
    网页的主题或关键词
  - <code>description</code> 页面描述
    网页的描述信息

  请查看 [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) 了解各文档类型所需字段的详细信息。

  针对"其他"类型文档,接受任何有效的JSON对象
</Property>
</Properties>
</Col>
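Since `process_rule` is only described field-by-field above, the following sketch shows one plausible shape for a custom parent-child configuration. The key names mirror the fields listed above, but the exact nesting is an assumption to verify against the full API reference; all values are placeholders.

```ts
// Assumed shape of a hierarchical (parent-child) process_rule.
// Key names follow the fields documented above; values are illustrative.
const processRule = {
  mode: 'hierarchical',
  rules: {
    pre_processing_rules: [
      { id: 'remove_extra_spaces', enabled: true },
      { id: 'remove_urls_emails', enabled: false },
    ],
    parent_mode: 'paragraph', // parent chunks split per paragraph
    segmentation: {
      separator: '***',
      max_tokens: 1024, // parent chunk size
    },
    subchunk_segmentation: {
      separator: '***',
      max_tokens: 256,   // must be smaller than the parent max_tokens
      chunk_overlap: 32, // optional overlap between adjacent sub-chunks
    },
  },
}
```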
@@ -1330,6 +1534,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
"id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2",
"data_source_type": "upload_file",
"name": "readme.txt",
"doc_type": null
}
},
"score": 3.730463140527718e-05,

@@ -183,10 +183,7 @@ export const useEmbeddedChatbot = () => {

useEffect(() => {
// init inputs from url params
(async () => {
const inputs = await getProcessedInputsFromUrlParams()
setInitInputs(inputs)
})()
setInitInputs(getProcessedInputsFromUrlParams())
}, [])
useEffect(() => {
const conversationInputs: Record<string, any> = {}
@@ -291,11 +288,11 @@ export const useEmbeddedChatbot = () => {
if (conversationId)
setClearChatList(false)
}, [handleConversationIdInfoChange, setClearChatList])
const handleNewConversation = useCallback(async () => {
const handleNewConversation = useCallback(() => {
currentChatInstanceRef.current.handleStop()
setShowNewConversationItemInList(true)
handleChangeConversation('')
handleNewConversationInputsChange(await getProcessedInputsFromUrlParams())
handleNewConversationInputsChange({})
setClearChatList(true)
}, [handleChangeConversation, setShowNewConversationItemInList, handleNewConversationInputsChange, setClearChatList])


@@ -10,14 +10,12 @@ async function decodeBase64AndDecompress(base64String: string) {
return new TextDecoder().decode(decompressedArrayBuffer)
}

async function getProcessedInputsFromUrlParams(): Promise<Record<string, any>> {
function getProcessedInputsFromUrlParams(): Record<string, any> {
const urlParams = new URLSearchParams(window.location.search)
const inputs: Record<string, any> = {}
await Promise.all(
urlParams.entries().map(async ([key, value]) => {
urlParams.forEach(async (value, key) => {
inputs[key] = await decodeBase64AndDecompress(decodeURIComponent(value))
}),
)
})
return inputs
}
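For context on the round-trip, here is a sketch of the encoding side, i.e. what an embedding page would have to do to pack an input value into a URL parameter. It assumes decodeBase64AndDecompress above gunzips with DecompressionStream('gzip') and UTF-8 decodes the result; the helper name and the example parameter name are hypothetical.

```ts
// Sketch of the inverse of decodeBase64AndDecompress, assuming gzip via
// the Compression Streams API (available in modern browsers and recent Node).
async function compressAndEncodeBase64(value: string): Promise<string> {
  const gzipped = new Blob([new TextEncoder().encode(value)])
    .stream()
    .pipeThrough(new CompressionStream('gzip'))
  const buffer = await new Response(gzipped).arrayBuffer()
  // btoa expects a binary string, so map bytes to chars first
  let binary = ''
  new Uint8Array(buffer).forEach(byte => (binary += String.fromCharCode(byte)))
  return btoa(binary)
}

// Usage sketch: produce a param that getProcessedInputsFromUrlParams can read back.
// const encoded = encodeURIComponent(await compressAndEncodeBase64('Alice'))
// chatbotUrl = `${embedBaseUrl}?user_name=${encoded}` // hypothetical param name
```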


@@ -132,8 +132,7 @@ const StepOne = ({
}, [files, isShowVectorSpaceFull])

return (
<div className='w-full h-full overflow-x-auto'>
<div className='flex w-full h-full min-w-[1440px]'>
<div className='flex w-full h-full'>
<div className='w-1/2 h-full overflow-y-auto relative'>
<div className='flex justify-end'>
<div className={classNames(s.form)}>
@@ -318,7 +317,6 @@ const StepOne = ({
{currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
</div>
</div>
</div>
)
}


@@ -43,9 +43,18 @@ Workflow applications offers non-session support and is ideal for translation, a
- `inputs` (object) Required
  Allows the entry of various variable values defined by the App.
  The `inputs` parameter contains multiple key/value pairs, with each key corresponding to a specific variable and each value being the specific value for that variable.
  The workflow application requires at least one key/value pair to be inputted. The variable can be of File Array type.
  File Array type variable is suitable for inputting files combined with text understanding and answering questions, available only when the model supports file parsing and understanding capability.
  If the variable is of File Array type, the corresponding value should be a list whose elements contain the following attributes:
  The workflow application requires at least one key/value pair to be inputted.
  If the variable is of File type, specify an object that has the keys described in `files` below.
- `response_mode` (string) Required
  The mode of response return, supporting:
  - `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)).
  - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long)
    <i>Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds.</i>
- `user` (string) Required
  User identifier, used to define the identity of the end-user for retrieval and statistics.
  Should be uniquely defined by the developer within the application.
- `files` (array[object]) Optional
  File list, suitable for inputting files combined with text understanding and answering questions, available only when the model supports file parsing and understanding capability.
  - `type` (string) Supported type:
    - `document` ('TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB')
    - `image` ('JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG')
@@ -56,15 +65,6 @@ Workflow applications offers non-session support and is ideal for translation, a
  - `url` (string) Image URL (when the transfer method is `remote_url`)
  - `upload_file_id` (string) Uploaded file ID, which must be obtained by uploading through the File Upload API in advance (when the transfer method is `local_file`)

- `response_mode` (string) Required
  The mode of response return, supporting:
  - `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)).
  - `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long)
    <i>Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds.</i>
- `user` (string) Required
  User identifier, used to define the identity of the end-user for retrieval and statistics.
  Should be uniquely defined by the developer within the application.
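Tying the request-body fields above together, below is a minimal sketch of a run-workflow call with a single File-type input variable. The variable name `orig_mail` mirrors the Python example further down this diff; the base URL, the API key env var, and the file ID are placeholders.

```ts
// Assumed sketch of POST /workflows/run with one File-type input variable.
// `fileId` must be obtained from the File Upload API beforehand (local_file method).
const API_BASE = 'https://api.dify.ai/v1' // assumed base URL
const apiKey = process.env.DIFY_APP_API_KEY // assumed env var
const fileId = '<upload_file_id>'

const res = await fetch(`${API_BASE}/workflows/run`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
  body: JSON.stringify({
    inputs: {
      orig_mail: { // a single object now, not a one-element array
        transfer_method: 'local_file',
        upload_file_id: fileId,
        type: 'document',
      },
    },
    response_mode: 'blocking',
    user: 'abc-123',
  }),
})
console.log(await res.json())
```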

### Response
When `response_mode` is `blocking`, return a CompletionResponse object.
When `response_mode` is `streaming`, return a ChunkCompletionResponse stream.
@@ -190,18 +190,15 @@ Workflow applications offers non-session support and is ideal for translation, a
```

</CodeGroup>
<CodeGroup title="Example: file array as an input variable">
<CodeGroup title="File variable example">
```json {{ title: 'File variable example' }}
{
"inputs": {
"{variable_name}":
[
{
"{variable_name}": {
"transfer_method": "local_file",
"upload_file_id": "{upload_file_id}",
"type": "{document_type}"
}
]
}
}
```
@@ -282,11 +279,11 @@ Workflow applications offers non-session support and is ideal for translation, a

data = {
"inputs": {
"orig_mail": [{
"orig_mail": {
"transfer_method": "local_file",
"upload_file_id": file_id,
"type": "document"
}]
}
},
"response_mode": response_mode,
"user": user

@@ -43,20 +43,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- `inputs` (object) 必須
  アプリで定義されたさまざまな変数値の入力を許可します。
  `inputs`パラメータには複数のキー/値ペアが含まれ、各キーは特定の変数に対応し、各値はその変数の特定の値です。
  ワークフローアプリケーションは少なくとも1つのキー/値ペアの入力を必要とします。値はファイルリストである場合もあります。
  ファイルリストは、テキスト理解と質問への回答を組み合わせたファイルの入力に適しています。モデルがファイルの解析と理解機能をサポートしている場合にのみ使用できます。

  変数がファイルリストの場合、リストの各要素は以下の属性を持つ必要があります。
  - `type` (string) サポートされているタイプ:
    - `document` ('TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB')
    - `image` ('JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG')
    - `audio` ('MP3', 'M4A', 'WAV', 'WEBM', 'AMR')
    - `video` ('MP4', 'MOV', 'MPEG', 'MPGA')
    - `custom` (他のファイルタイプ)
  - `transfer_method` (string) 転送方法、画像URLの場合は`remote_url` / ファイルアップロードの場合は`local_file`
  - `url` (string) 画像URL(転送方法が`remote_url`の場合)
  - `upload_file_id` (string) アップロードされたファイルID、事前にファイルアップロードAPIを通じて取得する必要があります(転送方法が`local_file`の場合)

  ワークフローアプリケーションは少なくとも1つのキー/値ペアの入力を必要とします。
  変数がファイルタイプの場合、以下の`files`で説明されているキーを持つオブジェクトを指定してください。
- `response_mode` (string) 必須
  応答の返却モードを指定します。サポートされているモード:
  - `streaming` ストリーミングモード(推奨)、SSE([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events))を通じてタイプライターのような出力を実装します。
@@ -66,7 +54,16 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
  ユーザー識別子、エンドユーザーのアイデンティティを定義するために使用されます。
  アプリケーション内で開発者によって一意に定義される必要があります。
- `files` (array[object]) オプション

  ファイルリストは、テキスト理解と質問への回答を組み合わせたファイルの入力に適しています。モデルがファイルの解析と理解機能をサポートしている場合にのみ使用できます。
  - `type` (string) サポートされているタイプ:
    - `document` ('TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB')
    - `image` ('JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG')
    - `audio` ('MP3', 'M4A', 'WAV', 'WEBM', 'AMR')
    - `video` ('MP4', 'MOV', 'MPEG', 'MPGA')
    - `custom` (他のファイルタイプ)
  - `transfer_method` (string) 転送方法、画像URLの場合は`remote_url` / ファイルアップロードの場合は`local_file`
  - `url` (string) 画像URL(転送方法が`remote_url`の場合)
  - `upload_file_id` (string) アップロードされたファイルID、事前にファイルアップロードAPIを通じて取得する必要があります(転送方法が`local_file`の場合)

### 応答
`response_mode`が`blocking`の場合、CompletionResponseオブジェクトを返します。
@@ -197,14 +194,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
```json {{ title: 'ファイル変数の例' }}
{
"inputs": {
"{variable_name}":
[
{
"{variable_name}": {
"transfer_method": "local_file",
"upload_file_id": "{upload_file_id}",
"type": "{document_type}"
}
]
}
}
```
@@ -285,11 +279,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from

data = {
"inputs": {
"orig_mail": [{
"orig_mail": {
"transfer_method": "local_file",
"upload_file_id": file_id,
"type": "document"
}]
}
},
"response_mode": response_mode,
"user": user

@@ -41,8 +41,18 @@ Workflow 应用无会话支持,适合用于翻译/文章写作/总结 AI 等
### Request Body
- `inputs` (object) Required
  允许传入 App 定义的各变量值。
  inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。变量可以是文件列表类型。
  文件列表类型变量适用于传入文件结合文本理解并回答问题,仅当模型支持该类型文件解析能力时可用。如果该变量是文件列表类型,该变量对应的值应是列表格式,其中每个元素应包含以下内容:
  inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。
  如果变量是文件类型,请指定一个包含以下 `files` 中所述键的对象。
- `response_mode` (string) Required
  返回响应模式,支持:
  - `streaming` 流式模式(推荐)。基于 SSE(**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**)实现类似打字机输出方式的流式返回。
  - `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。
    <i>由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。</i>
- `user` (string) Required
  用户标识,用于定义终端用户的身份,方便检索、统计。
  由开发者定义规则,需保证用户标识在应用内唯一。
- `files` (array[object]) Optional
  文件列表,适用于传入文件结合文本理解并回答问题,仅当模型支持该类型文件解析能力时可用。
  - `type` (string) 支持类型:
    - `document` 具体类型包含:'TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'
    - `image` 具体类型包含:'JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'
@@ -52,15 +62,6 @@ Workflow 应用无会话支持,适合用于翻译/文章写作/总结 AI 等
  - `transfer_method` (string) 传递方式,`remote_url` 图片地址 / `local_file` 上传文件
  - `url` (string) 图片地址(仅当传递方式为 `remote_url` 时)
  - `upload_file_id` (string) 上传文件 ID(仅当传递方式为 `local_file` 时)
- `response_mode` (string) Required
  返回响应模式,支持:
  - `streaming` 流式模式(推荐)。基于 SSE(**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**)实现类似打字机输出方式的流式返回。
  - `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。
    <i>由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。</i>
- `user` (string) Required
  用户标识,用于定义终端用户的身份,方便检索、统计。
  由开发者定义规则,需保证用户标识在应用内唯一。

### Response
当 `response_mode` 为 `blocking` 时,返回 CompletionResponse object。
@@ -183,18 +184,15 @@ Workflow 应用无会话支持,适合用于翻译/文章写作/总结 AI 等
}'
```
</CodeGroup>
<CodeGroup title="Example: file array as an input variable">
<CodeGroup title="File variable example">
```json {{ title: 'File variable example' }}
{
"inputs": {
"{variable_name}":
[
{
"{variable_name}": {
"transfer_method": "local_file",
"upload_file_id": "{upload_file_id}",
"type": "{document_type}"
}
]
}
}
```
@@ -275,11 +273,11 @@ Workflow 应用无会话支持,适合用于翻译/文章写作/总结 AI 等

data = {
"inputs": {
"orig_mail": [{
"orig_mail": {
"transfer_method": "local_file",
"upload_file_id": file_id,
"type": "document"
}]
}
},
"response_mode": response_mode,
"user": user
@@ -6,7 +6,7 @@ export const useNodeHelpLink = (nodeType: BlockEnum) => {
const language = useGetLanguage()
const prefixLink = useMemo(() => {
if (language === 'zh_Hans')
return 'https://docs.dify.ai/zh-hans/guides/workflow/node/'
return 'https://docs.dify.ai/v/zh-hans/guides/workflow/node/'

return 'https://docs.dify.ai/guides/workflow/node/'
}, [language])
@@ -34,7 +34,6 @@ export const useNodeHelpLink = (nodeType: BlockEnum) => {
[BlockEnum.Tool]: 'tools',
[BlockEnum.DocExtractor]: 'doc-extractor',
[BlockEnum.ListFilter]: 'list-operator',
[BlockEnum.Agent]: 'agent',
}
}


@@ -218,15 +218,13 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
(async () => {
const inputs = inputRef.current
const datasetIds = inputs.dataset_ids
let _datasets = selectedDatasets
if (datasetIds?.length > 0) {
const { data: dataSetsWithDetail } = await fetchDatasets({ url: '/datasets', params: { page: 1, ids: datasetIds } as any })
_datasets = dataSetsWithDetail
setSelectedDatasets(dataSetsWithDetail)
}
const newInputs = produce(inputs, (draft) => {
draft.dataset_ids = datasetIds
draft._datasets = _datasets
draft._datasets = selectedDatasets
})
setInputs(newInputs)
setSelectedDatasetsLoaded(true)
@@ -1,5 +1,5 @@
import type { FC } from 'react'
import React, { useCallback } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import MemoryConfig from '../_base/components/memory-config'
import VarReferencePicker from '../_base/components/variable/var-reference-picker'
@@ -124,16 +124,6 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
return forms
})()

const handleModelChange = useCallback((model: {
provider: string
modelId: string
mode?: string
}) => {
handleCompletionParamsChange({})
handleModelChanged(model)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])

return (
<div className='mt-2'>
<div className='px-4 pb-4 space-y-4'>
@@ -148,7 +138,7 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
provider={model?.provider}
completionParams={model?.completion_params}
modelId={model?.name}
setModel={handleModelChange}
setModel={handleModelChanged}
onCompletionParamsChange={handleCompletionParamsChange}
hideDebugWithMultipleModel
debugWithMultipleModel={false}
@@ -27,6 +27,5 @@ export NEXT_PUBLIC_TEXT_GENERATION_TIMEOUT_MS=${TEXT_GENERATION_TIMEOUT_MS}
export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST}
export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE}
export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH}
export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM}

pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon
@@ -481,7 +481,7 @@ const translation = {
},
retrieveMultiWay: {
title: 'Multi-path retrieval',
description: 'Based on user intent, queries across all Knowledge, retrieves relevant text from multi-sources, and selects the best results matching the user query after reranking.',
description: 'Based on user intent, queries across all Knowledge, retrieves relevant text from multi-sources, and selects the best results matching the user query after reranking. ',
},
rerankModelRequired: 'A configured Rerank Model is required',
params: 'Params',

@@ -19,7 +19,7 @@ const translation = {
setAdminAccountDesc: 'Maximum privileges for admin account, which can be used to create applications and manage LLM providers, etc.',
createAndSignIn: 'Create and sign in',
oneMoreStep: 'One more step',
createSample: 'Based on this information, we\'ll create sample application for you',
createSample: 'Based on this information, we’ll create sample application for you',
invitationCode: 'Invitation Code',
invitationCodePlaceholder: 'Your invitation code',
interfaceLanguage: 'Interface Language',

@@ -106,7 +106,7 @@ const translation = {
step2Tip: '点击此处将仓库导入到 Vercel 中部署',
step2Operation: '导入仓库',
step3: '配置环境变量',
step3Tip: '在 Vercel 环境变量中添加以下环境变量',
step3Tip: '在 Vecel 环境变量中添加以下环境变量',
},
way2: {
name: '编写客户端调用 API 并部署到服务器中',

@@ -105,7 +105,7 @@ const translation = {
step2Tip: '點選此處將倉庫匯入到 Vercel 中部署',
step2Operation: '匯入倉庫',
step3: '配置環境變數',
step3Tip: '在 Vercel 環境變數中新增以下環境變數',
step3Tip: '在 Vecel 環境變數中新增以下環境變數',
},
way2: {
name: '編寫客戶端呼叫 API 並部署到伺服器中',
@@ -1,6 +1,6 @@
{
"name": "dify-web",
"version": "1.1.1",
"version": "1.1.0",
"private": true,
"engines": {
"node": ">=18.17.0"

@@ -187,8 +187,7 @@ export const useInstallOrUpdate = ({
if (item.type === 'github') {
const data = item as GitHubItemAndMarketPlaceDependency
// Items from a local bundle don't have data.value.github_plugin_unique_identifier
uniqueIdentifier = data.value.github_plugin_unique_identifier!
if (!uniqueIdentifier) {
if (!data.value.github_plugin_unique_identifier) {
const { unique_identifier } = await post<uploadGitHubResponse>('/workspaces/current/plugin/upload/github', {
body: {
repo: data.value.repo!,