From d7c0bc8c2399fb7752ff3a20b29bdb2794cf9b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Giovanny=20Guti=C3=A9rrez?=
Date: Wed, 1 Jan 2025 20:59:34 -0500
Subject: [PATCH] feat: Add response format support for openai compat models
 (#12240)

Co-authored-by: Gio Gutierrez
---
 .../model_providers/groq/llm/gemma-7b-it.yaml      | 12 ++++++++++++
 .../model_providers/groq/llm/gemma2-9b-it.yaml     | 12 ++++++++++++
 .../groq/llm/llama-3.1-405b-reasoning.yaml         | 12 ++++++++++++
 .../groq/llm/llama-3.1-70b-versatile.yaml          | 13 +++++++++++++
 .../groq/llm/llama-3.1-8b-instant.yaml             | 13 +++++++++++++
 .../groq/llm/llama-3.2-11b-text-preview.yaml       | 12 ++++++++++++
 .../groq/llm/llama-3.2-11b-vision-preview.yaml     | 12 ++++++++++++
 .../groq/llm/llama-3.2-1b-preview.yaml             | 12 ++++++++++++
 .../groq/llm/llama-3.2-3b-preview.yaml             | 12 ++++++++++++
 .../groq/llm/llama-3.2-90b-text-preview.yaml       | 12 ++++++++++++
 .../groq/llm/llama-3.2-90b-vision-preview.yaml     | 12 ++++++++++++
 .../groq/llm/llama-3.3-70b-specdec.yaml            | 13 +++++++++++++
 .../groq/llm/llama-3.3-70b-versatile.yaml          | 13 +++++++++++++
 .../groq/llm/llama-guard-3-8b.yaml                 | 12 ++++++++++++
 .../groq/llm/llama2-70b-4096.yaml                  | 12 ++++++++++++
 .../groq/llm/llama3-70b-8192.yaml                  | 12 ++++++++++++
 .../groq/llm/llama3-8b-8192.yaml                   | 13 +++++++++++++
 .../llama3-groq-70b-8192-tool-use-preview.yaml     | 13 +++++++++++++
 .../model_providers/openai/llm/gpt-4o.yaml         |  3 +++
 .../openai_api_compatible/llm/llm.py               | 17 +++++++++++++++++
 20 files changed, 242 insertions(+)

diff --git a/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml b/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
index 02f84e95f6..157baaf315 100644
--- a/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml b/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
index dad496f668..d0294ac6aa 100644
--- a/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
index 217785cea2..3cbce0c053 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
index 01323a1b8a..07a0187e47 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
@@ -6,6 +6,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -19,6 +20,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
index a82e64532e..04eae49b96 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
index 3f30d81ae4..e6eadeb072 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
index 5632218797..241a7bed10 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
index a44e4ff508..a6087d3443 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
index f2fdd0a05e..93a8127ec6 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
index 0391a7c890..f9361bff62 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
index e7b93101e8..145b457924 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
index bda9ec530a..916dfee391 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
index eb609f4db7..a5de4e752f 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
index 03779ccc66..bd8e5d2a3a 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.20'
   output: '0.20'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml b/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
index 384912b0dd..6e7ffd7a94 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 4096
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.7'
   output: '0.8'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml b/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
index 91d0e30765..2c25bb7433 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.59'
   output: '0.79'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml b/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
index b6154f761f..d8a708eaf4 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
index 32ccbf1f4d..61c83c980c 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'
diff --git a/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml b/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
index a4681fe18d..d6be36ad74 100644
--- a/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
+++ b/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
@@ -37,6 +37,9 @@ parameter_rules:
     options:
       - text
      - json_object
+      - json_schema
+  - name: json_schema
+    use_template: json_schema
 pricing:
   input: '2.50'
   output: '10.00'
diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
index 8e07d56f45..5b0ee7aae3 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
@@ -332,6 +332,23 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
         if not endpoint_url.endswith("/"):
             endpoint_url += "/"
 
+        response_format = model_parameters.get("response_format")
+        if response_format:
+            if response_format == "json_schema":
+                json_schema = model_parameters.get("json_schema")
+                if not json_schema:
+                    raise ValueError("Must define JSON Schema when the response format is json_schema")
+                try:
+                    schema = json.loads(json_schema)
+                except json.JSONDecodeError as e:
+                    raise ValueError(f"Invalid json_schema format: {json_schema}") from e
+                model_parameters.pop("json_schema")
+                model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
+            else:
+                model_parameters["response_format"] = {"type": response_format}
+        elif "json_schema" in model_parameters:
+            del model_parameters["json_schema"]
+
         data = {"model": model, "stream": stream, **model_parameters}
 
         completion_type = LLMMode.value_of(credentials["mode"])
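
Reviewer note: the snippet below is a minimal standalone sketch of the transformation the new llm.py hunk applies to model_parameters before the request body is assembled. The helper name normalize_response_format and the example payload are hypothetical illustrations for review purposes, not part of the patch.

import json

def normalize_response_format(model_parameters: dict) -> dict:
    # Expand the flat response_format string coming from the YAML parameter
    # rules into the object form expected by OpenAI-compatible endpoints.
    response_format = model_parameters.get("response_format")
    if response_format:
        if response_format == "json_schema":
            json_schema = model_parameters.get("json_schema")
            if not json_schema:
                raise ValueError("Must define JSON Schema when the response format is json_schema")
            try:
                schema = json.loads(json_schema)
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid json_schema format: {json_schema}") from e
            model_parameters.pop("json_schema")
            model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
        else:
            # "text" and "json_object" pass through as a simple type object
            model_parameters["response_format"] = {"type": response_format}
    elif "json_schema" in model_parameters:
        # Drop a stray schema that has no matching response_format
        del model_parameters["json_schema"]
    return model_parameters

# Hypothetical example: a json_schema request as the console might submit it
params = {
    "max_tokens": 512,
    "response_format": "json_schema",
    "json_schema": json.dumps({
        "name": "answer",
        "schema": {"type": "object", "properties": {"answer": {"type": "string"}}},
    }),
}
print(normalize_response_format(params)["response_format"]["type"])  # json_schema

The net effect is that the provider-agnostic YAML rules can keep offering a plain string option (text, json_object, json_schema) while the compat layer builds the nested structure the wire format requires.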