From d7c0bc8c2399fb7752ff3a20b29bdb2794cf9b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Giovanny=20Guti=C3=A9rrez?=
Date: Wed, 1 Jan 2025 20:59:34 -0500
Subject: [PATCH] feat: Add response format support for openai compat models
 (#12240)

Co-authored-by: Gio Gutierrez
---
 .../model_providers/groq/llm/gemma-7b-it.yaml      | 12 ++++++++++++
 .../model_providers/groq/llm/gemma2-9b-it.yaml     | 12 ++++++++++++
 .../groq/llm/llama-3.1-405b-reasoning.yaml         | 12 ++++++++++++
 .../groq/llm/llama-3.1-70b-versatile.yaml          | 13 +++++++++++++
 .../groq/llm/llama-3.1-8b-instant.yaml             | 13 +++++++++++++
 .../groq/llm/llama-3.2-11b-text-preview.yaml       | 12 ++++++++++++
 .../groq/llm/llama-3.2-11b-vision-preview.yaml     | 12 ++++++++++++
 .../groq/llm/llama-3.2-1b-preview.yaml             | 12 ++++++++++++
 .../groq/llm/llama-3.2-3b-preview.yaml             | 12 ++++++++++++
 .../groq/llm/llama-3.2-90b-text-preview.yaml       | 12 ++++++++++++
 .../groq/llm/llama-3.2-90b-vision-preview.yaml     | 12 ++++++++++++
 .../groq/llm/llama-3.3-70b-specdec.yaml            | 13 +++++++++++++
 .../groq/llm/llama-3.3-70b-versatile.yaml          | 13 +++++++++++++
 .../groq/llm/llama-guard-3-8b.yaml                 | 12 ++++++++++++
 .../groq/llm/llama2-70b-4096.yaml                  | 12 ++++++++++++
 .../groq/llm/llama3-70b-8192.yaml                  | 12 ++++++++++++
 .../groq/llm/llama3-8b-8192.yaml                   | 13 +++++++++++++
 .../llama3-groq-70b-8192-tool-use-preview.yaml     | 13 +++++++++++++
 .../model_providers/openai/llm/gpt-4o.yaml         |  3 +++
 .../openai_api_compatible/llm/llm.py               | 17 +++++++++++++++++
 20 files changed, 242 insertions(+)

diff --git a/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml b/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
index 02f84e95f6..157baaf315 100644
--- a/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml b/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
index dad496f668..d0294ac6aa 100644
--- a/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
index 217785cea2..3cbce0c053 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
index 01323a1b8a..07a0187e47 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
@@ -6,6 +6,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -19,6 +20,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
index a82e64532e..04eae49b96 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
index 3f30d81ae4..e6eadeb072 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
index 5632218797..241a7bed10 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
index a44e4ff508..a6087d3443 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
index f2fdd0a05e..93a8127ec6 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
index 0391a7c890..f9361bff62 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
index e7b93101e8..145b457924 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
index bda9ec530a..916dfee391 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
index eb609f4db7..a5de4e752f 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
index 03779ccc66..bd8e5d2a3a 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.20'
   output: '0.20'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml b/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
index 384912b0dd..6e7ffd7a94 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 4096
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.7'
   output: '0.8'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml b/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
index 91d0e30765..2c25bb7433 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.59'
   output: '0.79'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml b/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
index b6154f761f..d8a708eaf4 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
index 32ccbf1f4d..61c83c980c 100644
--- a/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'
diff --git a/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml b/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
index a4681fe18d..d6be36ad74 100644
--- a/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
+++ b/api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
@@ -37,6 +37,9 @@ parameter_rules:
     options:
       - text
      - json_object
+      - json_schema
+  - name: json_schema
+    use_template: json_schema
 pricing:
   input: '2.50'
   output: '10.00'
diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
index 8e07d56f45..5b0ee7aae3 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
@@ -332,6 +332,23 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
         if not endpoint_url.endswith("/"):
             endpoint_url += "/"
 
+        response_format = model_parameters.get("response_format")
+        if response_format:
+            if response_format == "json_schema":
+                json_schema = model_parameters.get("json_schema")
+                if not json_schema:
+                    raise ValueError("Must define JSON Schema when the response format is json_schema")
+                try:
+                    schema = json.loads(json_schema)
+                except json.JSONDecodeError as e:
+                    raise ValueError(f"Invalid json_schema format: {json_schema}") from e
+                model_parameters.pop("json_schema")
+                model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
+            else:
+                model_parameters["response_format"] = {"type": response_format}
+        elif "json_schema" in model_parameters:
+            del model_parameters["json_schema"]
+
         data = {"model": model, "stream": stream, **model_parameters}
 
         completion_type = LLMMode.value_of(credentials["mode"])
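
Reviewer note: the snippet below is a minimal standalone sketch of the transformation the new llm.py hunk applies to model_parameters before the request body is assembled. The helper name normalize_response_format and the example payload are hypothetical illustrations for review purposes, not part of the patch.

import json

def normalize_response_format(model_parameters: dict) -> dict:
    # Expand the flat response_format string coming from the YAML parameter
    # rules into the object form expected by OpenAI-compatible endpoints.
    response_format = model_parameters.get("response_format")
    if response_format:
        if response_format == "json_schema":
            json_schema = model_parameters.get("json_schema")
            if not json_schema:
                raise ValueError("Must define JSON Schema when the response format is json_schema")
            try:
                schema = json.loads(json_schema)
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid json_schema format: {json_schema}") from e
            model_parameters.pop("json_schema")
            model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
        else:
            # "text" and "json_object" pass through as a simple type object
            model_parameters["response_format"] = {"type": response_format}
    elif "json_schema" in model_parameters:
        # Drop a stray schema that has no matching response_format
        del model_parameters["json_schema"]
    return model_parameters

# Hypothetical example: a json_schema request as the console might submit it
params = {
    "max_tokens": 512,
    "response_format": "json_schema",
    "json_schema": json.dumps({
        "name": "answer",
        "schema": {"type": "object", "properties": {"answer": {"type": "string"}}},
    }),
}
print(normalize_response_format(params)["response_format"]["type"])  # json_schema

The net effect is that the provider-agnostic YAML rules can keep offering a plain string option (text, json_object, json_schema) while the compat layer builds the nested structure the wire format requires.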