Merge branch 'main' into feat/knowledge-dark-mode

This commit is contained in:
twwu 2025-01-23 15:10:46 +08:00
commit 684f7188f4
18 changed files with 263 additions and 114 deletions

View File

@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
default="0.15.1",
default="0.15.2",
)
COMMIT_SHA: str = Field(

View File

@ -18,72 +18,93 @@ class ModelConfig(BaseModel):
configs: dict[str, ModelConfig] = {
"Doubao-1.5-vision-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=12288, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.VISION],
),
"Doubao-1.5-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=12288, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Doubao-1.5-lite-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=12288, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Doubao-1.5-pro-256k": ModelConfig(
properties=ModelProperties(context_size=262144, max_tokens=12288, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Doubao-vision-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.VISION],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.VISION],
),
"Doubao-vision-lite-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.VISION],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.VISION],
),
"Doubao-pro-4k": ModelConfig(
properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Doubao-lite-4k": ModelConfig(
properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Doubao-pro-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Doubao-lite-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Doubao-pro-256k": ModelConfig(
properties=ModelProperties(context_size=262144, max_tokens=4096, mode=LLMMode.CHAT),
features=[],
features=[ModelFeature.AGENT_THOUGHT],
),
"Doubao-pro-128k": ModelConfig(
properties=ModelProperties(context_size=131072, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Doubao-lite-128k": ModelConfig(
properties=ModelProperties(context_size=131072, max_tokens=4096, mode=LLMMode.CHAT), features=[]
properties=ModelProperties(context_size=131072, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Skylark2-pro-4k": ModelConfig(
properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT), features=[]
properties=ModelProperties(context_size=4096, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Llama3-8B": ModelConfig(
properties=ModelProperties(context_size=8192, max_tokens=8192, mode=LLMMode.CHAT), features=[]
properties=ModelProperties(context_size=8192, max_tokens=8192, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Llama3-70B": ModelConfig(
properties=ModelProperties(context_size=8192, max_tokens=8192, mode=LLMMode.CHAT), features=[]
properties=ModelProperties(context_size=8192, max_tokens=8192, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
"Moonshot-v1-8k": ModelConfig(
properties=ModelProperties(context_size=8192, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Moonshot-v1-32k": ModelConfig(
properties=ModelProperties(context_size=32768, max_tokens=16384, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Moonshot-v1-128k": ModelConfig(
properties=ModelProperties(context_size=131072, max_tokens=65536, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"GLM3-130B": ModelConfig(
properties=ModelProperties(context_size=8192, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"GLM3-130B-Fin": ModelConfig(
properties=ModelProperties(context_size=8192, max_tokens=4096, mode=LLMMode.CHAT),
features=[ModelFeature.TOOL_CALL],
features=[ModelFeature.AGENT_THOUGHT, ModelFeature.TOOL_CALL],
),
"Mistral-7B": ModelConfig(
properties=ModelProperties(context_size=8192, max_tokens=2048, mode=LLMMode.CHAT), features=[]
properties=ModelProperties(context_size=8192, max_tokens=2048, mode=LLMMode.CHAT),
features=[ModelFeature.AGENT_THOUGHT],
),
}

View File

@ -118,6 +118,30 @@ model_credential_schema:
type: select
required: true
options:
- label:
en_US: Doubao-1.5-vision-pro-32k
value: Doubao-1.5-vision-pro-32k
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-1.5-pro-32k
value: Doubao-1.5-pro-32k
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-1.5-lite-32k
value: Doubao-1.5-lite-32k
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-1.5-pro-256k
value: Doubao-1.5-pro-256k
show_on:
- variable: __model_type
value: llm
- label:
en_US: Doubao-vision-pro-32k
value: Doubao-vision-pro-32k

View File

@ -57,6 +57,11 @@ CREATE TABLE IF NOT EXISTS {table_name} (
) using heap;
"""
SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
"""
class PGVector(BaseVector):
def __init__(self, collection_name: str, config: PGVectorConfig):
@ -205,7 +210,10 @@ class PGVector(BaseVector):
with self._get_cursor() as cur:
cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension))
# TODO: create index https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
# The PG hnsw index only supports 2000 dimensions or fewer
# ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
if dimension <= 2000:
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name))
redis_client.set(collection_exist_cache_key, 1, ex=3600)

View File

@ -1,6 +1,6 @@
import json
import time
from typing import cast
from typing import Any, cast
import requests
@ -14,48 +14,47 @@ class FirecrawlApp:
if self.api_key is None and self.base_url == "https://api.firecrawl.dev":
raise ValueError("No API key provided")
def scrape_url(self, url, params=None) -> dict:
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
json_data = {"url": url}
def scrape_url(self, url, params=None) -> dict[str, Any]:
# Documentation: https://docs.firecrawl.dev/api-reference/endpoint/scrape
headers = self._prepare_headers()
json_data = {
"url": url,
"formats": ["markdown"],
"onlyMainContent": True,
"timeout": 30000,
}
if params:
json_data.update(params)
response = requests.post(f"{self.base_url}/v0/scrape", headers=headers, json=json_data)
response = self._post_request(f"{self.base_url}/v1/scrape", json_data, headers)
if response.status_code == 200:
response_data = response.json()
if response_data["success"] == True:
data = response_data["data"]
return {
"title": data.get("metadata").get("title"),
"description": data.get("metadata").get("description"),
"source_url": data.get("metadata").get("sourceURL"),
"markdown": data.get("markdown"),
}
else:
raise Exception(f"Failed to scrape URL. Error: {response_data['error']}")
elif response.status_code in {402, 409, 500}:
error_message = response.json().get("error", "Unknown error occurred")
raise Exception(f"Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}")
data = response_data["data"]
return self._extract_common_fields(data)
elif response.status_code in {402, 409, 500, 429, 408}:
self._handle_error(response, "scrape URL")
return {} # Avoid additional exception after handling error
else:
raise Exception(f"Failed to scrape URL. Status code: {response.status_code}")
def crawl_url(self, url, params=None) -> str:
# Documentation: https://docs.firecrawl.dev/api-reference/endpoint/crawl-post
headers = self._prepare_headers()
json_data = {"url": url}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v0/crawl", json_data, headers)
response = self._post_request(f"{self.base_url}/v1/crawl", json_data, headers)
if response.status_code == 200:
job_id = response.json().get("jobId")
# There are also two other fields in the response: "success" (bool) and "url" (str)
job_id = response.json().get("id")
return cast(str, job_id)
else:
self._handle_error(response, "start crawl job")
# FIXME: unreachable code for mypy
return "" # unreachable
def check_crawl_status(self, job_id) -> dict:
def check_crawl_status(self, job_id) -> dict[str, Any]:
headers = self._prepare_headers()
response = self._get_request(f"{self.base_url}/v0/crawl/status/{job_id}", headers)
response = self._get_request(f"{self.base_url}/v1/crawl/{job_id}", headers)
if response.status_code == 200:
crawl_status_response = response.json()
if crawl_status_response.get("status") == "completed":
@ -66,42 +65,48 @@ class FirecrawlApp:
url_data_list = []
for item in data:
if isinstance(item, dict) and "metadata" in item and "markdown" in item:
url_data = {
"title": item.get("metadata", {}).get("title"),
"description": item.get("metadata", {}).get("description"),
"source_url": item.get("metadata", {}).get("sourceURL"),
"markdown": item.get("markdown"),
}
url_data = self._extract_common_fields(item)
url_data_list.append(url_data)
if url_data_list:
file_key = "website_files/" + job_id + ".txt"
if storage.exists(file_key):
storage.delete(file_key)
storage.save(file_key, json.dumps(url_data_list).encode("utf-8"))
return {
"status": "completed",
"total": crawl_status_response.get("total"),
"current": crawl_status_response.get("current"),
"data": url_data_list,
}
try:
if storage.exists(file_key):
storage.delete(file_key)
storage.save(file_key, json.dumps(url_data_list).encode("utf-8"))
except Exception as e:
raise Exception(f"Error saving crawl data: {e}")
return self._format_crawl_status_response("completed", crawl_status_response, url_data_list)
else:
return {
"status": crawl_status_response.get("status"),
"total": crawl_status_response.get("total"),
"current": crawl_status_response.get("current"),
"data": [],
}
return self._format_crawl_status_response(
crawl_status_response.get("status"), crawl_status_response, []
)
else:
self._handle_error(response, "check crawl status")
# FIXME: unreachable code for mypy
return {} # unreachable
def _prepare_headers(self):
def _format_crawl_status_response(
self, status: str, crawl_status_response: dict[str, Any], url_data_list: list[dict[str, Any]]
) -> dict[str, Any]:
return {
"status": status,
"total": crawl_status_response.get("total"),
"current": crawl_status_response.get("completed"),
"data": url_data_list,
}
def _extract_common_fields(self, item: dict[str, Any]) -> dict[str, Any]:
return {
"title": item.get("metadata", {}).get("title"),
"description": item.get("metadata", {}).get("description"),
"source_url": item.get("metadata", {}).get("sourceURL"),
"markdown": item.get("markdown"),
}
def _prepare_headers(self) -> dict[str, Any]:
return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5) -> requests.Response:
for attempt in range(retries):
response = requests.post(url, headers=headers, json=data)
if response.status_code == 502:
@ -110,7 +115,7 @@ class FirecrawlApp:
return response
return response
def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
def _get_request(self, url, headers, retries=3, backoff_factor=0.5) -> requests.Response:
for attempt in range(retries):
response = requests.get(url, headers=headers)
if response.status_code == 502:
@ -119,6 +124,6 @@ class FirecrawlApp:
return response
return response
def _handle_error(self, response, action):
def _handle_error(self, response, action) -> None:
error_message = response.json().get("error", "Unknown error occurred")
raise Exception(f"Failed to {action}. Status code: {response.status_code}. Error: {error_message}")

View File

@ -0,0 +1,26 @@
from typing import Any, Union
import requests
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class GiteeAIToolRiskControl(BuiltinTool):
    """Moderation tool backed by the Gitee AI moderations endpoint.

    Sends the user-supplied text to the configured moderation model
    (default: Security-semantic-filtering) and returns the raw JSON
    moderation response as a text message.
    """

    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        headers = {
            "content-type": "application/json",
            "authorization": f"Bearer {self.runtime.credentials['api_key']}",
        }
        # "input-text" is declared required in the tool YAML, but guard anyway so a
        # missing value yields a clear error instead of posting {"text": null}.
        input_text = tool_parameters.get("input-text")
        if not input_text:
            return self.create_text_message("Invalid parameter: input-text is required")
        inputs = [{"type": "text", "text": input_text}]
        model = tool_parameters.get("model", "Security-semantic-filtering")
        payload = {"model": model, "input": inputs}
        url = "https://ai.gitee.com/v1/moderations"
        # Bound the request so a stalled endpoint cannot hang the tool forever
        # (requests has no default timeout).
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        if response.status_code != 200:
            return self.create_text_message(f"Got Error Response:{response.text}")
        return [self.create_text_message(response.content.decode("utf-8"))]

View File

@ -0,0 +1,32 @@
identity:
name: risk control
author: gitee_ai
label:
en_US: risk control identification
zh_Hans: 风控识别
icon: icon.svg
description:
human:
en_US: Ensuring the protection and compliance of sensitive information through the filtering and analysis of data semantics
zh_Hans: 通过对数据语义的过滤和分析,确保敏感信息的保护和合规性
llm: This tool is used for risk control identification.
parameters:
- name: model
type: string
required: true
default: Security-semantic-filtering
label:
en_US: Service Model
zh_Hans: 服务模型
form: form
- name: input-text
type: string
required: true
label:
en_US: Input Text
zh_Hans: 输入文本
human_description:
en_US: The text input for filtering and analysis.
zh_Hans: 用于分析过滤的文本
llm_description: The text input for filtering and analysis.
form: llm

View File

@ -21,10 +21,12 @@ class FirecrawlAuth(ApiKeyAuthBase):
headers = self._prepare_headers()
options = {
"url": "https://example.com",
"crawlerOptions": {"excludes": [], "includes": [], "limit": 1},
"pageOptions": {"onlyMainContent": True},
"excludes": [],
"includes": [],
"limit": 1,
"scrapeOptions": {"onlyMainContent": True},
}
response = self._post_request(f"{self.base_url}/v0/crawl", options, headers)
response = self._post_request(f"{self.base_url}/v1/crawl", options, headers)
if response.status_code == 200:
return True
else:

View File

@ -38,30 +38,24 @@ class WebsiteService:
only_main_content = options.get("only_main_content", False)
if not crawl_sub_pages:
params = {
"crawlerOptions": {
"includes": [],
"excludes": [],
"generateImgAltText": True,
"limit": 1,
"returnOnlyUrls": False,
"pageOptions": {"onlyMainContent": only_main_content, "includeHtml": False},
}
"includes": [],
"excludes": [],
"generateImgAltText": True,
"limit": 1,
"scrapeOptions": {"onlyMainContent": only_main_content},
}
else:
includes = options.get("includes").split(",") if options.get("includes") else []
excludes = options.get("excludes").split(",") if options.get("excludes") else []
params = {
"crawlerOptions": {
"includes": includes,
"excludes": excludes,
"generateImgAltText": True,
"limit": options.get("limit", 1),
"returnOnlyUrls": False,
"pageOptions": {"onlyMainContent": only_main_content, "includeHtml": False},
}
"includes": includes,
"excludes": excludes,
"generateImgAltText": True,
"limit": options.get("limit", 1),
"scrapeOptions": {"onlyMainContent": only_main_content},
}
if options.get("max_depth"):
params["crawlerOptions"]["maxDepth"] = options.get("max_depth")
params["maxDepth"] = options.get("max_depth")
job_id = firecrawl_app.crawl_url(url, params)
website_crawl_time_cache_key = f"website_crawl_{job_id}"
time = str(datetime.datetime.now().timestamp())
@ -228,7 +222,7 @@ class WebsiteService:
# decrypt api_key
api_key = encrypter.decrypt_token(tenant_id=tenant_id, token=credentials.get("config").get("api_key"))
firecrawl_app = FirecrawlApp(api_key=api_key, base_url=credentials.get("config").get("base_url", None))
params = {"pageOptions": {"onlyMainContent": only_main_content, "includeHtml": False}}
params = {"onlyMainContent": only_main_content}
result = firecrawl_app.scrape_url(url, params)
return result
else:

View File

@ -10,19 +10,18 @@ def test_firecrawl_web_extractor_crawl_mode(mocker):
base_url = "https://api.firecrawl.dev"
firecrawl_app = FirecrawlApp(api_key=api_key, base_url=base_url)
params = {
"crawlerOptions": {
"includes": [],
"excludes": [],
"generateImgAltText": True,
"maxDepth": 1,
"limit": 1,
"returnOnlyUrls": False,
}
"includes": [],
"excludes": [],
"generateImgAltText": True,
"maxDepth": 1,
"limit": 1,
}
mocked_firecrawl = {
"jobId": "test",
"id": "test",
}
mocker.patch("requests.post", return_value=_mock_response(mocked_firecrawl))
job_id = firecrawl_app.crawl_url(url, params)
print(job_id)
print(f"job_id: {job_id}")
assert job_id is not None
assert isinstance(job_id, str)

View File

@ -2,7 +2,7 @@ version: '3'
services:
# API service
api:
image: langgenius/dify-api:0.15.1
image: langgenius/dify-api:0.15.2
restart: always
environment:
# Startup mode, 'api' starts the API server.
@ -227,7 +227,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:0.15.1
image: langgenius/dify-api:0.15.2
restart: always
environment:
CONSOLE_WEB_URL: ''
@ -397,7 +397,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:0.15.1
image: langgenius/dify-web:0.15.2
restart: always
environment:
# The base URL of console application api server, refers to the Console base URL of WEB service if console domain is

View File

@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
services:
# API service
api:
image: langgenius/dify-api:0.15.1
image: langgenius/dify-api:0.15.2
restart: always
environment:
# Use the shared environment variables.
@ -25,7 +25,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:0.15.1
image: langgenius/dify-api:0.15.2
restart: always
environment:
# Use the shared environment variables.
@ -47,7 +47,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:0.15.1
image: langgenius/dify-web:0.15.2
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}

View File

@ -393,7 +393,7 @@ x-shared-env: &shared-api-worker-env
services:
# API service
api:
image: langgenius/dify-api:0.15.1
image: langgenius/dify-api:0.15.2
restart: always
environment:
# Use the shared environment variables.
@ -416,7 +416,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:0.15.1
image: langgenius/dify-api:0.15.2
restart: always
environment:
# Use the shared environment variables.
@ -438,7 +438,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:0.15.1
image: langgenius/dify-web:0.15.2
restart: always
environment:
CONSOLE_API_URL: ${CONSOLE_API_URL:-}

View File

@ -10,6 +10,7 @@ import {
import { Tab } from '@headlessui/react'
import { Tag } from './tag'
import classNames from '@/utils/classnames'
import { writeTextToClipboard } from '@/utils/clipboard'
const languageNames = {
js: 'JavaScript',
@ -71,7 +72,7 @@ function CopyButton({ code }: { code: string }) {
: 'bg-white/5 hover:bg-white/7.5 dark:bg-white/2.5 dark:hover:bg-white/5',
)}
onClick={() => {
window.navigator.clipboard.writeText(code).then(() => {
writeTextToClipboard(code).then(() => {
setCopyCount(count => count + 1)
})
}}

View File

@ -46,7 +46,7 @@ const ProviderList = () => {
if (tagFilterValue.length > 0 && (!collection.labels || collection.labels.every(label => !tagFilterValue.includes(label))))
return false
if (keywords)
return collection.name.toLowerCase().includes(keywords.toLowerCase())
return Object.values(collection.label).some(value => value.toLowerCase().includes(keywords.toLowerCase()))
return true
})
}, [activeTab, tagFilterValue, keywords, collectionList])

View File

@ -546,7 +546,9 @@ export const getVarType = ({
else {
(valueSelector as ValueSelector).slice(1).forEach((key, i) => {
const isLast = i === valueSelector.length - 2
curr = curr?.find((v: any) => v.variable === key)
if (Array.isArray(curr))
curr = curr?.find((v: any) => v.variable === key)
if (isLast) {
type = curr?.type
}

View File

@ -1,6 +1,6 @@
{
"name": "dify-web",
"version": "0.15.1",
"version": "0.15.2",
"private": true,
"engines": {
"node": ">=18.17.0"

35
web/utils/clipboard.ts Normal file
View File

@ -0,0 +1,35 @@
/**
 * Copy `text` to the system clipboard.
 *
 * Prefers the async Clipboard API; falls back to a hidden textarea with
 * `document.execCommand('copy')` when the API is unavailable (e.g. insecure
 * context) or when the API call rejects (e.g. permission denied).
 *
 * @param text The text to place on the clipboard.
 * @returns A promise that resolves on success and rejects with an Error.
 */
export async function writeTextToClipboard(text: string): Promise<void> {
  if (navigator.clipboard && navigator.clipboard.writeText) {
    try {
      return await navigator.clipboard.writeText(text)
    }
    catch {
      // The Clipboard API can reject (insecure context, missing permission);
      // try the legacy path before giving up.
      return fallbackCopyTextToClipboard(text)
    }
  }
  return fallbackCopyTextToClipboard(text)
}
/**
 * Legacy clipboard copy using a temporary off-screen textarea and
 * `document.execCommand('copy')`.
 *
 * @param text The text to place on the clipboard.
 * @returns A promise that resolves on success and rejects with an Error when
 *          the copy command reports failure or throws.
 */
async function fallbackCopyTextToClipboard(text: string): Promise<void> {
  const textArea = document.createElement('textarea')
  textArea.value = text
  textArea.style.position = 'fixed' // Avoid scrolling to bottom
  // Keep the element off-screen so it neither flashes nor affects layout.
  textArea.style.left = '-9999px'
  textArea.style.top = '0'
  // Prevent mobile browsers from popping up the on-screen keyboard on focus.
  textArea.setAttribute('readonly', '')
  document.body.appendChild(textArea)
  textArea.focus()
  textArea.select()
  try {
    const successful = document.execCommand('copy')
    if (successful)
      return Promise.resolve()
    return Promise.reject(new Error('document.execCommand failed'))
  }
  catch (err) {
    return Promise.reject(convertAnyToError(err))
  }
  finally {
    document.body.removeChild(textArea)
  }
}
/**
 * Normalize an arbitrary thrown value into an `Error` instance.
 * Real `Error` objects pass through untouched; anything else is
 * stringified and wrapped.
 */
function convertAnyToError(err: any): Error {
  return err instanceof Error
    ? err
    : new Error(`Caught: ${String(err)}`)
}