diff --git a/api/.ruff.toml b/api/.ruff.toml index f30275a943..89a2da35d6 100644 --- a/api/.ruff.toml +++ b/api/.ruff.toml @@ -85,11 +85,11 @@ ignore = [ ] "tests/*" = [ "F811", # redefined-while-unused - "F401", # unused-import ] [lint.pyflakes] -extend-generics = [ +allowed-unused-imports = [ "_pytest.monkeypatch", "tests.integration_tests", + "tests.unit_tests", ] diff --git a/api/Dockerfile b/api/Dockerfile index b5b8f69829..df676f1926 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -55,7 +55,7 @@ RUN apt-get update \ && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \ && apt-get update \ # For Security - && apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \ + && apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.19+dfsg-1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \ # install a chinese font to support the use of tools like matplotlib && apt-get install -y fonts-noto-cjk \ && apt-get autoremove -y \ diff --git a/api/app.py b/api/app.py index c6a0829080..e6649e59df 100644 --- a/api/app.py +++ b/api/app.py @@ -1,12 +1,8 @@ -from libs import version_utils - -# preparation before creating app -version_utils.check_supported_python_version() +import os +import sys def is_db_command(): - import sys - if len(sys.argv) > 1 and sys.argv[0].endswith("flask") and sys.argv[1] == "db": return True return False @@ -18,10 +14,18 @@ if is_db_command(): app = create_migrations_app() else: - from app_factory import create_app - from libs import threadings_utils + if os.environ.get("FLASK_DEBUG", "False") != "True": + from gevent import monkey # type: ignore - threadings_utils.apply_gevent_threading_patch() + # gevent + monkey.patch_all() + + from grpc.experimental import gevent as grpc_gevent # type: ignore + + # grpc gevent + grpc_gevent.init_gevent() + + from app_factory import create_app app = create_app() celery = app.extensions["celery"] diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index f10252e455..e11993ddc7 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -765,6 +765,13 @@ class LoginConfig(BaseSettings): ) +class AccountConfig(BaseSettings): + ACCOUNT_DELETION_TOKEN_EXPIRY_MINUTES: PositiveInt = Field( + description="Duration in minutes for which an account deletion token remains valid", + default=5, + ) + + class FeatureConfig( # place the configs in alphabet order AppExecutionConfig, @@ -792,6 +799,7 @@ class FeatureConfig( WorkflowNodeExecutionConfig, WorkspaceConfig, LoginConfig, + AccountConfig, # hosted services config HostedServiceConfig, CeleryBeatConfig, diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index 26a3a022d4..6942ac6fbe 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -2,7 +2,7 @@ import json import logging from flask import abort, request -from flask_restful import Resource, marshal_with, reqparse # type: ignore +from flask_restful import Resource, inputs, marshal_with, reqparse # type: ignore from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services @@ -14,7 +14,7 @@ from controllers.console.wraps import account_initialization_required, setup_req from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom from factories
import variable_factory -from fields.workflow_fields import workflow_fields +from fields.workflow_fields import workflow_fields, workflow_pagination_fields from fields.workflow_run_fields import workflow_run_node_execution_fields from libs import helper from libs.helper import TimestampField, uuid_value @@ -440,6 +440,31 @@ class WorkflowConfigApi(Resource): } +class PublishedAllWorkflowApi(Resource): + @setup_required + @login_required + @account_initialization_required + @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + @marshal_with(workflow_pagination_fields) + def get(self, app_model: App): + """ + Get published workflows + """ + if not current_user.is_editor: + raise Forbidden() + + parser = reqparse.RequestParser() + parser.add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args") + parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args") + args = parser.parse_args() + page = args.get("page") + limit = args.get("limit") + workflow_service = WorkflowService() + workflows, has_more = workflow_service.get_all_published_workflow(app_model=app_model, page=page, limit=limit) + + return {"items": workflows, "page": page, "limit": limit, "has_more": has_more} + + api.add_resource(DraftWorkflowApi, "/apps/<uuid:app_id>/workflows/draft") api.add_resource(WorkflowConfigApi, "/apps/<uuid:app_id>/workflows/draft/config") api.add_resource(AdvancedChatDraftWorkflowRunApi, "/apps/<uuid:app_id>/advanced-chat/workflows/draft/run") @@ -454,6 +479,7 @@ api.add_resource( WorkflowDraftRunIterationNodeApi, "/apps/<uuid:app_id>/workflows/draft/iteration/nodes/<string:node_id>/run" ) api.add_resource(PublishedWorkflowApi, "/apps/<uuid:app_id>/workflows/publish") +api.add_resource(PublishedAllWorkflowApi, "/apps/<uuid:app_id>/workflows") api.add_resource(DefaultBlockConfigsApi, "/apps/<uuid:app_id>/workflows/default-workflow-block-configs") api.add_resource( DefaultBlockConfigApi, "/apps/<uuid:app_id>/workflows/default-workflow-block-configs/<string:block_type>" ) diff --git a/api/controllers/console/auth/error.py b/api/controllers/console/auth/error.py index e6e30c3c0b..8ef10c7bbb 100644 --- a/api/controllers/console/auth/error.py +++ b/api/controllers/console/auth/error.py @@ -53,3 +53,9 @@ class EmailCodeLoginRateLimitExceededError(BaseHTTPException): error_code = "email_code_login_rate_limit_exceeded" description = "Too many login emails have been sent. Please try again in 5 minutes." code = 429 + + +class EmailCodeAccountDeletionRateLimitExceededError(BaseHTTPException): + error_code = "email_code_account_deletion_rate_limit_exceeded" + description = "Too many account deletion emails have been sent. Please try again in 5 minutes."
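For context, a rough sketch of the client flow that can end in this 429 response. It is an illustration only: the base URL, port, and bearer token are placeholder assumptions, while the endpoint paths and payload shapes come from the account.py changes later in this diff.

import requests

BASE = "http://localhost:5001/console/api"  # assumption: local console API prefix
HEADERS = {"Authorization": "Bearer <console-access-token>"}  # assumption: console session auth

# Step 1: request a deletion code; the 6-digit code is emailed to the account,
# and the matching token comes back in the "data" field.
token = requests.get(f"{BASE}/account/delete/verify", headers=HEADERS).json()["data"]

# Repeating step 1 while rate-limited (AccountService allows one send per 60s
# window) surfaces this EmailCodeAccountDeletionRateLimitExceededError.

# Step 2: confirm deletion with the token plus the emailed code.
requests.post(f"{BASE}/account/delete", headers=HEADERS, json={"token": token, "code": "123456"})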
+ code = 429 diff --git a/api/controllers/console/auth/forgot_password.py b/api/controllers/console/auth/forgot_password.py index 140b9e145f..a9c4300b9a 100644 --- a/api/controllers/console/auth/forgot_password.py +++ b/api/controllers/console/auth/forgot_password.py @@ -6,13 +6,8 @@ from flask_restful import Resource, reqparse # type: ignore from constants.languages import languages from controllers.console import api -from controllers.console.auth.error import ( - EmailCodeError, - InvalidEmailError, - InvalidTokenError, - PasswordMismatchError, -) -from controllers.console.error import AccountNotFound, EmailSendIpLimitError +from controllers.console.auth.error import EmailCodeError, InvalidEmailError, InvalidTokenError, PasswordMismatchError +from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError from controllers.console.wraps import setup_required from events.tenant_event import tenant_was_created from extensions.ext_database import db @@ -20,6 +15,7 @@ from libs.helper import email, extract_remote_ip from libs.password import hash_password, valid_password from models.account import Account from services.account_service import AccountService, TenantService +from services.errors.account import AccountRegisterError from services.errors.workspace import WorkSpaceNotAllowedCreateError from services.feature_service import FeatureService @@ -129,6 +125,8 @@ class ForgotPasswordResetApi(Resource): ) except WorkSpaceNotAllowedCreateError: pass + except AccountRegisterError as are: + raise AccountInFreezeError() return {"result": "success"} diff --git a/api/controllers/console/auth/login.py b/api/controllers/console/auth/login.py index 78a80fc8d7..41362e9fa2 100644 --- a/api/controllers/console/auth/login.py +++ b/api/controllers/console/auth/login.py @@ -5,6 +5,7 @@ from flask import request from flask_restful import Resource, reqparse # type: ignore import services +from configs import dify_config from constants.languages import languages from controllers.console import api from controllers.console.auth.error import ( @@ -16,6 +17,7 @@ from controllers.console.auth.error import ( ) from controllers.console.error import ( AccountBannedError, + AccountInFreezeError, AccountNotFound, EmailSendIpLimitError, NotAllowedCreateWorkspace, @@ -26,6 +28,8 @@ from libs.helper import email, extract_remote_ip from libs.password import valid_password from models.account import Account from services.account_service import AccountService, RegisterService, TenantService +from services.billing_service import BillingService +from services.errors.account import AccountRegisterError from services.errors.workspace import WorkSpaceNotAllowedCreateError from services.feature_service import FeatureService @@ -44,6 +48,9 @@ class LoginApi(Resource): parser.add_argument("language", type=str, required=False, default="en-US", location="json") args = parser.parse_args() + if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(args["email"]): + raise AccountInFreezeError() + is_login_error_rate_limit = AccountService.is_login_error_rate_limit(args["email"]) if is_login_error_rate_limit: raise EmailPasswordLoginLimitError() @@ -113,8 +120,10 @@ class ResetPasswordSendEmailApi(Resource): language = "zh-Hans" else: language = "en-US" - - account = AccountService.get_user_through_email(args["email"]) + try: + account = AccountService.get_user_through_email(args["email"]) + except AccountRegisterError as are: + raise AccountInFreezeError() if account is None: if 
FeatureService.get_system_features().is_allow_register: token = AccountService.send_reset_password_email(email=args["email"], language=language) @@ -142,8 +151,11 @@ class EmailCodeLoginSendEmailApi(Resource): language = "zh-Hans" else: language = "en-US" + try: + account = AccountService.get_user_through_email(args["email"]) + except AccountRegisterError as are: + raise AccountInFreezeError() - account = AccountService.get_user_through_email(args["email"]) if account is None: if FeatureService.get_system_features().is_allow_register: token = AccountService.send_email_code_login_email(email=args["email"], language=language) @@ -177,7 +189,10 @@ raise EmailCodeError() AccountService.revoke_email_code_login_token(args["token"]) - account = AccountService.get_user_through_email(user_email) + try: + account = AccountService.get_user_through_email(user_email) + except AccountRegisterError as are: + raise AccountInFreezeError() if account: tenant = TenantService.get_join_tenants(account) if not tenant: @@ -196,6 +211,8 @@ ) except WorkSpaceNotAllowedCreateError: return NotAllowedCreateWorkspace() + except AccountRegisterError as are: + raise AccountInFreezeError() token_pair = AccountService.login(account, ip_address=extract_remote_ip(request)) AccountService.reset_login_error_rate_limit(args["email"]) return {"result": "success", "data": token_pair.model_dump()} diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index 333b241427..2a08362c6d 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -16,7 +16,7 @@ from libs.oauth import GitHubOAuth, GoogleOAuth, OAuthUserInfo from models import Account from models.account import AccountStatus from services.account_service import AccountService, RegisterService, TenantService -from services.errors.account import AccountNotFoundError +from services.errors.account import AccountNotFoundError, AccountRegisterError from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkSpaceNotFoundError from services.feature_service import FeatureService @@ -99,6 +99,8 @@ class OAuthCallback(Resource): f"{dify_config.CONSOLE_WEB_URL}/signin" "?message=Workspace not found, please contact system admin to invite you to join in a workspace." ) + except AccountRegisterError as e: + return redirect(f"{dify_config.CONSOLE_WEB_URL}/signin?message={e.description}") # Check account status if account.status == AccountStatus.BANNED.value: diff --git a/api/controllers/console/error.py b/api/controllers/console/error.py index 1b4e6deae6..ee87138a44 100644 --- a/api/controllers/console/error.py +++ b/api/controllers/console/error.py @@ -92,3 +92,12 @@ class UnauthorizedAndForceLogout(BaseHTTPException): error_code = "unauthorized_and_force_logout" description = "Unauthorized and force logout." code = 401 + + +class AccountInFreezeError(BaseHTTPException): + error_code = "account_in_freeze" + code = 400 + description = ( + "This email account has been deleted within the past 30 days " + "and is temporarily unavailable for new account registration."
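+ # NB: adjacent Python string literals are concatenated with no separator,
+ # so the trailing space after "30 days " is required for the message to read correctly.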
+ ) diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index 690297048e..405d5ed607 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -66,10 +66,17 @@ class MessageFeedbackApi(InstalledAppResource): parser = reqparse.RequestParser() parser.add_argument("rating", type=str, choices=["like", "dislike", None], location="json") + parser.add_argument("content", type=str, location="json") args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, current_user, args.get("rating"), args.get("content")) + MessageService.create_feedback( + app_model=app_model, + message_id=message_id, + user=current_user, + rating=args.get("rating"), + content=args.get("content"), + ) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 96ed4b7a57..f1ec0f3d29 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -11,6 +11,7 @@ from controllers.console import api from controllers.console.workspace.error import ( AccountAlreadyInitedError, CurrentPasswordIncorrectError, + InvalidAccountDeletionCodeError, InvalidInvitationCodeError, RepeatPasswordNotMatchError, ) @@ -21,6 +22,7 @@ from libs.helper import TimestampField, timezone from libs.login import login_required from models import AccountIntegrate, InvitationCode from services.account_service import AccountService +from services.billing_service import BillingService from services.errors.account import CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError @@ -242,6 +244,54 @@ class AccountIntegrateApi(Resource): return {"data": integrate_data} +class AccountDeleteVerifyApi(Resource): + @setup_required + @login_required + @account_initialization_required + def get(self): + account = current_user + + token, code = AccountService.generate_account_deletion_verification_code(account) + AccountService.send_account_deletion_verification_email(account, code) + + return {"result": "success", "data": token} + + +class AccountDeleteApi(Resource): + @setup_required + @login_required + @account_initialization_required + def post(self): + account = current_user + + parser = reqparse.RequestParser() + parser.add_argument("token", type=str, required=True, location="json") + parser.add_argument("code", type=str, required=True, location="json") + args = parser.parse_args() + + if not AccountService.verify_account_deletion_code(args["token"], args["code"]): + raise InvalidAccountDeletionCodeError() + + AccountService.delete_account(account) + + return {"result": "success"} + + +class AccountDeleteUpdateFeedbackApi(Resource): + @setup_required + def post(self): + account = current_user + + parser = reqparse.RequestParser() + parser.add_argument("email", type=str, required=True, location="json") + parser.add_argument("feedback", type=str, required=True, location="json") + args = parser.parse_args() + + BillingService.update_account_deletion_feedback(args["email"], args["feedback"]) + + return {"result": "success"} + + # Register API resources api.add_resource(AccountInitApi, "/account/init") api.add_resource(AccountProfileApi, "/account/profile") @@ -252,5 +302,8 @@ api.add_resource(AccountInterfaceThemeApi, "/account/interface-theme") api.add_resource(AccountTimezoneApi, "/account/timezone") api.add_resource(AccountPasswordApi, 
"/account/password") api.add_resource(AccountIntegrateApi, "/account/integrates") +api.add_resource(AccountDeleteVerifyApi, "/account/delete/verify") +api.add_resource(AccountDeleteApi, "/account/delete") +api.add_resource(AccountDeleteUpdateFeedbackApi, "/account/delete/feedback") # api.add_resource(AccountEmailApi, '/account/email') # api.add_resource(AccountEmailVerifyApi, '/account/email-verify') diff --git a/api/controllers/console/workspace/error.py b/api/controllers/console/workspace/error.py index 9e13c7b924..8b70ca62b9 100644 --- a/api/controllers/console/workspace/error.py +++ b/api/controllers/console/workspace/error.py @@ -35,3 +35,9 @@ class AccountNotInitializedError(BaseHTTPException): error_code = "account_not_initialized" description = "The account has not been initialized yet. Please proceed with the initialization process first." code = 400 + + +class InvalidAccountDeletionCodeError(BaseHTTPException): + error_code = "invalid_account_deletion_code" + description = "Invalid account deletion code." + code = 400 diff --git a/api/controllers/console/workspace/members.py b/api/controllers/console/workspace/members.py index 1afb41ea87..a2b41c1d38 100644 --- a/api/controllers/console/workspace/members.py +++ b/api/controllers/console/workspace/members.py @@ -122,7 +122,7 @@ class MemberUpdateRoleApi(Resource): return {"code": "invalid-role", "message": "Invalid role"}, 400 member = db.session.get(Account, str(member_id)) - if member: + if not member: abort(404) try: diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index bed89a99a5..773ea0e0c6 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -108,7 +108,13 @@ class MessageFeedbackApi(Resource): args = parser.parse_args() try: - MessageService.create_feedback(app_model, message_id, end_user, args.get("rating"), args.get("content")) + MessageService.create_feedback( + app_model=app_model, + message_id=message_id, + user=end_user, + rating=args.get("rating"), + content=args.get("content"), + ) except services.errors.message.MessageNotExistsError: raise NotFound("Message Not Exists.") diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 84c58c62df..ea664b8f1b 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -8,12 +8,16 @@ from werkzeug.exceptions import NotFound import services.dataset_service from controllers.common.errors import FilenameNotExistsError from controllers.service_api import api -from controllers.service_api.app.error import ProviderNotInitializeError +from controllers.service_api.app.error import ( + FileTooLargeError, + NoFileUploadedError, + ProviderNotInitializeError, + TooManyFilesError, + UnsupportedFileTypeError, +) from controllers.service_api.dataset.error import ( ArchivedDocumentImmutableError, DocumentIndexingError, - NoFileUploadedError, - TooManyFilesError, ) from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check from core.errors.error import ProviderTokenNotInitError @@ -238,13 +242,18 @@ class DocumentUpdateByFileApi(DatasetApiResource): if not file.filename: raise FilenameNotExistsError - upload_file = FileService.upload_file( - filename=file.filename, - content=file.read(), - mimetype=file.mimetype, - user=current_user, - source="datasets", - ) + try: + upload_file = FileService.upload_file( + filename=file.filename, + 
content=file.read(), + mimetype=file.mimetype, + user=current_user, + source="datasets", + ) + except services.errors.file.FileTooLargeError as file_too_large_error: + raise FileTooLargeError(file_too_large_error.description) + except services.errors.file.UnsupportedFileTypeError: + raise UnsupportedFileTypeError() data_source = {"type": "upload_file", "info_list": {"file_info_list": {"file_ids": [upload_file.id]}}} args["data_source"] = data_source # validate args diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index 8d69bdcec2..ae086ba8ed 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -339,13 +339,13 @@ class BaseAgentRunner(AppRunner): raise ValueError(f"Agent thought {agent_thought.id} not found") agent_thought = queried_thought - if thought is not None: + if thought: agent_thought.thought = thought - if tool_name is not None: + if tool_name: agent_thought.tool = tool_name - if tool_input is not None: + if tool_input: if isinstance(tool_input, dict): try: tool_input = json.dumps(tool_input, ensure_ascii=False) @@ -354,7 +354,7 @@ class BaseAgentRunner(AppRunner): agent_thought.tool_input = tool_input - if observation is not None: + if observation: if isinstance(observation, dict): try: observation = json.dumps(observation, ensure_ascii=False) @@ -363,7 +363,7 @@ class BaseAgentRunner(AppRunner): agent_thought.observation = observation - if answer is not None: + if answer: agent_thought.answer = answer if messages_ids is not None and len(messages_ids) > 0: diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index 7215105a56..e21475271a 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -274,7 +274,7 @@ class WorkflowCycleManage: self, *, session: Session, workflow_run: WorkflowRun, event: QueueNodeStartedEvent ) -> WorkflowNodeExecution: workflow_node_execution = WorkflowNodeExecution() - workflow_node_execution.id = event.node_execution_id + workflow_node_execution.id = str(uuid4()) workflow_node_execution.tenant_id = workflow_run.tenant_id workflow_node_execution.app_id = workflow_run.app_id workflow_node_execution.workflow_id = workflow_run.workflow_id @@ -391,7 +391,7 @@ class WorkflowCycleManage: execution_metadata = json.dumps(merged_metadata) workflow_node_execution = WorkflowNodeExecution() - workflow_node_execution.id = event.node_execution_id + workflow_node_execution.id = str(uuid4()) workflow_node_execution.tenant_id = workflow_run.tenant_id workflow_node_execution.app_id = workflow_run.app_id workflow_node_execution.workflow_id = workflow_run.workflow_id @@ -824,7 +824,7 @@ class WorkflowCycleManage: return workflow_run def _get_workflow_node_execution(self, session: Session, node_execution_id: str) -> WorkflowNodeExecution: - stmt = select(WorkflowNodeExecution).where(WorkflowNodeExecution.id == node_execution_id) + stmt = select(WorkflowNodeExecution).where(WorkflowNodeExecution.node_execution_id == node_execution_id) workflow_node_execution = session.scalar(stmt) if not workflow_node_execution: raise WorkflowNodeExecutionNotFoundError(node_execution_id) diff --git a/api/core/model_runtime/model_providers/wenxin/_common.py b/api/core/model_runtime/model_providers/wenxin/_common.py index c77a499982..1247a11fe8 100644 --- a/api/core/model_runtime/model_providers/wenxin/_common.py +++ b/api/core/model_runtime/model_providers/wenxin/_common.py @@ 
-122,6 +122,7 @@ class _CommonWenxin: "bge-large-zh": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_zh", "tao-8k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/tao_8k", "bce-reranker-base_v1": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/reranker/bce_reranker_base", + "ernie-lite-pro-128k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-pro-128k", } function_calling_supports = [ diff --git a/api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml b/api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml new file mode 100644 index 0000000000..4f5832c859 --- /dev/null +++ b/api/core/model_runtime/model_providers/wenxin/llm/ernie-lite-pro-128k.yaml @@ -0,0 +1,42 @@ +model: ernie-lite-pro-128k +label: + en_US: Ernie-Lite-Pro-128K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + min: 0.1 + max: 1.0 + default: 0.8 + - name: top_p + use_template: top_p + - name: min_output_tokens + label: + en_US: "Min Output Tokens" + zh_Hans: "最小输出Token数" + use_template: max_tokens + min: 2 + max: 2048 + help: + zh_Hans: 指定模型最小输出token数 + en_US: Specifies the lower limit on the length of generated results. + - name: max_output_tokens + label: + en_US: "Max Output Tokens" + zh_Hans: "最大输出Token数" + use_template: max_tokens + min: 2 + max: 2048 + default: 2048 + help: + zh_Hans: 指定模型最大输出token数 + en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty diff --git a/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py b/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py index 8b17e8dc0a..a6214d955b 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py +++ b/api/core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py @@ -1,5 +1,5 @@ import re -from typing import Optional +from typing import Optional, cast class JiebaKeywordTableHandler: @@ -8,18 +8,20 @@ class JiebaKeywordTableHandler: from core.rag.datasource.keyword.jieba.stopwords import STOPWORDS - jieba.analyse.default_tfidf.stop_words = STOPWORDS + jieba.analyse.default_tfidf.stop_words = STOPWORDS # type: ignore def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> set[str]: """Extract keywords with JIEBA tfidf.""" - import jieba # type: ignore + import jieba.analyse # type: ignore keywords = jieba.analyse.extract_tags( sentence=text, topK=max_keywords_per_chunk, ) + # jieba.analyse.extract_tags returns list[Any] when withFlag is False by default. 
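+ # Note: typing.cast is a static-type assertion only and does nothing at runtime;
+ # the set() wrapper added below matches _expand_tokens_with_subtokens, which
+ # takes set[str] rather than list[str].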
+ keywords = cast(list[str], keywords) - return set(self._expand_tokens_with_subtokens(keywords)) + return set(self._expand_tokens_with_subtokens(set(keywords))) def _expand_tokens_with_subtokens(self, tokens: set[str]) -> set[str]: """Get subtokens from a list of tokens., filtering for stopwords.""" diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index fdc2e46d14..41355d3fac 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -138,17 +138,24 @@ class NotionExtractor(BaseExtractor): block_url = BLOCK_CHILD_URL_TMPL.format(block_id=page_id) while True: query_dict: dict[str, Any] = {} if not start_cursor else {"start_cursor": start_cursor} - res = requests.request( - "GET", - block_url, - headers={ - "Authorization": "Bearer " + self._notion_access_token, - "Content-Type": "application/json", - "Notion-Version": "2022-06-28", - }, - params=query_dict, - ) - data = res.json() + try: + res = requests.request( + "GET", + block_url, + headers={ + "Authorization": "Bearer " + self._notion_access_token, + "Content-Type": "application/json", + "Notion-Version": "2022-06-28", + }, + params=query_dict, + ) + if res.status_code != 200: + raise ValueError(f"Error fetching Notion block data: {res.text}") + data = res.json() + except requests.RequestException as e: + raise ValueError("Error fetching Notion block data") from e + if "results" not in data or not isinstance(data["results"], list): + raise ValueError("Error fetching Notion block data") for result in data["results"]: result_type = result["type"] result_obj = result[result_type] diff --git a/api/core/tools/errors.py b/api/core/tools/errors.py index 6febf137b0..c5f9ca4774 100644 --- a/api/core/tools/errors.py +++ b/api/core/tools/errors.py @@ -31,3 +31,7 @@ class ToolApiSchemaError(ValueError): class ToolEngineInvokeError(Exception): meta: ToolInvokeMeta + + def __init__(self, meta, **kwargs): + self.meta = meta + super().__init__(**kwargs) diff --git a/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.py b/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.py index 050b468b74..aca369b438 100644 --- a/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.py +++ b/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.py @@ -21,7 +21,7 @@ class BedrockRetrieveTool(BuiltinTool): retrieval_configuration = {"vectorSearchConfiguration": {"numberOfResults": num_results}} - # 如果有元数据过滤条件,则添加到检索配置中 + # Add metadata filter to retrieval configuration if present if metadata_filter: retrieval_configuration["vectorSearchConfiguration"]["filter"] = metadata_filter @@ -77,7 +77,7 @@ class BedrockRetrieveTool(BuiltinTool): if not query: return self.create_text_message("Please input query") - # 获取元数据过滤条件(如果存在) + # Get metadata filter conditions (if they exist) metadata_filter_str = tool_parameters.get("metadata_filter") metadata_filter = json.loads(metadata_filter_str) if metadata_filter_str else None @@ -86,7 +86,7 @@ class BedrockRetrieveTool(BuiltinTool): query_input=query, knowledge_base_id=self.knowledge_base_id, num_results=self.topk, - metadata_filter=metadata_filter, # 将元数据过滤条件传递给检索方法 + metadata_filter=metadata_filter, ) line = 5 @@ -109,7 +109,7 @@ class BedrockRetrieveTool(BuiltinTool): if not parameters.get("query"): raise ValueError("query is required") - # 可选:可以验证元数据过滤条件是否为有效的 JSON 字符串(如果提供) + # Optional: Validate if metadata filter is a valid JSON string (if provided) metadata_filter_str = 
parameters.get("metadata_filter") if metadata_filter_str and not isinstance(json.loads(metadata_filter_str), dict): raise ValueError("metadata_filter must be a valid JSON object") diff --git a/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.yaml b/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.yaml index 9e51d52def..31961a0cf0 100644 --- a/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.yaml +++ b/api/core/tools/provider/builtin/aws/tools/bedrock_retrieve.yaml @@ -73,9 +73,9 @@ parameters: llm_description: AWS region where the Bedrock Knowledge Base is located form: form - - name: metadata_filter - type: string - required: false + - name: metadata_filter # Additional parameter for metadata filtering + type: string # String type, expects JSON-formatted filter conditions + required: false # Optional field - can be omitted label: en_US: Metadata Filter zh_Hans: 元数据过滤器 diff --git a/api/core/tools/provider/builtin/aws/tools/sagemaker_chinese_toxicity_detector.py b/api/core/tools/provider/builtin/aws/tools/sagemaker_chinese_toxicity_detector.py index e05e2d9bf7..3d88f28dbd 100644 --- a/api/core/tools/provider/builtin/aws/tools/sagemaker_chinese_toxicity_detector.py +++ b/api/core/tools/provider/builtin/aws/tools/sagemaker_chinese_toxicity_detector.py @@ -6,8 +6,8 @@ import boto3 from core.tools.entities.tool_entities import ToolInvokeMessage from core.tools.tool.builtin_tool import BuiltinTool -# 定义标签映射 -LABEL_MAPPING = {"LABEL_0": "SAFE", "LABEL_1": "NO_SAFE"} +# Define label mappings +LABEL_MAPPING = {0: "SAFE", 1: "NO_SAFE"} class ContentModerationTool(BuiltinTool): @@ -28,12 +28,12 @@ class ContentModerationTool(BuiltinTool): # Handle nested JSON if present if isinstance(json_obj, dict) and "body" in json_obj: body_content = json.loads(json_obj["body"]) - raw_label = body_content.get("label") + prediction_result = body_content.get("prediction") else: - raw_label = json_obj.get("label") + prediction_result = json_obj.get("prediction") - # 映射标签并返回 - result = LABEL_MAPPING.get(raw_label, "NO_SAFE") # 如果映射中没有找到,默认返回NO_SAFE + # Map labels and return + result = LABEL_MAPPING.get(prediction_result, "NO_SAFE") # If not found in mapping, default to NO_SAFE return result def _invoke( diff --git a/api/core/tools/provider/builtin/aws/tools/sagemaker_text_rerank.py b/api/core/tools/provider/builtin/aws/tools/sagemaker_text_rerank.py index 715b1ddedd..8320bd84ef 100644 --- a/api/core/tools/provider/builtin/aws/tools/sagemaker_text_rerank.py +++ b/api/core/tools/provider/builtin/aws/tools/sagemaker_text_rerank.py @@ -10,8 +10,7 @@ from core.tools.tool.builtin_tool import BuiltinTool class SageMakerReRankTool(BuiltinTool): sagemaker_client: Any = None - sagemaker_endpoint: str | None = None - topk: int | None = None + sagemaker_endpoint: str = None def _sagemaker_rerank(self, query_input: str, docs: list[str], rerank_endpoint: str): inputs = [query_input] * len(docs) @@ -47,8 +46,7 @@ class SageMakerReRankTool(BuiltinTool): self.sagemaker_endpoint = tool_parameters.get("sagemaker_endpoint") line = 2 - if not self.topk: - self.topk = tool_parameters.get("topk", 5) + topk = tool_parameters.get("topk", 5) line = 3 query = tool_parameters.get("query", "") @@ -75,7 +73,7 @@ class SageMakerReRankTool(BuiltinTool): sorted_candidate_docs = sorted(candidate_docs, key=operator.itemgetter("score"), reverse=True) line = 9 - return [self.create_json_message(res) for res in sorted_candidate_docs[: self.topk]] + return [self.create_json_message(res) for res in 
sorted_candidate_docs[:topk]] except Exception as e: return self.create_text_message(f"Exception {str(e)}, line : {line}") diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py index f994cdbf66..2bf10ce8ff 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py @@ -125,7 +125,7 @@ class ComfyUiClient: for output in history["outputs"].values(): for img in output.get("images", []): image_data = self.get_image(img["filename"], img["subfolder"], img["type"]) - images.append(image_data) + images.append((image_data, img["filename"])) return images finally: ws.close() diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py index 8783736277..eb085f221e 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py @@ -1,4 +1,5 @@ import json +import mimetypes from typing import Any from core.file import FileType @@ -75,10 +76,12 @@ class ComfyUIWorkflowTool(BuiltinTool): images = comfyui.generate_image_by_prompt(prompt) result = [] - for img in images: + for image_data, filename in images: result.append( self.create_blob_message( - blob=img, meta={"mime_type": "image/png"}, save_as=self.VariableKey.IMAGE.value + blob=image_data, + meta={"mime_type": mimetypes.guess_type(filename)[0]}, + save_as=self.VariableKey.IMAGE.value, ) ) return result diff --git a/api/core/tools/tool/workflow_tool.py b/api/core/tools/tool/workflow_tool.py index edff4a2d07..eea66ee4ed 100644 --- a/api/core/tools/tool/workflow_tool.py +++ b/api/core/tools/tool/workflow_tool.py @@ -1,12 +1,13 @@ import json import logging from copy import deepcopy -from typing import Any, Optional, Union +from typing import Any, Optional, Union, cast from core.file import FILE_MODEL_IDENTITY, File, FileTransferMethod from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter, ToolProviderType from core.tools.tool.tool import Tool from extensions.ext_database import db +from factories.file_factory import build_from_mapping from models.account import Account from models.model import App, EndUser from models.workflow import Workflow @@ -194,10 +195,18 @@ class WorkflowTool(Tool): if isinstance(value, list): for item in value: if isinstance(item, dict) and item.get("dify_model_identity") == FILE_MODEL_IDENTITY: - file = File.model_validate(item) + item["tool_file_id"] = item.get("related_id") + file = build_from_mapping( + mapping=item, + tenant_id=str(cast(Tool.Runtime, self.runtime).tenant_id), + ) files.append(file) elif isinstance(value, dict) and value.get("dify_model_identity") == FILE_MODEL_IDENTITY: - file = File.model_validate(value) + value["tool_file_id"] = value.get("related_id") + file = build_from_mapping( + mapping=value, + tenant_id=str(cast(Tool.Runtime, self.runtime).tenant_id), + ) files.append(file) result[key] = value diff --git a/api/core/workflow/graph_engine/entities/graph.py b/api/core/workflow/graph_engine/entities/graph.py index b3bcc3b2cc..5c672c985b 100644 --- a/api/core/workflow/graph_engine/entities/graph.py +++ b/api/core/workflow/graph_engine/entities/graph.py @@ -613,10 +613,10 @@ class Graph(BaseModel): for (node_id, node_id2), branch_node_ids in duplicate_end_node_ids.items(): # check which node is after if 
cls._is_node2_after_node1(node1_id=node_id, node2_id=node_id2, edge_mapping=edge_mapping): - if node_id in merge_branch_node_ids: + if node_id in merge_branch_node_ids and node_id2 in merge_branch_node_ids: del merge_branch_node_ids[node_id2] elif cls._is_node2_after_node1(node1_id=node_id2, node2_id=node_id, edge_mapping=edge_mapping): - if node_id2 in merge_branch_node_ids: + if node_id in merge_branch_node_ids and node_id2 in merge_branch_node_ids: del merge_branch_node_ids[node_id] branches_merge_node_ids: dict[str, str] = {} diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py b/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py index 408ed31096..14396e9920 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py @@ -15,11 +15,11 @@ def handle(sender, **kwargs): app_dataset_joins = db.session.query(AppDatasetJoin).filter(AppDatasetJoin.app_id == app.id).all() - removed_dataset_ids: set[int] = set() + removed_dataset_ids: set[str] = set() if not app_dataset_joins: added_dataset_ids = dataset_ids else: - old_dataset_ids: set[int] = set() + old_dataset_ids: set[str] = set() old_dataset_ids.update(app_dataset_join.dataset_id for app_dataset_join in app_dataset_joins) added_dataset_ids = dataset_ids - old_dataset_ids @@ -39,8 +39,8 @@ def handle(sender, **kwargs): db.session.commit() -def get_dataset_ids_from_model_config(app_model_config: AppModelConfig) -> set[int]: - dataset_ids: set[int] = set() +def get_dataset_ids_from_model_config(app_model_config: AppModelConfig) -> set[str]: + dataset_ids: set[str] = set() if not app_model_config: return dataset_ids diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py index 7a31c82f6a..dd2efed94b 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py @@ -17,11 +17,11 @@ def handle(sender, **kwargs): dataset_ids = get_dataset_ids_from_workflow(published_workflow) app_dataset_joins = db.session.query(AppDatasetJoin).filter(AppDatasetJoin.app_id == app.id).all() - removed_dataset_ids: set[int] = set() + removed_dataset_ids: set[str] = set() if not app_dataset_joins: added_dataset_ids = dataset_ids else: - old_dataset_ids: set[int] = set() + old_dataset_ids: set[str] = set() old_dataset_ids.update(app_dataset_join.dataset_id for app_dataset_join in app_dataset_joins) added_dataset_ids = dataset_ids - old_dataset_ids @@ -41,8 +41,8 @@ def handle(sender, **kwargs): db.session.commit() -def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set[int]: - dataset_ids: set[int] = set() +def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set[str]: + dataset_ids: set[str] = set() graph = published_workflow.graph_dict if not graph: return dataset_ids @@ -60,7 +60,7 @@ def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set[int]: for node in knowledge_retrieval_nodes: try: node_data = KnowledgeRetrievalNodeData(**node.get("data", {})) - dataset_ids.update(int(dataset_id) for dataset_id in node_data.dataset_ids) + dataset_ids.update(dataset_id for dataset_id in node_data.dataset_ids) except Exception as e: continue diff --git 
a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 30f216ff95..26bd6b3577 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -69,6 +69,7 @@ def init_app(app: DifyApp) -> Celery: "schedule.create_tidb_serverless_task", "schedule.update_tidb_serverless_status_task", "schedule.clean_messages", + "schedule.mail_clean_document_notify_task", ] day = dify_config.CELERY_BEAT_SCHEDULER_TIME beat_schedule = { @@ -92,6 +93,11 @@ "task": "schedule.clean_messages.clean_messages", "schedule": timedelta(days=day), }, + # every Monday + "mail_clean_document_notify_task": { + "task": "schedule.mail_clean_document_notify_task.send_document_clean_notify_task", + "schedule": crontab(minute="0", hour="10", day_of_week="1"), + }, } celery_app.conf.update(beat_schedule=beat_schedule, imports=imports) diff --git a/api/fields/workflow_fields.py b/api/fields/workflow_fields.py index bd093d4063..32f979a5f2 100644 --- a/api/fields/workflow_fields.py +++ b/api/fields/workflow_fields.py @@ -45,6 +45,7 @@ workflow_fields = { "graph": fields.Raw(attribute="graph_dict"), "features": fields.Raw(attribute="features_dict"), "hash": fields.String(attribute="unique_hash"), + "version": fields.String(attribute="version"), "created_by": fields.Nested(simple_account_fields, attribute="created_by_account"), "created_at": TimestampField, "updated_by": fields.Nested(simple_account_fields, attribute="updated_by_account", allow_null=True), @@ -61,3 +62,10 @@ workflow_partial_fields = { "updated_by": fields.String, "updated_at": TimestampField, } + +workflow_pagination_fields = { + "items": fields.List(fields.Nested(workflow_fields), attribute="items"), + "page": fields.Integer, + "limit": fields.Integer(attribute="limit"), + "has_more": fields.Boolean(attribute="has_more"), +} diff --git a/api/libs/threadings_utils.py b/api/libs/threadings_utils.py deleted file mode 100644 index e4d63fd314..0000000000 --- a/api/libs/threadings_utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from configs import dify_config - - -def apply_gevent_threading_patch(): - """ - Run threading patch by gevent - to make standard library threading compatible. - Patching should be done as early as possible in the lifecycle of the program. - :return: - """ - if not dify_config.DEBUG: - from gevent import monkey # type: ignore - from grpc.experimental import gevent as grpc_gevent # type: ignore - - # gevent - monkey.patch_all() - - # grpc gevent - grpc_gevent.init_gevent() diff --git a/api/libs/version_utils.py b/api/libs/version_utils.py deleted file mode 100644 index 10edf8a058..0000000000 --- a/api/libs/version_utils.py +++ /dev/null @@ -1,12 +0,0 @@ -import sys - - -def check_supported_python_version(): - python_version = sys.version_info - if not ((3, 11) <= python_version < (3, 13)): - print( - "Aborted to launch the service " - f" with unsupported Python version {python_version.major}.{python_version.minor}." - " Please ensure Python 3.11 or 3.12." - ) - raise SystemExit(1) diff --git a/api/migrations/README b/api/migrations/README index 220678df7a..0e04844159 100644 --- a/api/migrations/README +++ b/api/migrations/README @@ -1,2 +1 @@ Single-database configuration for Flask.
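Aside: the two helper modules deleted above are superseded by the inlined block at the top of api/app.py earlier in this diff; gating on the FLASK_DEBUG environment variable instead of dify_config.DEBUG presumably lets monkey.patch_all() run before the configs package (and anything importing ssl or socket) is loaded, which is the ordering gevent needs. The new beat entry in ext_celery.py also relies on crontab; a minimal sketch of the import it assumes is already present alongside timedelta:

from celery.schedules import crontab  # assumption: already imported in ext_celery.py

# Fires Mondays at 10:00, matching the "every Monday" comment in the schedule above.
crontab(minute="0", hour="10", day_of_week="1")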
- diff --git a/api/models/workflow.py b/api/models/workflow.py index 32a0860b77..8642df8adb 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -414,6 +414,18 @@ class WorkflowRun(db.Model): # type: ignore[name-defined] finished_at = db.Column(db.DateTime) exceptions_count = db.Column(db.Integer, server_default=db.text("0")) + @property + def created_by_account(self): + created_by_role = CreatedByRole(self.created_by_role) + return db.session.get(Account, self.created_by) if created_by_role == CreatedByRole.ACCOUNT else None + + @property + def created_by_end_user(self): + from models.model import EndUser + + created_by_role = CreatedByRole(self.created_by_role) + return db.session.get(EndUser, self.created_by) if created_by_role == CreatedByRole.END_USER else None + @property def graph_dict(self): return json.loads(self.graph) if self.graph else {} diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index 48bdc872f4..5e4d3ec323 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -28,7 +28,6 @@ def clean_messages(): plan_sandbox_clean_message_day = datetime.datetime.now() - datetime.timedelta( days=dify_config.PLAN_SANDBOX_CLEAN_MESSAGE_DAY_SETTING ) - page = 1 while True: try: # Main query with join and filter @@ -79,4 +78,4 @@ def clean_messages(): db.session.query(Message).filter(Message.id == message.id).delete() db.session.commit() end_at = time.perf_counter() - click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green")) + click.echo(click.style("Cleaned messages from db success latency: {}".format(end_at - start_at), fg="green")) diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py index 766954a257..fe6839288d 100644 --- a/api/schedule/mail_clean_document_notify_task.py +++ b/api/schedule/mail_clean_document_notify_task.py @@ -3,14 +3,18 @@ import time from collections import defaultdict import click -from celery import shared_task # type: ignore +from flask import render_template # type: ignore +import app +from configs import dify_config +from extensions.ext_database import db from extensions.ext_mail import mail from models.account import Account, Tenant, TenantAccountJoin from models.dataset import Dataset, DatasetAutoDisableLog +from services.feature_service import FeatureService -@shared_task(queue="mail") +@app.celery.task(queue="dataset") def send_document_clean_notify_task(): """ Async Send document clean notify mail @@ -29,35 +33,58 @@ def send_document_clean_notify_task(): # group by tenant_id dataset_auto_disable_logs_map: dict[str, list[DatasetAutoDisableLog]] = defaultdict(list) for dataset_auto_disable_log in dataset_auto_disable_logs: + if dataset_auto_disable_log.tenant_id not in dataset_auto_disable_logs_map: + dataset_auto_disable_logs_map[dataset_auto_disable_log.tenant_id] = [] dataset_auto_disable_logs_map[dataset_auto_disable_log.tenant_id].append(dataset_auto_disable_log) - + url = f"{dify_config.CONSOLE_WEB_URL}/datasets" for tenant_id, tenant_dataset_auto_disable_logs in dataset_auto_disable_logs_map.items(): - knowledge_details = [] - tenant = Tenant.query.filter(Tenant.id == tenant_id).first() - if not tenant: - continue - current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first() - if not current_owner_join: - continue - account = Account.query.filter(Account.id == current_owner_join.account_id).first() - if not account: - continue + 
features = FeatureService.get_features(tenant_id) + plan = features.billing.subscription.plan + if plan != "sandbox": + knowledge_details = [] + # check tenant + tenant = Tenant.query.filter(Tenant.id == tenant_id).first() + if not tenant: + continue + # check current owner + current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first() + if not current_owner_join: + continue + account = Account.query.filter(Account.id == current_owner_join.account_id).first() + if not account: + continue - dataset_auto_dataset_map = {} # type: ignore + dataset_auto_dataset_map = {} # type: ignore + for dataset_auto_disable_log in tenant_dataset_auto_disable_logs: + if dataset_auto_disable_log.dataset_id not in dataset_auto_dataset_map: + dataset_auto_dataset_map[dataset_auto_disable_log.dataset_id] = [] + dataset_auto_dataset_map[dataset_auto_disable_log.dataset_id].append( + dataset_auto_disable_log.document_id + ) + + for dataset_id, document_ids in dataset_auto_dataset_map.items(): + dataset = Dataset.query.filter(Dataset.id == dataset_id).first() + if dataset: + document_count = len(document_ids) + knowledge_details.append(rf"Knowledge base {dataset.name}: {document_count} documents") + if knowledge_details: + html_content = render_template( + "clean_document_job_mail_template-US.html", + userName=account.email, + knowledge_details=knowledge_details, + url=url, + ) + mail.send( + to=account.email, subject="Dify Knowledge base auto disable notification", html=html_content + ) + + # update notified to True for dataset_auto_disable_log in tenant_dataset_auto_disable_logs: - dataset_auto_dataset_map[dataset_auto_disable_log.dataset_id].append( - dataset_auto_disable_log.document_id - ) - - for dataset_id, document_ids in dataset_auto_dataset_map.items(): - dataset = Dataset.query.filter(Dataset.id == dataset_id).first() - if dataset: - document_count = len(document_ids) - knowledge_details.append(f"
<li>Knowledge base {dataset.name}: {document_count} documents</li>") - + dataset_auto_disable_log.notified = True + db.session.commit() end_at = time.perf_counter() logging.info( click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green") ) except Exception: - logging.exception("Send invite member mail to failed") + logging.exception("Send document clean notify mail failed") diff --git a/api/services/account_service.py index 2d37db391c..64477480db 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -32,6 +32,7 @@ from models.account import ( TenantStatus, ) from models.model import DifySetup +from services.billing_service import BillingService from services.errors.account import ( AccountAlreadyInTenantError, AccountLoginError, @@ -50,6 +51,8 @@ ) from services.errors.workspace import WorkSpaceNotAllowedCreateError from services.feature_service import FeatureService +from tasks.delete_account_task import delete_account_task +from tasks.mail_account_deletion_task import send_account_deletion_verification_code from tasks.mail_email_code_login import send_email_code_login_mail_task from tasks.mail_invite_member_task import send_invite_member_mail_task from tasks.mail_reset_password_task import send_reset_password_mail_task @@ -70,6 +73,9 @@ class AccountService: email_code_login_rate_limiter = RateLimiter( prefix="email_code_login_rate_limit", max_attempts=1, time_window=60 * 1 ) + email_code_account_deletion_rate_limiter = RateLimiter( + prefix="email_code_account_deletion_rate_limit", max_attempts=1, time_window=60 * 1 + ) LOGIN_MAX_ERROR_LIMITS = 5 @staticmethod @@ -201,6 +207,15 @@ from controllers.console.error import AccountNotFound raise AccountNotFound() + + if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(email): + raise AccountRegisterError( + description=( + "This email account has been deleted within the past " + "30 days and is temporarily unavailable for new account registration" + ) + ) + account = Account() account.email = email account.name = name @@ -240,6 +255,42 @@ return account + @staticmethod + def generate_account_deletion_verification_code(account: Account) -> tuple[str, str]: + code = "".join([str(random.randint(0, 9)) for _ in range(6)]) + token = TokenManager.generate_token( + account=account, token_type="account_deletion", additional_data={"code": code} + ) + return token, code + + @classmethod + def send_account_deletion_verification_email(cls, account: Account, code: str): + email = account.email + if cls.email_code_account_deletion_rate_limiter.is_rate_limited(email): + from controllers.console.auth.error import EmailCodeAccountDeletionRateLimitExceededError + + raise EmailCodeAccountDeletionRateLimitExceededError() + + send_account_deletion_verification_code.delay(to=email, code=code) + + cls.email_code_account_deletion_rate_limiter.increment_rate_limit(email) + + @staticmethod + def verify_account_deletion_code(token: str, code: str) -> bool: + token_data = TokenManager.get_token_data(token, "account_deletion") + if token_data is None: + return False + + if token_data["code"] != code: + return False + + return True + + @staticmethod + def delete_account(account: Account) -> None: + """Delete account.
This method only adds a task to the queue for deletion.""" + delete_account_task.delay(account.id) + @staticmethod def link_account_integrate(provider: str, open_id: str, account: Account) -> None: """Link account integrate""" @@ -379,6 +430,7 @@ class AccountService: def send_email_code_login_email( cls, account: Optional[Account] = None, email: Optional[str] = None, language: Optional[str] = "en-US" ): + email = account.email if account else email if email is None: raise ValueError("Email must be provided.") if cls.email_code_login_rate_limiter.is_rate_limited(email): @@ -408,6 +460,14 @@ class AccountService: @classmethod def get_user_through_email(cls, email: str): + if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(email): + raise AccountRegisterError( + description=( + "This email account has been deleted within the past " + "30 days and is temporarily unavailable for new account registration" + ) + ) + account = db.session.query(Account).filter(Account.email == email).first() if not account: return None @@ -824,6 +884,10 @@ class RegisterService: db.session.commit() except WorkSpaceNotAllowedCreateError: db.session.rollback() + except AccountRegisterError as are: + db.session.rollback() + logging.exception("Register failed") + raise are except Exception as e: db.session.rollback() logging.exception("Register failed") diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index 528a0dbcd3..b6d6d05e58 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -176,6 +176,9 @@ class AppDslService: data["kind"] = "app" imported_version = data.get("version", "0.1.0") + # check if imported_version is a float-like string + if not isinstance(imported_version, str): + raise ValueError(f"Invalid version type, expected str, got {type(imported_version)}") status = _check_version_compatibility(imported_version) # Extract app data diff --git a/api/services/audio_service.py b/api/services/audio_service.py index ef52301c0a..f4178a69a4 100644 --- a/api/services/audio_service.py +++ b/api/services/audio_service.py @@ -139,7 +139,7 @@ class AudioService: return Response(stream_with_context(response), content_type="audio/mpeg") return response else: - if not text: + if text is None: raise ValueError("Text is required") response = invoke_tts(text, app_model, voice) if isinstance(response, Generator): diff --git a/api/services/billing_service.py b/api/services/billing_service.py index ed611a8be4..3a13c10102 100644 --- a/api/services/billing_service.py +++ b/api/services/billing_service.py @@ -70,3 +70,24 @@ class BillingService: if not TenantAccountRole.is_privileged_role(join.role): raise ValueError("Only team owner or team admin can perform this action") + + @classmethod + def delete_account(cls, account_id: str): + """Delete account.""" + params = {"account_id": account_id} + return cls._send_request("DELETE", "/account/", params=params) + + @classmethod + def is_email_in_freeze(cls, email: str) -> bool: + params = {"email": email} + try: + response = cls._send_request("GET", "/account/in-freeze", params=params) + return bool(response.get("data", False)) + except Exception: + return False + + @classmethod + def update_account_deletion_feedback(cls, email: str, feedback: str): + """Update account deletion feedback.""" + json = {"email": email, "feedback": feedback} + return cls._send_request("POST", "/account/delete-feedback", json=json) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 
1fd18568f5..4821eb6696 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -86,25 +86,30 @@ class DatasetService: else: return [], 0 else: - # show all datasets that the user has permission to access - if permitted_dataset_ids: - query = query.filter( - db.or_( - Dataset.permission == DatasetPermissionEnum.ALL_TEAM, - db.and_(Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id), - db.and_( - Dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM, - Dataset.id.in_(permitted_dataset_ids), - ), + if user.current_role not in (TenantAccountRole.OWNER, TenantAccountRole.ADMIN): + # show all datasets that the user has permission to access + if permitted_dataset_ids: + query = query.filter( + db.or_( + Dataset.permission == DatasetPermissionEnum.ALL_TEAM, + db.and_( + Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id + ), + db.and_( + Dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM, + Dataset.id.in_(permitted_dataset_ids), + ), + ) ) - ) - else: - query = query.filter( - db.or_( - Dataset.permission == DatasetPermissionEnum.ALL_TEAM, - db.and_(Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id), + else: + query = query.filter( + db.or_( + Dataset.permission == DatasetPermissionEnum.ALL_TEAM, + db.and_( + Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id + ), + ) ) - ) else: # if no user, only show datasets that are shared with all team members query = query.filter(Dataset.permission == DatasetPermissionEnum.ALL_TEAM) @@ -377,14 +382,19 @@ class DatasetService: if dataset.tenant_id != user.current_tenant_id: logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") raise NoPermissionError("You do not have permission to access this dataset.") - if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id: - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") - raise NoPermissionError("You do not have permission to access this dataset.") - if dataset.permission == "partial_members": - user_permission = DatasetPermission.query.filter_by(dataset_id=dataset.id, account_id=user.id).first() - if not user_permission and dataset.tenant_id != user.current_tenant_id and dataset.created_by != user.id: + if user.current_role not in (TenantAccountRole.OWNER, TenantAccountRole.ADMIN): + if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id: logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") raise NoPermissionError("You do not have permission to access this dataset.") + if dataset.permission == "partial_members": + user_permission = DatasetPermission.query.filter_by(dataset_id=dataset.id, account_id=user.id).first() + if ( + not user_permission + and dataset.tenant_id != user.current_tenant_id + and dataset.created_by != user.id + ): + logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + raise NoPermissionError("You do not have permission to access this dataset.") @staticmethod def check_dataset_operator_permission(user: Optional[Account] = None, dataset: Optional[Dataset] = None): @@ -394,15 +404,16 @@ class DatasetService: if not user: raise ValueError("User not found") - if dataset.permission == DatasetPermissionEnum.ONLY_ME: - if dataset.created_by != user.id: - raise NoPermissionError("You do not have permission to access this 
dataset.") + if user.current_role not in (TenantAccountRole.OWNER, TenantAccountRole.ADMIN): + if dataset.permission == DatasetPermissionEnum.ONLY_ME: + if dataset.created_by != user.id: + raise NoPermissionError("You do not have permission to access this dataset.") - elif dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM: - if not any( - dp.dataset_id == dataset.id for dp in DatasetPermission.query.filter_by(account_id=user.id).all() - ): - raise NoPermissionError("You do not have permission to access this dataset.") + elif dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM: + if not any( + dp.dataset_id == dataset.id for dp in DatasetPermission.query.filter_by(account_id=user.id).all() + ): + raise NoPermissionError("You do not have permission to access this dataset.") @staticmethod def get_dataset_queries(dataset_id: str, page: int, per_page: int): @@ -441,7 +452,7 @@ class DatasetService: class DocumentService: - DEFAULT_RULES = { + DEFAULT_RULES: dict[str, Any] = { "mode": "custom", "rules": { "pre_processing_rules": [ @@ -455,7 +466,7 @@ class DocumentService: }, } - DOCUMENT_METADATA_SCHEMA = { + DOCUMENT_METADATA_SCHEMA: dict[str, Any] = { "book": { "title": str, "language": str, diff --git a/api/services/errors/base.py b/api/services/errors/base.py index 4d39f956b8..35ea28468e 100644 --- a/api/services/errors/base.py +++ b/api/services/errors/base.py @@ -1,6 +1,6 @@ from typing import Optional -class BaseServiceError(Exception): +class BaseServiceError(ValueError): def __init__(self, description: Optional[str] = None): self.description = description diff --git a/api/services/message_service.py b/api/services/message_service.py index c4447a84da..c17122ef64 100644 --- a/api/services/message_service.py +++ b/api/services/message_service.py @@ -152,6 +152,7 @@ class MessageService: @classmethod def create_feedback( cls, + *, app_model: App, message_id: str, user: Optional[Union[Account, EndUser]], diff --git a/api/services/tools/api_tools_manage_service.py b/api/services/tools/api_tools_manage_service.py index 0e3bd3a7b8..988f9df927 100644 --- a/api/services/tools/api_tools_manage_service.py +++ b/api/services/tools/api_tools_manage_service.py @@ -425,7 +425,7 @@ class ApiToolManageService: "tenant_id": tenant_id, } ) - result = tool.validate_credentials(credentials, parameters) + result = runtime_tool.validate_credentials(credentials, parameters) except Exception as e: return {"error": str(e)} diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index 81b197a247..9f7a9c770d 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -5,6 +5,8 @@ from datetime import UTC, datetime from typing import Any, Optional, cast from uuid import uuid4 +from sqlalchemy import desc + from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager from core.model_runtime.utils.encoders import jsonable_encoder @@ -76,6 +78,28 @@ class WorkflowService: return workflow + def get_all_published_workflow(self, app_model: App, page: int, limit: int) -> tuple[list[Workflow], bool]: + """ + Get published workflow with pagination + """ + if not app_model.workflow_id: + return [], False + + workflows = ( + db.session.query(Workflow) + .filter(Workflow.app_id == app_model.id) + .order_by(desc(Workflow.version)) + .offset((page - 1) * limit) + .limit(limit + 1) + .all() + ) + + has_more = len(workflows) > limit + if has_more: + workflows = 
diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py
index 81b197a247..9f7a9c770d 100644
--- a/api/services/workflow_service.py
+++ b/api/services/workflow_service.py
@@ -5,6 +5,8 @@ from datetime import UTC, datetime
 from typing import Any, Optional, cast
 from uuid import uuid4
 
+from sqlalchemy import desc
+
 from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager
 from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager
 from core.model_runtime.utils.encoders import jsonable_encoder
@@ -76,6 +78,28 @@ class WorkflowService:
 
         return workflow
 
+    def get_all_published_workflow(self, app_model: App, page: int, limit: int) -> tuple[list[Workflow], bool]:
+        """
+        Get published workflow with pagination
+        """
+        if not app_model.workflow_id:
+            return [], False
+
+        workflows = (
+            db.session.query(Workflow)
+            .filter(Workflow.app_id == app_model.id)
+            .order_by(desc(Workflow.version))
+            .offset((page - 1) * limit)
+            .limit(limit + 1)
+            .all()
+        )
+
+        has_more = len(workflows) > limit
+        if has_more:
+            workflows = workflows[:-1]
+
+        return workflows, has_more
+
     def sync_draft_workflow(
         self,
         *,
diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py
index 9a172b2d9d..bd7fcdadea 100644
--- a/api/tasks/add_document_to_index_task.py
+++ b/api/tasks/add_document_to_index_task.py
@@ -38,7 +38,11 @@ def add_document_to_index_task(dataset_document_id: str):
     try:
         segments = (
             db.session.query(DocumentSegment)
-            .filter(DocumentSegment.document_id == dataset_document.id, DocumentSegment.enabled == True)
+            .filter(
+                DocumentSegment.document_id == dataset_document.id,
+                DocumentSegment.enabled == False,
+                DocumentSegment.status == "completed",
+            )
             .order_by(DocumentSegment.position.asc())
             .all()
         )
@@ -85,6 +89,16 @@ def add_document_to_index_task(dataset_document_id: str):
         db.session.query(DatasetAutoDisableLog).filter(
             DatasetAutoDisableLog.document_id == dataset_document.id
         ).delete()
+
+        # update segment to enable
+        db.session.query(DocumentSegment).filter(DocumentSegment.document_id == dataset_document.id).update(
+            {
+                DocumentSegment.enabled: True,
+                DocumentSegment.disabled_at: None,
+                DocumentSegment.disabled_by: None,
+                DocumentSegment.updated_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None),
+            }
+        )
         db.session.commit()
 
         end_at = time.perf_counter()
diff --git a/api/tasks/delete_account_task.py b/api/tasks/delete_account_task.py
new file mode 100644
index 0000000000..52c884ca29
--- /dev/null
+++ b/api/tasks/delete_account_task.py
@@ -0,0 +1,26 @@
+import logging
+
+from celery import shared_task  # type: ignore
+
+from extensions.ext_database import db
+from models.account import Account
+from services.billing_service import BillingService
+from tasks.mail_account_deletion_task import send_deletion_success_task
+
+logger = logging.getLogger(__name__)
+
+
+@shared_task(queue="dataset")
+def delete_account_task(account_id):
+    account = db.session.query(Account).filter(Account.id == account_id).first()
+    try:
+        BillingService.delete_account(account_id)
+    except Exception as e:
+        logger.exception(f"Failed to delete account {account_id} from billing service.")
+        raise
+
+    if not account:
+        logger.error(f"Account {account_id} not found.")
+        return
+    # send success email
+    send_deletion_success_task.delay(account.email)
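get_all_published_workflow above uses the fetch-one-extra idiom: query limit + 1 rows, and if the probe row comes back, at least one more page exists, which avoids a separate COUNT query. A generic sketch of the same idiom over an in-memory sequence (illustrative only, not the patch's code):

    from typing import Sequence, TypeVar

    T = TypeVar("T")

    def paginate(rows: Sequence[T], page: int, limit: int) -> tuple[list[T], bool]:
        start = (page - 1) * limit
        window = list(rows[start : start + limit + 1])  # one extra row as a probe
        has_more = len(window) > limit
        return window[:limit], has_more

    items, has_more = paginate(list(range(25)), page=1, limit=10)
    assert items == list(range(10)) and has_more is True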
+
+    Args:
+        to (str): Recipient email address
+    """
+    if not mail.is_inited():
+        return
+
+    logging.info(click.style(f"Start send account deletion success email to {to}", fg="green"))
+    start_at = time.perf_counter()
+
+    try:
+        html_content = render_template(
+            "delete_account_success_template_en-US.html",
+            to=to,
+            email=to,
+        )
+        mail.send(to=to, subject="Your Dify.AI Account Has Been Successfully Deleted", html=html_content)
+
+        end_at = time.perf_counter()
+        logging.info(
+            click.style(
+                "Send account deletion success email to {}: latency: {}".format(to, end_at - start_at), fg="green"
+            )
+        )
+    except Exception:
+        logging.exception("Send account deletion success email to {} failed".format(to))
+
+
+@shared_task(queue="mail")
+def send_account_deletion_verification_code(to, code):
+    """Send email to user regarding account deletion verification code.
+
+    Args:
+        to (str): Recipient email address
+        code (str): Verification code
+    """
+    if not mail.is_inited():
+        return
+
+    logging.info(click.style(f"Start send account deletion verification code email to {to}", fg="green"))
+    start_at = time.perf_counter()
+
+    try:
+        html_content = render_template("delete_account_code_email_template_en-US.html", to=to, code=code)
+        mail.send(to=to, subject="Dify.AI Account Deletion and Verification", html=html_content)
+
+        end_at = time.perf_counter()
+        logging.info(
+            click.style(
+                "Send account deletion verification code email to {} succeeded: latency: {}".format(
+                    to, end_at - start_at
+                ),
+                fg="green",
+            )
+        )
+    except Exception:
+        logging.exception("Send account deletion verification code email to {} failed".format(to))
diff --git a/api/tasks/remove_document_from_index_task.py b/api/tasks/remove_document_from_index_task.py
index 1d580b3802..d0c4382f58 100644
--- a/api/tasks/remove_document_from_index_task.py
+++ b/api/tasks/remove_document_from_index_task.py
@@ -1,3 +1,4 @@
+import datetime
 import logging
 import time
 
@@ -46,6 +47,16 @@ def remove_document_from_index_task(document_id: str):
             index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False)
         except Exception:
             logging.exception(f"clean dataset {dataset.id} from index failed")
+        # update segment to disable
+        db.session.query(DocumentSegment).filter(DocumentSegment.document_id == document.id).update(
+            {
+                DocumentSegment.enabled: False,
+                DocumentSegment.disabled_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None),
+                DocumentSegment.disabled_by: document.disabled_by,
+                DocumentSegment.updated_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None),
+            }
+        )
+        db.session.commit()
 
         end_at = time.perf_counter()
         logging.info(
diff --git a/api/templates/clean_document_job_mail_template-US.html b/api/templates/clean_document_job_mail_template-US.html
index b7c9538f9f..88e78f41c7 100644
--- a/api/templates/clean_document_job_mail_template-US.html
+++ b/api/templates/clean_document_job_mail_template-US.html
@@ -45,14 +45,14 @@
         .content ul li {
             margin-bottom: 10px;
         }
-        .cta-button {
+        .cta-button, .cta-button:hover, .cta-button:active, .cta-button:visited, .cta-button:focus {
             display: block;
             margin: 20px auto;
             padding: 10px 20px;
             background-color: #4e89f9;
-            color: #ffffff;
+            color: #ffffff !important;
             text-align: center;
-            text-decoration: none;
+            text-decoration: none !important;
             border-radius: 5px;
             width: fit-content;
         }
@@ -69,7 +69,7 @@