dify/api/controllers/console/datasets/external.py

281 lines
10 KiB
Python
Raw Normal View History

2024-08-20 11:13:29 +08:00
from flask import request
from flask_login import current_user
2024-09-18 14:36:51 +08:00
from flask_restful import Resource, marshal, reqparse
2024-09-24 18:00:45 +08:00
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
2024-08-20 11:13:29 +08:00
import services
from controllers.console import api
from controllers.console.app.error import ProviderNotInitializeError
2024-09-18 14:36:51 +08:00
from controllers.console.datasets.error import DatasetNameDuplicateError
2024-08-20 11:13:29 +08:00
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
2024-09-18 14:36:51 +08:00
from fields.dataset_fields import dataset_detail_fields
2024-08-20 11:13:29 +08:00
from libs.login import login_required
2024-09-19 17:07:33 +08:00
from services.dataset_service import DatasetService
2024-08-20 11:13:29 +08:00
from services.external_knowledge_service import ExternalDatasetService
2024-09-19 17:07:33 +08:00
from services.hit_testing_service import HitTestingService
2024-08-20 11:13:29 +08:00
def _validate_name(name):
if not name or len(name) < 1 or len(name) > 100:
2024-09-18 14:36:51 +08:00
raise ValueError("Name must be between 1 to 100 characters.")
2024-08-20 11:13:29 +08:00
return name
2024-08-23 16:43:47 +08:00
2024-08-20 11:13:29 +08:00
def _validate_description_length(description):
2024-09-24 21:37:55 +08:00
if description and len(description) > 400:
2024-09-18 14:36:51 +08:00
raise ValueError("Description cannot exceed 400 characters.")
2024-08-20 11:13:29 +08:00
return description
2024-08-23 16:43:47 +08:00
2024-08-20 11:13:29 +08:00
class ExternalApiTemplateListApi(Resource):
    """Collection endpoint: list and create external API templates."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return one page of API templates for the current tenant.

        Query parameters: ``page`` (default 1), ``limit`` (default 20) and an
        optional ``keyword`` that is forwarded to the service as the search term.
        """
        query = request.args
        page = query.get("page", default=1, type=int)
        limit = query.get("limit", default=20, type=int)
        search = query.get("keyword", default=None, type=str)

        tenant_id = current_user.current_tenant_id
        api_templates, total = ExternalDatasetService.get_external_api_templates(page, limit, tenant_id, search)

        payload = {
            "data": [item.to_dict() for item in api_templates],
            # A full page is taken as the signal that more results may follow.
            "has_more": len(api_templates) == limit,
            "limit": limit,
            "total": total,
            "page": page,
        }
        return payload, 200

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create an API template from the ``name`` and ``settings`` in the request.

        Raises:
            Forbidden: if the current user is not a dataset editor.
            DatasetNameDuplicateError: if the service reports a duplicate name.
        """
        arg_parser = reqparse.RequestParser()
        arg_parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="Name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        arg_parser.add_argument("settings", type=dict, location="json", nullable=False, required=True)
        args = arg_parser.parse_args()

        ExternalDatasetService.validate_api_list(args["settings"])

        # Only users flagged as dataset editors may create templates.
        if not current_user.is_dataset_editor:
            raise Forbidden()

        try:
            created = ExternalDatasetService.create_api_template(
                tenant_id=current_user.current_tenant_id,
                user_id=current_user.id,
                args=args,
            )
        except services.errors.dataset.DatasetNameDuplicateError:
            # Translate the service-layer error into the controller-level error.
            raise DatasetNameDuplicateError()

        return created.to_dict(), 201
class ExternalApiTemplateApi(Resource):
    """Item endpoint: read, update and delete one external API template.

    The resource is registered under
    ``/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>``.
    Flask passes URL variables to the handler as keyword arguments, so every
    handler must name its parameter ``external_knowledge_api_id``.
    """

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, external_knowledge_api_id):
        """Return the template as a dict, or raise ``NotFound`` if it does not exist."""
        external_knowledge_api_id = str(external_knowledge_api_id)
        api_template = ExternalDatasetService.get_api_template(external_knowledge_api_id)
        if api_template is None:
            raise NotFound("API template not found.")
        return api_template.to_dict(), 200

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, external_knowledge_api_id):
        """Update the template's ``name`` and ``settings`` and return the updated dict."""
        # Parameter renamed from ``api_template_id`` to match the URL variable;
        # with the old name Flask's keyword call raised TypeError.
        external_knowledge_api_id = str(external_knowledge_api_id)

        parser = reqparse.RequestParser()
        parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="Name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        parser.add_argument(
            "settings",
            type=dict,
            location="json",
            nullable=False,
            required=True,
        )
        args = parser.parse_args()
        ExternalDatasetService.validate_api_list(args["settings"])

        # NOTE(review): unlike ``delete`` there is no role check here - confirm
        # whether updates should be restricted to editors as well.
        api_template = ExternalDatasetService.update_api_template(
            tenant_id=current_user.current_tenant_id,
            user_id=current_user.id,
            # The service keyword is left unchanged; only the handler parameter was renamed.
            api_template_id=external_knowledge_api_id,
            args=args,
        )
        return api_template.to_dict(), 200

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, external_knowledge_api_id):
        """Delete the template for the current tenant.

        Raises:
            Forbidden: if the user is not an editor, or is a dataset operator.
        """
        # Parameter renamed from ``api_template_id`` to match the URL variable.
        external_knowledge_api_id = str(external_knowledge_api_id)

        # Parentheses only make the original operator precedence explicit.
        if (not current_user.is_editor) or current_user.is_dataset_operator:
            raise Forbidden()

        ExternalDatasetService.delete_api_template(current_user.current_tenant_id, external_knowledge_api_id)
        return {"result": "success"}, 204
2024-08-20 11:13:29 +08:00
class ExternalApiUseCheckApi(Resource):
    """Report whether an external API template is in use."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, external_knowledge_api_id):
        """Return ``{"is_using": ..., "count": ...}`` as reported by the service."""
        template_id = str(external_knowledge_api_id)
        in_use, usage_count = ExternalDatasetService.external_api_template_use_check(template_id)
        body = {"is_using": in_use, "count": usage_count}
        return body, 200
2024-08-20 11:13:29 +08:00
class ExternalDatasetInitApi(Resource):
    """Initialise an external dataset from an API template and a data source."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Validate the request and ask the service to initialise the dataset.

        Returns:
            A dict with the ``dataset``, ``documents`` and ``batch`` values
            returned by ``ExternalDatasetService.init_external_dataset``.

        Raises:
            Forbidden: if the current user fails either role check.
            ProviderNotInitializeError: if initialisation fails for any reason.
        """
        # The current user must be an editor.
        if not current_user.is_editor:
            raise Forbidden()

        parser = reqparse.RequestParser()
        parser.add_argument("api_template_id", type=str, required=True, nullable=True, location="json")
        parser.add_argument("data_source", type=dict, required=True, nullable=True, location="json")
        parser.add_argument("process_parameter", type=dict, required=True, nullable=True, location="json")
        args = parser.parse_args()

        # The current user must also be a dataset editor.
        if not current_user.is_dataset_editor:
            raise Forbidden()

        # Validate the arguments against the tenant before creating anything.
        ExternalDatasetService.document_create_args_validate(
            current_user.current_tenant_id, args["api_template_id"], args["process_parameter"]
        )

        try:
            dataset, documents, batch = ExternalDatasetService.init_external_dataset(
                tenant_id=current_user.current_tenant_id,
                user_id=current_user.id,
                args=args,
            )
        except Exception as ex:
            # Most exceptions have no ``description`` attribute; reading it
            # unconditionally raised AttributeError and hid the real failure.
            # Fall back to the exception text and keep the cause chained.
            message = getattr(ex, "description", None) or str(ex)
            raise ProviderNotInitializeError(message) from ex

        response = {"dataset": dataset, "documents": documents, "batch": batch}
        return response
2024-09-11 16:59:19 +08:00
class ExternalDatasetCreateApi(Resource):
    """Create a dataset that is backed by an external knowledge API."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create the external dataset and return it marshalled with ``dataset_detail_fields``.

        Raises:
            Forbidden: if the current user fails either role check.
            DatasetNameDuplicateError: if the service reports a duplicate name.
        """
        # First gate: the user must be an editor.
        if not current_user.is_editor:
            raise Forbidden()

        arg_parser = reqparse.RequestParser()
        arg_parser.add_argument(
            "external_api_template_id", type=str, required=True, nullable=False, location="json"
        )
        arg_parser.add_argument(
            "external_knowledge_id", type=str, required=True, nullable=False, location="json"
        )
        arg_parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        arg_parser.add_argument("description", type=str, required=False, nullable=True, location="json")
        arg_parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        args = arg_parser.parse_args()

        # Second gate: the user must be a dataset editor.
        if not current_user.is_dataset_editor:
            raise Forbidden()

        try:
            created = ExternalDatasetService.create_external_dataset(
                tenant_id=current_user.current_tenant_id,
                user_id=current_user.id,
                args=args,
            )
        except services.errors.dataset.DatasetNameDuplicateError:
            # Translate the service-layer error into the controller-level error.
            raise DatasetNameDuplicateError()

        return marshal(created, dataset_detail_fields), 201
2024-09-19 17:07:33 +08:00
class ExternalKnowledgeHitTestingApi(Resource):
    """Run a retrieval (hit-testing) query against an external dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, dataset_id):
        """Look up the dataset, check permission, and return the retrieval result.

        Raises:
            NotFound: if no dataset exists for ``dataset_id``.
            Forbidden: if the permission check raises ``NoPermissionError``.
            InternalServerError: if the external retrieval raises any exception.
        """
        dataset = DatasetService.get_dataset(str(dataset_id))
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        arg_parser = reqparse.RequestParser()
        arg_parser.add_argument("query", type=str, location="json")
        arg_parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        args = arg_parser.parse_args()
        HitTestingService.hit_testing_args_check(args)

        try:
            return HitTestingService.external_retrieve(
                dataset=dataset,
                query=args["query"],
                account=current_user,
                external_retrieval_model=args["external_retrieval_model"],
            )
        except Exception as e:
            # Any retrieval failure is surfaced as a 500 carrying the error text.
            raise InternalServerError(str(e))
# Route table for this module; resources are registered in the listed order.
for _resource, _url in (
    (ExternalKnowledgeHitTestingApi, "/datasets/<uuid:dataset_id>/external-hit-testing"),
    (ExternalDatasetCreateApi, "/datasets/external"),
    (ExternalApiTemplateListApi, "/datasets/external-knowledge-api"),
    (ExternalApiTemplateApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>"),
    (ExternalApiUseCheckApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>/use-check"),
):
    api.add_resource(_resource, _url)