update to external knowledge api

This commit is contained in:
jyong 2024-09-27 16:17:45 +08:00
parent 2a1cba9f4d
commit 9c9352bc73
3 changed files with 44 additions and 26 deletions

View File

@ -13,13 +13,7 @@ class TestExternalApi(Resource):
@account_initialization_required @account_initialization_required
def post(self): def post(self):
parser = reqparse.RequestParser() parser = reqparse.RequestParser()
parser.add_argument( parser.add_argument("retrieval_setting", nullable=False, required=True, type=dict, location="json")
"retrieval_setting",
nullable=False,
required=True,
type=dict,
location="json"
)
parser.add_argument( parser.add_argument(
"query", "query",
nullable=False, nullable=False,

View File

@ -20,9 +20,12 @@ from models.dataset import (
ExternalKnowledgeBindings, ExternalKnowledgeBindings,
) )
from models.model import UploadFile from models.model import UploadFile
from services.entities.external_knowledge_entities.external_knowledge_entities import ExternalKnowledgeApiSetting, Authorization from services.entities.external_knowledge_entities.external_knowledge_entities import (
Authorization,
ExternalKnowledgeApiSetting,
)
from services.errors.dataset import DatasetNameDuplicateError from services.errors.dataset import DatasetNameDuplicateError
from urllib.parse import urlparse
class ExternalDatasetService: class ExternalDatasetService:
@staticmethod @staticmethod
@ -61,14 +64,14 @@ class ExternalDatasetService:
db.session.add(external_knowledge_api) db.session.add(external_knowledge_api)
db.session.commit() db.session.commit()
return external_knowledge_api return external_knowledge_api
@staticmethod @staticmethod
def check_endpoint_and_api_key(settings: dict): def check_endpoint_and_api_key(settings: dict):
if "endpoint" not in settings or not settings["endpoint"]: if "endpoint" not in settings or not settings["endpoint"]:
raise ValueError("endpoint is required") raise ValueError("endpoint is required")
if "api_key" not in settings or not settings["api_key"]: if "api_key" not in settings or not settings["api_key"]:
raise ValueError("api_key is required") raise ValueError("api_key is required")
endpoint = f"{settings['endpoint']}/retrieval" endpoint = f"{settings['endpoint']}/retrieval"
api_key = settings["api_key"] api_key = settings["api_key"]
if not validators.url(endpoint): if not validators.url(endpoint):
@ -90,7 +93,9 @@ class ExternalDatasetService:
@staticmethod @staticmethod
def update_external_knowledge_api(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalKnowledgeApis: def update_external_knowledge_api(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalKnowledgeApis:
external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
id=external_knowledge_api_id, tenant_id=tenant_id
).first()
if external_knowledge_api is None: if external_knowledge_api is None:
raise ValueError("api template not found") raise ValueError("api template not found")
@ -105,7 +110,9 @@ class ExternalDatasetService:
@staticmethod @staticmethod
def delete_external_knowledge_api(tenant_id: str, external_knowledge_api_id: str): def delete_external_knowledge_api(tenant_id: str, external_knowledge_api_id: str):
external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
id=external_knowledge_api_id, tenant_id=tenant_id
).first()
if external_knowledge_api is None: if external_knowledge_api is None:
raise ValueError("api template not found") raise ValueError("api template not found")
@ -130,7 +137,9 @@ class ExternalDatasetService:
@staticmethod @staticmethod
def document_create_args_validate(tenant_id: str, external_knowledge_api_id: str, process_parameter: dict): def document_create_args_validate(tenant_id: str, external_knowledge_api_id: str, process_parameter: dict):
external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
id=external_knowledge_api_id, tenant_id=tenant_id
).first()
if external_knowledge_api is None: if external_knowledge_api is None:
raise ValueError("api template not found") raise ValueError("api template not found")
settings = json.loads(external_knowledge_api.settings) settings = json.loads(external_knowledge_api.settings)
@ -150,7 +159,9 @@ class ExternalDatasetService:
raise ValueError("data source is required") raise ValueError("data source is required")
process_parameter = args.get("process_parameter") process_parameter = args.get("process_parameter")
external_knowledge_api = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
id=external_knowledge_api_id, tenant_id=tenant_id
).first()
if external_knowledge_api is None: if external_knowledge_api is None:
raise ValueError("api template not found") raise ValueError("api template not found")
@ -204,7 +215,9 @@ class ExternalDatasetService:
return dataset return dataset
@staticmethod @staticmethod
def process_external_api(settings: ExternalKnowledgeApiSetting, files: Union[None, dict[str, Any]]) -> httpx.Response: def process_external_api(
settings: ExternalKnowledgeApiSetting, files: Union[None, dict[str, Any]]
) -> httpx.Response:
""" """
do http request depending on api bundle do http request depending on api bundle
""" """
@ -322,7 +335,9 @@ class ExternalDatasetService:
"headers": headers, "headers": headers,
"params": request_params, "params": request_params,
} }
response = ExternalDatasetService.process_external_api(ExternalKnowledgeApiSetting(**external_knowledge_api_setting), None) response = ExternalDatasetService.process_external_api(
ExternalKnowledgeApiSetting(**external_knowledge_api_setting), None
)
if response.status_code == 200: if response.status_code == 200:
return response.json().get("records", []) return response.json().get("records", [])
return [] return []
@ -338,7 +353,10 @@ class ExternalDatasetService:
response = client.retrieve( response = client.retrieve(
knowledgeBaseId=external_knowledge_id, knowledgeBaseId=external_knowledge_id,
retrievalConfiguration={ retrievalConfiguration={
"vectorSearchConfiguration": {"numberOfResults": retrieval_setting.get("top_k"), "overrideSearchType": "HYBRID"} "vectorSearchConfiguration": {
"numberOfResults": retrieval_setting.get("top_k"),
"overrideSearchType": "HYBRID",
}
}, },
retrievalQuery={"text": query}, retrievalQuery={"text": query},
) )
@ -347,7 +365,7 @@ class ExternalDatasetService:
if response.get("retrievalResults"): if response.get("retrievalResults"):
retrieval_results = response.get("retrievalResults") retrieval_results = response.get("retrievalResults")
for retrieval_result in retrieval_results: for retrieval_result in retrieval_results:
if retrieval_result.get("score") < retrieval_setting.get("score_threshold", .0): if retrieval_result.get("score") < retrieval_setting.get("score_threshold", 0.0):
continue continue
result = { result = {
"metadata": retrieval_result.get("metadata"), "metadata": retrieval_result.get("metadata"),
@ -356,6 +374,4 @@ class ExternalDatasetService:
"content": retrieval_result.get("content").get("text"), "content": retrieval_result.get("content").get("text"),
} }
results.append(result) results.append(result)
return { return {"records": results}
"records": results
}

View File

@ -14,7 +14,9 @@ from services.external_knowledge_service import ExternalDatasetService
@shared_task(queue="dataset") @shared_task(queue="dataset")
def external_document_indexing_task(dataset_id: str, external_knowledge_api_id: str, data_source: dict, process_parameter: dict): def external_document_indexing_task(
dataset_id: str, external_knowledge_api_id: str, data_source: dict, process_parameter: dict
):
""" """
Async process document Async process document
:param dataset_id: :param dataset_id:
@ -35,14 +37,18 @@ def external_document_indexing_task(dataset_id: str, external_knowledge_api_id:
# get external api template # get external api template
external_knowledge_api = ( external_knowledge_api = (
db.session.query(ExternalKnowledgeApis) db.session.query(ExternalKnowledgeApis)
.filter(ExternalKnowledgeApis.id == external_knowledge_api_id, ExternalKnowledgeApis.tenant_id == dataset.tenant_id) .filter(
ExternalKnowledgeApis.id == external_knowledge_api_id, ExternalKnowledgeApis.tenant_id == dataset.tenant_id
)
.first() .first()
) )
if not external_knowledge_api: if not external_knowledge_api:
logging.info( logging.info(
click.style( click.style(
"Processed external dataset: {} failed, api template: {} not exit.".format(dataset_id, external_knowledge_api_id), "Processed external dataset: {} failed, api template: {} not exit.".format(
dataset_id, external_knowledge_api_id
),
fg="red", fg="red",
) )
) )
@ -59,7 +65,9 @@ def external_document_indexing_task(dataset_id: str, external_knowledge_api_id:
if file: if file:
files[file.id] = (file.name, storage.load_once(file.key), file.mime_type) files[file.id] = (file.name, storage.load_once(file.key), file.mime_type)
try: try:
settings = ExternalDatasetService.get_external_knowledge_api_settings(json.loads(external_knowledge_api.settings)) settings = ExternalDatasetService.get_external_knowledge_api_settings(
json.loads(external_knowledge_api.settings)
)
# assemble headers # assemble headers
headers = ExternalDatasetService.assembling_headers(settings.authorization, settings.headers) headers = ExternalDatasetService.assembling_headers(settings.authorization, settings.headers)