From 9633c5dab6ebd48236bb988e991955321a57f168 Mon Sep 17 00:00:00 2001 From: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:48:16 +0800 Subject: [PATCH 1/4] fix: enterprise create workspace (#9921) --- api/controllers/inner_api/workspace/workspace.py | 2 +- api/services/account_service.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/api/controllers/inner_api/workspace/workspace.py b/api/controllers/inner_api/workspace/workspace.py index 914b60f263..fee840b30d 100644 --- a/api/controllers/inner_api/workspace/workspace.py +++ b/api/controllers/inner_api/workspace/workspace.py @@ -21,7 +21,7 @@ class EnterpriseWorkspace(Resource): if account is None: return {"message": "owner account not found."}, 404 - tenant = TenantService.create_tenant(args["name"]) + tenant = TenantService.create_tenant(args["name"], is_from_dashboard=True) TenantService.create_tenant_member(tenant, account, role="owner") tenant_was_created.send(tenant) diff --git a/api/services/account_service.py b/api/services/account_service.py index 27b1540c8d..dceca06185 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -486,9 +486,13 @@ def _get_login_cache_key(*, account_id: str, token: str): class TenantService: @staticmethod - def create_tenant(name: str, is_setup: Optional[bool] = False) -> Tenant: + def create_tenant(name: str, is_setup: Optional[bool] = False, is_from_dashboard: Optional[bool] = False) -> Tenant: """Create tenant""" - if not FeatureService.get_system_features().is_allow_create_workspace and not is_setup: + if ( + not FeatureService.get_system_features().is_allow_create_workspace + and not is_setup + and not is_from_dashboard + ): from controllers.console.error import NotAllowedCreateWorkspace raise NotAllowedCreateWorkspace() From af680848957ef26eb78d8ba209608476056617cc Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Mon, 28 Oct 2024 13:52:35 +0800 Subject: [PATCH 2/4] add document lock for multi-thread (#9873) --- api/services/dataset_service.py | 312 ++++++++++++++++---------------- 1 file changed, 157 insertions(+), 155 deletions(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index ca084bde56..414ef0224a 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -760,166 +760,168 @@ class DocumentService: ) db.session.add(dataset_process_rule) db.session.commit() - position = DocumentService.get_documents_position(dataset.id) - document_ids = [] - duplicate_document_ids = [] - if document_data["data_source"]["type"] == "upload_file": - upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"] - for file_id in upload_file_list: - file = ( - db.session.query(UploadFile) - .filter(UploadFile.tenant_id == dataset.tenant_id, UploadFile.id == file_id) - .first() - ) - - # raise error if file not found - if not file: - raise FileNotExistsError() - - file_name = file.name - data_source_info = { - "upload_file_id": file_id, - } - # check duplicate - if document_data.get("duplicate", False): - document = Document.query.filter_by( - dataset_id=dataset.id, - tenant_id=current_user.current_tenant_id, - data_source_type="upload_file", - enabled=True, - name=file_name, - ).first() - if document: - document.dataset_process_rule_id = dataset_process_rule.id - document.updated_at = datetime.datetime.utcnow() - document.created_from = created_from - document.doc_form = document_data["doc_form"] 
- document.doc_language = document_data["doc_language"] - document.data_source_info = json.dumps(data_source_info) - document.batch = batch - document.indexing_status = "waiting" - db.session.add(document) - documents.append(document) - duplicate_document_ids.append(document.id) - continue - document = DocumentService.build_document( - dataset, - dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], - data_source_info, - created_from, - position, - account, - file_name, - batch, - ) - db.session.add(document) - db.session.flush() - document_ids.append(document.id) - documents.append(document) - position += 1 - elif document_data["data_source"]["type"] == "notion_import": - notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"] - exist_page_ids = [] - exist_document = {} - documents = Document.query.filter_by( - dataset_id=dataset.id, - tenant_id=current_user.current_tenant_id, - data_source_type="notion_import", - enabled=True, - ).all() - if documents: - for document in documents: - data_source_info = json.loads(document.data_source_info) - exist_page_ids.append(data_source_info["notion_page_id"]) - exist_document[data_source_info["notion_page_id"]] = document.id - for notion_info in notion_info_list: - workspace_id = notion_info["workspace_id"] - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.disabled == False, - DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', + lock_name = "add_document_lock_dataset_id_{}".format(dataset.id) + with redis_client.lock(lock_name, timeout=600): + position = DocumentService.get_documents_position(dataset.id) + document_ids = [] + duplicate_document_ids = [] + if document_data["data_source"]["type"] == "upload_file": + upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"] + for file_id in upload_file_list: + file = ( + db.session.query(UploadFile) + .filter(UploadFile.tenant_id == dataset.tenant_id, UploadFile.id == file_id) + .first() ) - ).first() - if not data_source_binding: - raise ValueError("Data source binding not found.") - for page in notion_info["pages"]: - if page["page_id"] not in exist_page_ids: - data_source_info = { - "notion_workspace_id": workspace_id, - "notion_page_id": page["page_id"], - "notion_page_icon": page["page_icon"], - "type": page["type"], - } - document = DocumentService.build_document( - dataset, - dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], - data_source_info, - created_from, - position, - account, - page["page_name"], - batch, + + # raise error if file not found + if not file: + raise FileNotExistsError() + + file_name = file.name + data_source_info = { + "upload_file_id": file_id, + } + # check duplicate + if document_data.get("duplicate", False): + document = Document.query.filter_by( + dataset_id=dataset.id, + tenant_id=current_user.current_tenant_id, + data_source_type="upload_file", + enabled=True, + name=file_name, + ).first() + if document: + document.dataset_process_rule_id = dataset_process_rule.id + document.updated_at = datetime.datetime.utcnow() + document.created_from = created_from + document.doc_form = document_data["doc_form"] + document.doc_language = document_data["doc_language"] + 
document.data_source_info = json.dumps(data_source_info) + document.batch = batch + document.indexing_status = "waiting" + db.session.add(document) + documents.append(document) + duplicate_document_ids.append(document.id) + continue + document = DocumentService.build_document( + dataset, + dataset_process_rule.id, + document_data["data_source"]["type"], + document_data["doc_form"], + document_data["doc_language"], + data_source_info, + created_from, + position, + account, + file_name, + batch, + ) + db.session.add(document) + db.session.flush() + document_ids.append(document.id) + documents.append(document) + position += 1 + elif document_data["data_source"]["type"] == "notion_import": + notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"] + exist_page_ids = [] + exist_document = {} + documents = Document.query.filter_by( + dataset_id=dataset.id, + tenant_id=current_user.current_tenant_id, + data_source_type="notion_import", + enabled=True, + ).all() + if documents: + for document in documents: + data_source_info = json.loads(document.data_source_info) + exist_page_ids.append(data_source_info["notion_page_id"]) + exist_document[data_source_info["notion_page_id"]] = document.id + for notion_info in notion_info_list: + workspace_id = notion_info["workspace_id"] + data_source_binding = DataSourceOauthBinding.query.filter( + db.and_( + DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.disabled == False, + DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', ) - db.session.add(document) - db.session.flush() - document_ids.append(document.id) - documents.append(document) - position += 1 + ).first() + if not data_source_binding: + raise ValueError("Data source binding not found.") + for page in notion_info["pages"]: + if page["page_id"] not in exist_page_ids: + data_source_info = { + "notion_workspace_id": workspace_id, + "notion_page_id": page["page_id"], + "notion_page_icon": page["page_icon"], + "type": page["type"], + } + document = DocumentService.build_document( + dataset, + dataset_process_rule.id, + document_data["data_source"]["type"], + document_data["doc_form"], + document_data["doc_language"], + data_source_info, + created_from, + position, + account, + page["page_name"], + batch, + ) + db.session.add(document) + db.session.flush() + document_ids.append(document.id) + documents.append(document) + position += 1 + else: + exist_document.pop(page["page_id"]) + # delete not selected documents + if len(exist_document) > 0: + clean_notion_document_task.delay(list(exist_document.values()), dataset.id) + elif document_data["data_source"]["type"] == "website_crawl": + website_info = document_data["data_source"]["info_list"]["website_info_list"] + urls = website_info["urls"] + for url in urls: + data_source_info = { + "url": url, + "provider": website_info["provider"], + "job_id": website_info["job_id"], + "only_main_content": website_info.get("only_main_content", False), + "mode": "crawl", + } + if len(url) > 255: + document_name = url[:200] + "..." 
else: - exist_document.pop(page["page_id"]) - # delete not selected documents - if len(exist_document) > 0: - clean_notion_document_task.delay(list(exist_document.values()), dataset.id) - elif document_data["data_source"]["type"] == "website_crawl": - website_info = document_data["data_source"]["info_list"]["website_info_list"] - urls = website_info["urls"] - for url in urls: - data_source_info = { - "url": url, - "provider": website_info["provider"], - "job_id": website_info["job_id"], - "only_main_content": website_info.get("only_main_content", False), - "mode": "crawl", - } - if len(url) > 255: - document_name = url[:200] + "..." - else: - document_name = url - document = DocumentService.build_document( - dataset, - dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], - data_source_info, - created_from, - position, - account, - document_name, - batch, - ) - db.session.add(document) - db.session.flush() - document_ids.append(document.id) - documents.append(document) - position += 1 - db.session.commit() + document_name = url + document = DocumentService.build_document( + dataset, + dataset_process_rule.id, + document_data["data_source"]["type"], + document_data["doc_form"], + document_data["doc_language"], + data_source_info, + created_from, + position, + account, + document_name, + batch, + ) + db.session.add(document) + db.session.flush() + document_ids.append(document.id) + documents.append(document) + position += 1 + db.session.commit() - # trigger async task - if document_ids: - document_indexing_task.delay(dataset.id, document_ids) - if duplicate_document_ids: - duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids) + # trigger async task + if document_ids: + document_indexing_task.delay(dataset.id, document_ids) + if duplicate_document_ids: + duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids) - return documents, batch + return documents, batch @staticmethod def check_documents_upload_quota(count: int, features: FeatureModel): From aafa4a3c8bf93676743167c8f69c63b27719c1e5 Mon Sep 17 00:00:00 2001 From: seikyo-cho-lvgs Date: Mon, 28 Oct 2024 14:53:04 +0900 Subject: [PATCH 3/4] Remove invalid languages error (#9928) Co-authored-by: crazywoola <427733928@qq.com> --- web/i18n/server.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/web/i18n/server.ts b/web/i18n/server.ts index 933899e6a1..e976f3ed41 100644 --- a/web/i18n/server.ts +++ b/web/i18n/server.ts @@ -47,10 +47,8 @@ export const getLocaleOnServer = (): Locale => { } // Validate languages - if (!Array.isArray(languages) || languages.length === 0 || !languages.every(lang => typeof lang === 'string' && /^[\w-]+$/.test(lang))) { - console.error(`Invalid languages: ${languages}`) + if (!Array.isArray(languages) || languages.length === 0 || !languages.every(lang => typeof lang === 'string' && /^[\w-]+$/.test(lang))) languages = [i18n.defaultLocale] - } // match locale const matchedLocale = match(languages, locales, i18n.defaultLocale) as Locale From 705946cc40515614d94377bfea288a59703de98f Mon Sep 17 00:00:00 2001 From: Joel Date: Mon, 28 Oct 2024 15:36:28 +0800 Subject: [PATCH 4/4] fix: tool var type error (#9937) --- .../workflow/nodes/tool/components/input-var-list.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/app/components/workflow/nodes/tool/components/input-var-list.tsx b/web/app/components/workflow/nodes/tool/components/input-var-list.tsx index 
9d9940b8e9..e47082f4b7 100644
--- a/web/app/components/workflow/nodes/tool/components/input-var-list.tsx
+++ b/web/app/components/workflow/nodes/tool/components/input-var-list.tsx
@@ -72,7 +72,7 @@ const InputVarList: FC<Props> = ({
     }
     else {
       draft[variable] = {
-        type: varKindType,
+        type: VarKindType.variable,
         value: varValue,
       }
     }
@@ -171,7 +171,7 @@ const InputVarList: FC<Props> = ({
         value={varInput?.type === VarKindType.constant ? (varInput?.value || '') : (varInput?.value || [])}
         onChange={handleNotMixedTypeChange(variable)}
         onOpen={handleOpen(index)}
-        defaultVarKindType={varInput?.type}
+        defaultVarKindType={VarKindType.variable}
         filterVar={isNumber ? filterVar : undefined}
         availableVars={isSelect ? availableVars : undefined}
         schema={schema}
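
Two reviewer notes on the backend patches follow; the sketches are illustrative and are not part of any commit in this series.

On PATCH 1/4: create_tenant now carries two independent bypass flags for the workspace-creation feature gate. A condensed view of the resulting call patterns, assuming an initialized Dify application context; only the dashboard call appears in this series, the other two sites are sketched from the flag semantics:

    from services.account_service import TenantService

    # Initial instance setup -- the pre-existing bypass via is_setup=True.
    tenant = TenantService.create_tenant("Default Workspace", is_setup=True)

    # Enterprise dashboard (inner API) -- the new bypass added by this patch.
    tenant = TenantService.create_tenant("Acme Workspace", is_from_dashboard=True)

    # Regular console flow -- gated: raises NotAllowedCreateWorkspace when
    # FeatureService.get_system_features().is_allow_create_workspace is False.
    tenant = TenantService.create_tenant("Another Workspace")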
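
On PATCH 2/4: the lock serializes concurrent document creation per dataset, so two requests can no longer both read the same max position from get_documents_position and commit rows with duplicate position values. Assuming redis_client is a standard redis-py client (as Dify wires it up in api/extensions/ext_redis.py), a minimal self-contained sketch of the lock semantics the patch relies on:

    import redis

    r = redis.Redis()  # stand-in for Dify's redis_client

    # Redis.lock() returns a redis.lock.Lock. As a context manager it blocks
    # in __enter__ until the lock is acquired; timeout=600 makes Redis expire
    # the key after 10 minutes so a crashed worker cannot wedge later uploads
    # to the same dataset.
    lock_name = "add_document_lock_dataset_id_{}".format("example-dataset-id")  # placeholder id
    with r.lock(lock_name, timeout=600):
        # Critical section: read the current max position, build and flush
        # the new Document rows, and commit -- before any other worker can
        # read a stale position for this dataset.
        pass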