Fix QA index processor error when tenant ID is None (#2713)
Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
parent
7f3dec7bee
commit
31070ffbca
@ -62,7 +62,8 @@ class IndexingRunner:
|
|||||||
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
|
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
|
||||||
|
|
||||||
# transform
|
# transform
|
||||||
documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
|
documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
|
||||||
|
processing_rule.to_dict())
|
||||||
# save segment
|
# save segment
|
||||||
self._load_segments(dataset, dataset_document, documents)
|
self._load_segments(dataset, dataset_document, documents)
|
||||||
|
|
||||||
@ -120,7 +121,8 @@ class IndexingRunner:
|
|||||||
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
|
text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
|
||||||
|
|
||||||
# transform
|
# transform
|
||||||
documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
|
documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
|
||||||
|
processing_rule.to_dict())
|
||||||
# save segment
|
# save segment
|
||||||
self._load_segments(dataset, dataset_document, documents)
|
self._load_segments(dataset, dataset_document, documents)
|
||||||
|
|
||||||
@ -750,7 +752,7 @@ class IndexingRunner:
|
|||||||
index_processor.load(dataset, documents)
|
index_processor.load(dataset, documents)
|
||||||
|
|
||||||
def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset,
|
def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset,
|
||||||
text_docs: list[Document], process_rule: dict) -> list[Document]:
|
text_docs: list[Document], doc_language: str, process_rule: dict) -> list[Document]:
|
||||||
# get embedding model instance
|
# get embedding model instance
|
||||||
embedding_model_instance = None
|
embedding_model_instance = None
|
||||||
if dataset.indexing_technique == 'high_quality':
|
if dataset.indexing_technique == 'high_quality':
|
||||||
@ -768,7 +770,8 @@ class IndexingRunner:
|
|||||||
)
|
)
|
||||||
|
|
||||||
documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance,
|
documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance,
|
||||||
process_rule=process_rule)
|
process_rule=process_rule, tenant_id=dataset.tenant_id,
|
||||||
|
doc_language=doc_language)
|
||||||
|
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
@ -7,7 +7,6 @@ from typing import Optional
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from flask import Flask, current_app
|
from flask import Flask, current_app
|
||||||
from flask_login import current_user
|
|
||||||
from werkzeug.datastructures import FileStorage
|
from werkzeug.datastructures import FileStorage
|
||||||
|
|
||||||
from core.generator.llm_generator import LLMGenerator
|
from core.generator.llm_generator import LLMGenerator
|
||||||
@ -31,7 +30,7 @@ class QAIndexProcessor(BaseIndexProcessor):
|
|||||||
|
|
||||||
def transform(self, documents: list[Document], **kwargs) -> list[Document]:
|
def transform(self, documents: list[Document], **kwargs) -> list[Document]:
|
||||||
splitter = self._get_splitter(processing_rule=kwargs.get('process_rule'),
|
splitter = self._get_splitter(processing_rule=kwargs.get('process_rule'),
|
||||||
embedding_model_instance=None)
|
embedding_model_instance=kwargs.get('embedding_model_instance'))
|
||||||
|
|
||||||
# Split the text documents into nodes.
|
# Split the text documents into nodes.
|
||||||
all_documents = []
|
all_documents = []
|
||||||
@ -66,10 +65,10 @@ class QAIndexProcessor(BaseIndexProcessor):
|
|||||||
for doc in sub_documents:
|
for doc in sub_documents:
|
||||||
document_format_thread = threading.Thread(target=self._format_qa_document, kwargs={
|
document_format_thread = threading.Thread(target=self._format_qa_document, kwargs={
|
||||||
'flask_app': current_app._get_current_object(),
|
'flask_app': current_app._get_current_object(),
|
||||||
'tenant_id': current_user.current_tenant.id,
|
'tenant_id': kwargs.get('tenant_id'),
|
||||||
'document_node': doc,
|
'document_node': doc,
|
||||||
'all_qa_documents': all_qa_documents,
|
'all_qa_documents': all_qa_documents,
|
||||||
'document_language': kwargs.get('document_language', 'English')})
|
'document_language': kwargs.get('doc_language', 'English')})
|
||||||
threads.append(document_format_thread)
|
threads.append(document_format_thread)
|
||||||
document_format_thread.start()
|
document_format_thread.start()
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
|
Loading…
Reference in New Issue
Block a user