improve preview document tokenizer (#13328)

This commit is contained in:
Jyong 2025-02-07 16:08:25 +08:00 committed by GitHub
parent 7e1d9894fb
commit d4a09805a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -47,6 +47,8 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
embedding_model_instance=kwargs.get("embedding_model_instance"),
)
for document in documents:
if kwargs.get("preview") and len(all_documents) >= 10:
return all_documents
# document clean
document_text = CleanProcessor.clean(document.page_content, process_rule)
document.page_content = document_text