diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py index 1232a8df1f..14e17220b4 100644 --- a/api/tasks/clean_dataset_task.py +++ b/api/tasks/clean_dataset_task.py @@ -7,7 +7,7 @@ from celery import shared_task from core.index.index import IndexBuilder from extensions.ext_database import db from models.dataset import DocumentSegment, Dataset, DatasetKeywordTable, DatasetQuery, DatasetProcessRule, \ - AppDatasetJoin + AppDatasetJoin, Document @shared_task @@ -32,7 +32,7 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str, index_struct=index_struct ) - documents = db.session.query(DocumentSegment).filter(DocumentSegment.dataset_id == dataset_id).all() + documents = db.session.query(Document).filter(Document.dataset_id == dataset_id).all() segments = db.session.query(DocumentSegment).filter(DocumentSegment.dataset_id == dataset_id).all() vector_index = IndexBuilder.get_index(dataset, 'high_quality')