fix: adjust code style

This commit is contained in:
Aurora 2025-02-27 15:41:29 +08:00
parent b9da80b23a
commit 22741e4617

View File

@@ -4,17 +4,17 @@ import json
import logging import logging
import os import os
import tempfile import tempfile
from collections.abc import Mapping, Sequence from collections.abc import Iterator, Mapping, Sequence
from typing import Any, cast, Union, Iterator from typing import Any, Union, cast
import docx
import pandas as pd import pandas as pd
import pypdfium2 # type: ignore import pypdfium2 # type: ignore
import yaml # type: ignore import yaml # type: ignore
import docx
from docx.document import Document as _Document from docx.document import Document as _Document
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph
from docx.oxml.ns import qn from docx.oxml.ns import qn
from docx.table import Table, _Cell
from docx.text.paragraph import Paragraph
from configs import dify_config from configs import dify_config
from core.file import File, FileTransferMethod, file_manager from core.file import File, FileTransferMethod, file_manager
@@ -27,7 +27,8 @@ from core.workflow.nodes.enums import NodeType
from models.workflow import WorkflowNodeExecutionStatus from models.workflow import WorkflowNodeExecutionStatus
from .entities import DocumentExtractorNodeData from .entities import DocumentExtractorNodeData
from .exc import DocumentExtractorError, FileDownloadError, TextExtractionError, UnsupportedFileTypeError from .exc import (DocumentExtractorError, FileDownloadError,
TextExtractionError, UnsupportedFileTypeError)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -278,8 +279,7 @@ def _extract_text_from_docx(file_content: bytes) -> str:
return "\n".join(text) return "\n".join(text)
except Exception as e: except Exception as e:
logger.exception(f"Failed to extract text from DOCX: {e}") raise TextExtractionError(f"Failed to extract text from DOCX: {str(e)}") from e
return ""
def _download_file_content(file: File) -> bytes: def _download_file_content(file: File) -> bytes:
@@ -454,6 +454,7 @@ def _iter_block_items(parent: Union[_Document, _Cell]) -> Iterator[Union[Paragra
elif child.tag == qn("w:tbl"): elif child.tag == qn("w:tbl"):
yield Table(child, parent) yield Table(child, parent)
def _has_valid_iterchildren(element) -> bool: def _has_valid_iterchildren(element) -> bool:
""" """
Check if the element has a valid iterchildren() method. Check if the element has a valid iterchildren() method.