diff --git a/api/core/rag/extractor/pdf_extractor.py b/api/core/rag/extractor/pdf_extractor.py
index 04033dec3f..13c8f57874 100644
--- a/api/core/rag/extractor/pdf_extractor.py
+++ b/api/core/rag/extractor/pdf_extractor.py
@@ -55,14 +55,18 @@ class PdfExtractor(BaseExtractor):
         import pypdfium2  # type: ignore
 
         with blob.as_bytes_io() as file_path:
             pdf_reader = pypdfium2.PdfDocument(file_path, autoclose=True)
             try:
                 for page_number, page in enumerate(pdf_reader):
-                    text_page = page.get_textpage()
-                    content = text_page.get_text_range()
-                    text_page.close()
-                    page.close()
-                    metadata = {"source": blob.source, "page": page_number}
-                    yield Document(page_content=content, metadata=metadata)
+                    try:
+                        text_page = page.get_textpage()
+                        try:
+                            content = text_page.get_text_range()
+                            metadata = {"source": blob.source, "page": page_number}
+                            yield Document(page_content=content, metadata=metadata)
+                        finally:
+                            text_page.close()
+                    finally:
+                        page.close()
             finally:
                 pdf_reader.close()
diff --git a/api/core/workflow/nodes/http_request/executor.py b/api/core/workflow/nodes/http_request/executor.py
index bf28222de0..a37aa0fcc4 100644
--- a/api/core/workflow/nodes/http_request/executor.py
+++ b/api/core/workflow/nodes/http_request/executor.py
@@ -266,21 +266,26 @@ class Executor:
         return headers
 
     def _validate_and_parse_response(self, response: httpx.Response) -> Response:
-        executor_response = Response(response)
+        try:
+            executor_response = Response(response)
 
-        threshold_size = (
-            dify_config.HTTP_REQUEST_NODE_MAX_BINARY_SIZE
-            if executor_response.is_file
-            else dify_config.HTTP_REQUEST_NODE_MAX_TEXT_SIZE
-        )
-        if executor_response.size > threshold_size:
-            raise ResponseSizeError(
-                f"{'File' if executor_response.is_file else 'Text'} size is too large,"
-                f" max size is {threshold_size / 1024 / 1024:.2f} MB,"
-                f" but current size is {executor_response.readable_size}."
-            )
+            threshold_size = (
+                dify_config.HTTP_REQUEST_NODE_MAX_BINARY_SIZE
+                if executor_response.is_file
+                else dify_config.HTTP_REQUEST_NODE_MAX_TEXT_SIZE
+            )
+            if executor_response.size > threshold_size:
+                raise ResponseSizeError(
+                    f"{'File' if executor_response.is_file else 'Text'} size is too large,"
+                    f" max size is {threshold_size / 1024 / 1024:.2f} MB,"
+                    f" but current size is {executor_response.readable_size}."
+                )
 
-        return executor_response
+            return executor_response
+        except Exception:
+            # Close the underlying response on failure; bare raise keeps the traceback
+            response.close()
+            raise
@@ -316,21 +321,25 @@ class Executor:
             "follow_redirects": True,
             "max_retries": self.max_retries,
         }
         # request_args = {k: v for k, v in request_args.items() if v is not None}
         try:
             response = getattr(ssrf_proxy, self.method.lower())(**request_args)
         except (ssrf_proxy.MaxRetriesExceededError, httpx.RequestError) as e:
             raise HttpRequestNodeError(str(e))
         # FIXME: fix type ignore, this maybe httpx type issue
         return response  # type: ignore
 
     def invoke(self) -> Response:
-        # assemble headers
-        headers = self._assembling_headers()
-        # do http request
-        response = self._do_http_request(headers)
-        # validate response
-        return self._validate_and_parse_response(response)
+        response = None
+        try:
+            headers = self._assembling_headers()
+            response = self._do_http_request(headers)
+            return self._validate_and_parse_response(response)
+        except Exception:
+            # Close the response if any step failed after it was obtained
+            if response is not None:
+                response.close()
+            raise
 
     def to_log(self):
         url_parts = urlparse(self.url)