From 475b8d731e2353c034c60eea80acf3f1f8268065 Mon Sep 17 00:00:00 2001 From: Gen Sato <52241300+halogen22@users.noreply.github.com> Date: Tue, 18 Mar 2025 12:00:20 +0900 Subject: [PATCH] Fix HTTP Request node to give priority to file extension of content-disposition (#12653) --- .../workflow/nodes/http_request/entities.py | 19 ++++--- api/core/workflow/nodes/http_request/node.py | 54 +++++++++++-------- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/api/core/workflow/nodes/http_request/entities.py b/api/core/workflow/nodes/http_request/entities.py index 1a23171108..054e30f0aa 100644 --- a/api/core/workflow/nodes/http_request/entities.py +++ b/api/core/workflow/nodes/http_request/entities.py @@ -109,14 +109,12 @@ class Response: 3. MIME type analysis """ content_type = self.content_type.split(";")[0].strip().lower() - content_disposition = self.response.headers.get("content-disposition", "") + parsed_content_disposition = self.parsed_content_disposition # Check if it's explicitly marked as an attachment - if content_disposition: - msg = Message() - msg["content-disposition"] = content_disposition - disp_type = msg.get_content_disposition() # Returns 'attachment', 'inline', or None - filename = msg.get_filename() # Returns filename if present, None otherwise + if parsed_content_disposition: + disp_type = parsed_content_disposition.get_content_disposition() # Returns 'attachment', 'inline', or None + filename = parsed_content_disposition.get_filename() # Returns filename if present, None otherwise if disp_type == "attachment" or filename is not None: return True @@ -182,3 +180,12 @@ class Response: return f"{(self.size / 1024):.2f} KB" else: return f"{(self.size / 1024 / 1024):.2f} MB" + + @property + def parsed_content_disposition(self) -> Optional[Message]: + content_disposition = self.headers.get("content-disposition", "") + if content_disposition: + msg = Message() + msg["content-disposition"] = content_disposition + return msg + return None diff --git a/api/core/workflow/nodes/http_request/node.py b/api/core/workflow/nodes/http_request/node.py index 861119f26c..467161d5ed 100644 --- a/api/core/workflow/nodes/http_request/node.py +++ b/api/core/workflow/nodes/http_request/node.py @@ -169,32 +169,44 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]): """ Extract files from response by checking both Content-Type header and URL """ - files = [] + files: list[File] = [] is_file = response.is_file content_type = response.content_type content = response.content + parsed_content_disposition = response.parsed_content_disposition + content_disposition_type = None - if is_file: - # Guess file extension from URL or Content-Type header - filename = url.split("?")[0].split("/")[-1] or "" - mime_type = content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream" + if not is_file: + return files - tool_file = ToolFileManager.create_file_by_raw( - user_id=self.user_id, - tenant_id=self.tenant_id, - conversation_id=None, - file_binary=content, - mimetype=mime_type, - ) + if parsed_content_disposition: + content_disposition_filename = parsed_content_disposition.get_filename() + if content_disposition_filename: + # If filename is available from content-disposition, use it to guess the content type + content_disposition_type = mimetypes.guess_type(content_disposition_filename)[0] - mapping = { - "tool_file_id": tool_file.id, - "transfer_method": FileTransferMethod.TOOL_FILE.value, - } - file = file_factory.build_from_mapping( - mapping=mapping, - tenant_id=self.tenant_id, - ) - files.append(file) + # Guess file extension from URL or Content-Type header + filename = url.split("?")[0].split("/")[-1] or "" + mime_type = ( + content_disposition_type or content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream" + ) + + tool_file = ToolFileManager.create_file_by_raw( + user_id=self.user_id, + tenant_id=self.tenant_id, + conversation_id=None, + file_binary=content, + mimetype=mime_type, + ) + + mapping = { + "tool_file_id": tool_file.id, + "transfer_method": FileTransferMethod.TOOL_FILE.value, + } + file = file_factory.build_from_mapping( + mapping=mapping, + tenant_id=self.tenant_id, + ) + files.append(file) return files