Compare commits
2 Commits
main
...
fix/docx-e
Author | SHA1 | Date | |
---|---|---|---|
![]() |
517bbc281a | ||
![]() |
c135ec4b08 |
@@ -31,24 +31,25 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
|||||||
allow_redirects = kwargs.pop("allow_redirects")
|
allow_redirects = kwargs.pop("allow_redirects")
|
||||||
if "follow_redirects" not in kwargs:
|
if "follow_redirects" not in kwargs:
|
||||||
kwargs["follow_redirects"] = allow_redirects
|
kwargs["follow_redirects"] = allow_redirects
|
||||||
|
stream = kwargs.pop("stream", False)
|
||||||
retries = 0
|
retries = 0
|
||||||
while retries <= max_retries:
|
while retries <= max_retries:
|
||||||
try:
|
try:
|
||||||
if SSRF_PROXY_ALL_URL:
|
client_args = {"proxy": SSRF_PROXY_ALL_URL} if SSRF_PROXY_ALL_URL else {}
|
||||||
with httpx.Client(proxy=SSRF_PROXY_ALL_URL) as client:
|
if proxy_mounts:
|
||||||
response = client.request(method=method, url=url, **kwargs)
|
client_args["mounts"] = proxy_mounts
|
||||||
elif proxy_mounts:
|
|
||||||
with httpx.Client(mounts=proxy_mounts) as client:
|
with httpx.Client(**client_args) as client:
|
||||||
response = client.request(method=method, url=url, **kwargs)
|
|
||||||
else:
|
|
||||||
with httpx.Client() as client:
|
|
||||||
response = client.request(method=method, url=url, **kwargs)
|
response = client.request(method=method, url=url, **kwargs)
|
||||||
|
|
||||||
if response.status_code not in STATUS_FORCELIST:
|
if response.status_code not in STATUS_FORCELIST:
|
||||||
|
if stream:
|
||||||
|
return response.iter_bytes()
|
||||||
return response
|
return response
|
||||||
else:
|
else:
|
||||||
logging.warning(f"Received status code {response.status_code} for URL {url} which is in the force list")
|
logging.warning(
|
||||||
|
f"Received status code {response.status_code} for URL {url} which is in the force list"
|
||||||
|
)
|
||||||
|
|
||||||
except httpx.RequestError as e:
|
except httpx.RequestError as e:
|
||||||
logging.warning(f"Request to URL {url} failed on attempt {retries + 1}: {e}")
|
logging.warning(f"Request to URL {url} failed on attempt {retries + 1}: {e}")
|
||||||
|
@@ -14,6 +14,7 @@ import requests
|
|||||||
from docx import Document as DocxDocument
|
from docx import Document as DocxDocument
|
||||||
|
|
||||||
from configs import dify_config
|
from configs import dify_config
|
||||||
|
from core.helper import ssrf_proxy
|
||||||
from core.rag.extractor.extractor_base import BaseExtractor
|
from core.rag.extractor.extractor_base import BaseExtractor
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@@ -80,13 +81,12 @@ class WordExtractor(BaseExtractor):
|
|||||||
os.makedirs(image_folder, exist_ok=True)
|
os.makedirs(image_folder, exist_ok=True)
|
||||||
image_count = 0
|
image_count = 0
|
||||||
image_map = {}
|
image_map = {}
|
||||||
|
|
||||||
for rel in doc.part.rels.values():
|
for rel in doc.part.rels.values():
|
||||||
if "image" in rel.target_ref:
|
if "image" in rel.target_ref:
|
||||||
image_count += 1
|
image_count += 1
|
||||||
if rel.is_external:
|
if rel.is_external:
|
||||||
url = rel.reltype
|
url = rel.reltype
|
||||||
response = requests.get(url, stream=True)
|
response = ssrf_proxy.get(url, stream=True)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
image_ext = mimetypes.guess_extension(response.headers["Content-Type"])
|
image_ext = mimetypes.guess_extension(response.headers["Content-Type"])
|
||||||
file_uuid = str(uuid.uuid4())
|
file_uuid = str(uuid.uuid4())
|
||||||
|
Loading…
Reference in New Issue
Block a user