Fix the issue of decoding a non-UTF-8 encoded file using UTF-8 encoding (#378)

This commit is contained in:
Columbus 2023-06-16 14:12:07 +08:00 committed by GitHub
parent 673288d58e
commit eeb2c28526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,6 +1,7 @@
 import datetime
 import hashlib
 import tempfile
+import chardet
 import time
 import uuid
 from pathlib import Path
@@ -141,7 +142,8 @@ class FilePreviewApi(Resource):
 # ['txt', 'markdown', 'md']
 with open(filepath, "rb") as fp:
 data = fp.read()
-text = data.decode(encoding='utf-8').strip() if data else ''
+encoding = chardet.detect(data)['encoding']
+text = data.decode(encoding=encoding).strip() if data else ''
 text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
 return {'content': text}