32 lines
944 B
Python
32 lines
944 B
Python
![]() |
from pathlib import Path
|
||
|
import json
|
||
|
from typing import Dict
|
||
|
from openpyxl import load_workbook
|
||
|
|
||
|
from llama_index.readers.file.base_parser import BaseParser
|
||
|
from flask import current_app
|
||
|
|
||
|
|
||
|
class XLSXParser(BaseParser):
    """Parser that turns the rows of an XLSX workbook into JSON strings."""

    def _init_parser(self) -> Dict:
        """Return the parser configuration; this parser needs none."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> list:
        """Serialize every data row of every worksheet as a JSON object.

        Within each worksheet the first non-empty row is taken as the header
        row. Every later non-empty row is paired with that header and dumped
        as one JSON object string. Rows whose cells are all ``None`` are
        skipped.

        Args:
            file: Path of the workbook to read.
            errors: Not used by this parser; kept so the signature stays
                compatible with existing callers.

        Returns:
            A list of JSON strings, one per data row, in sheet and row order.
        """
        data = []
        # openpyxl opens the file itself, so no separate open() is needed.
        wb = load_workbook(filename=file, read_only=True)
        try:
            for sheet in wb:
                # Each sheet carries its own header row; reusing the header of
                # an earlier sheet would mislabel this sheet's columns.
                keys = None
                for row in sheet.iter_rows(values_only=True):
                    if all(value is None for value in row):
                        continue
                    if keys is None:
                        keys = row
                        continue
                    data.append(
                        json.dumps(
                            dict(zip(keys, row)),
                            ensure_ascii=False,
                            # Date/time cells are not JSON serializable by
                            # default; fall back to their string form instead
                            # of aborting the whole file.
                            default=str,
                        )
                    )
        finally:
            # A read-only workbook keeps the file open until closed explicitly.
            wb.close()
        return data
|