[前端+RAG] PDF原生渲染(pdfjs text layer);Excel列宽优化+水平滚动

This commit is contained in:
2026-04-02 13:24:41 +08:00
parent 200ccac9b9
commit f508a8b6b1
4 changed files with 282 additions and 6 deletions

View File

@@ -1070,13 +1070,43 @@ class FileConverter:
def xlsx_to_html(self, input_path: str, output_path: Optional[str] = None) -> str:
try:
import openpyxl
from openpyxl.utils import get_column_letter
wb = openpyxl.load_workbook(input_path, data_only=True)
style = '''<style>.excel-table{border-collapse:collapse;width:100%;margin:1em 0;} .excel-table td,.excel-table th{padding:8px;border:1px solid #ddd;}</style>'''
style = '''<style>
.excel-table-wrapper{overflow-x:auto;margin:1em 0;}
.excel-table{border-collapse:collapse;margin:0;table-layout:fixed;}
.excel-table td,.excel-table th{padding:6px 10px;border:1px solid #d0d0d0;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font-size:13px;vertical-align:middle;}
.excel-table tr:nth-child(even){background:#fafbff;}
</style>'''
html = []
for idx, sheet in enumerate(wb.worksheets):
html.append(f'<h3>Sheet {idx+1}: {sheet.title}</h3>')
html.append('<div class="table-container">')
html.append('<table class="excel-table" border="1" style="border-collapse:collapse;width:100%;margin:1em 0;">')
# 计算列宽
col_widths = {}
for col_idx in range(1, sheet.max_column + 1):
col_letter = get_column_letter(col_idx)
dim = sheet.column_dimensions.get(col_letter)
if dim and dim.width and dim.width > 0:
# openpyxl column width is measured in characters; approximated here as ~7.5px per character (the multiplier used below)
col_widths[col_idx] = max(60, int(dim.width * 7.5))
else:
# 根据内容估算宽度
max_len = 8
for row_idx in range(1, min(sheet.max_row + 1, 50)):
cell = sheet.cell(row=row_idx, column=col_idx)
if cell.value is not None:
max_len = max(max_len, len(str(cell.value)))
col_widths[col_idx] = max(60, min(300, max_len * 9))
html.append('<div class="excel-table-wrapper">')
html.append('<table class="excel-table" border="1">')
# colgroup 设置列宽
html.append('<colgroup>')
for col_idx in range(1, sheet.max_column + 1):
w = col_widths.get(col_idx, 80)
html.append(f'<col style="width:{w}px;min-width:{w}px;">')
html.append('</colgroup>')
merged_map = {}
for r in sheet.merged_cells.ranges:
min_row, min_col, max_row, max_col = r.min_row, r.min_col, r.max_row, r.max_col
@@ -1099,6 +1129,8 @@ class FileConverter:
td_attrs += f' rowspan="{rowspan}"'
if colspan > 1:
td_attrs += f' colspan="{colspan}"'
# 合并单元格允许换行
style_str += 'white-space:normal;word-wrap:break-word;'
html.append(f'<td{td_attrs} style="{style_str}">{cell_value}</td>')
html.append('</tr>')
html.append('</table></div>')