[前端+RAG] PDF原生渲染(pdfjs text layer);Excel列宽优化+水平滚动
This commit is contained in:
@@ -35,6 +35,7 @@
|
||||
"markdown-it-sub": "^2.0.0",
|
||||
"markdown-it-sup": "^2.0.0",
|
||||
"normalize.css": "^8.0.1",
|
||||
"pdfjs-dist": "^3.11.174",
|
||||
"pinia": "^2.2.6",
|
||||
"sa-sdk-javascript": "1.27.2",
|
||||
"sass": "^1.81.0",
|
||||
@@ -50,8 +51,8 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tsconfig/node22": "^22.0.0",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/mark.js": "^8.11.12",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/node": "^22.9.0",
|
||||
"@vitejs/plugin-vue": "^5.1.4",
|
||||
"@vue/tsconfig": "^0.5.1",
|
||||
|
||||
184
chat_web_front/src/components/PdfViewer.vue
Normal file
184
chat_web_front/src/components/PdfViewer.vue
Normal file
@@ -0,0 +1,184 @@
|
||||
<template>
|
||||
<div class="pdf-viewer" ref="containerRef">
|
||||
<div v-for="page in pages" :key="page" class="pdf-page-wrapper">
|
||||
<div class="pdf-page" :id="'pdf-page-' + page" :style="{ position: 'relative' }">
|
||||
<canvas :ref="el => setCanvasRef(el, page)"></canvas>
|
||||
<div class="text-layer" :ref="el => setTextLayerRef(el, page)"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div v-if="loading" class="pdf-loading">加载中...</div>
|
||||
<div v-if="error" class="pdf-error">{{ error }}</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { ref, onMounted, onBeforeUnmount, nextTick, watch } from 'vue';
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
// Point pdf.js at its web-worker bundle. The `new URL(..., import.meta.url)`
// form is kept intact so the bundler can resolve and emit the worker asset.
const pdfWorkerUrl = new URL('pdfjs-dist/build/pdf.worker.min.js', import.meta.url);
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorkerUrl.toString();
|
||||
|
||||
const props = defineProps<{
|
||||
src: ArrayBuffer | Uint8Array | string;
|
||||
scale?: number;
|
||||
}>();
|
||||
|
||||
const containerRef = ref<HTMLElement | null>(null);
|
||||
const pages = ref<number[]>([]);
|
||||
const loading = ref(true);
|
||||
const error = ref('');
|
||||
const canvasRefs: Record<number, HTMLCanvasElement> = {};
|
||||
const textLayerRefs: Record<number, HTMLElement> = {};
|
||||
let pdfDoc: any = null;
|
||||
|
||||
/**
 * Template function-ref callback for a page's `<canvas>`.
 *
 * Stores the element under its page number. When the element goes away
 * (the callback receives `null`) the entry is removed, so canvases from a
 * previous, longer document are not kept alive or rendered into by mistake.
 *
 * @param el   Value handed over by the template ref (canvas element or null).
 * @param page 1-based page number the canvas belongs to.
 */
const setCanvasRef = (el: unknown, page: number) => {
  if (el instanceof HTMLCanvasElement) {
    canvasRefs[page] = el;
  } else {
    delete canvasRefs[page];
  }
};
|
||||
/**
 * Template function-ref callback for a page's text-layer `<div>`.
 *
 * Stores the element under its page number and drops the entry again when
 * the element is removed (the callback receives `null`), so stale text
 * layers from a previous document are not retained.
 *
 * @param el   Value handed over by the template ref (element or null).
 * @param page 1-based page number the text layer belongs to.
 */
const setTextLayerRef = (el: unknown, page: number) => {
  if (el instanceof HTMLElement) {
    textLayerRefs[page] = el;
  } else {
    delete textLayerRefs[page];
  }
};
|
||||
|
||||
/**
 * Renders one page of the currently loaded document: first the bitmap into
 * the page's canvas, then a layer of absolutely positioned, invisible text
 * spans on top of it so the user can select text.
 *
 * Does nothing when no document is loaded or the page's canvas is not
 * mounted.
 *
 * @param pageNum 1-based page number.
 * @throws Error when a 2D canvas context cannot be obtained; also propagates
 *         any rejection from pdf.js (`getPage`, `render`, `getTextContent`).
 */
const renderPage = async (pageNum: number) => {
  if (!pdfDoc) return;
  const page = await pdfDoc.getPage(pageNum);
  // `||` rather than `??` on purpose: a scale of 0 would yield an empty
  // canvas, so it falls back to the default as well.
  const scale = props.scale || 1.5;
  const viewport = page.getViewport({ scale });

  const canvas = canvasRefs[pageNum];
  if (!canvas) return;
  const context = canvas.getContext('2d');
  if (!context) {
    throw new Error('Unable to acquire a 2D canvas context for page ' + pageNum);
  }
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  await page.render({ canvasContext: context, viewport }).promise;

  // Text layer for text selection
  const textLayerDiv = textLayerRefs[pageNum];
  if (!textLayerDiv) return;

  textLayerDiv.style.width = viewport.width + 'px';
  textLayerDiv.style.height = viewport.height + 'px';
  textLayerDiv.innerHTML = '';

  const textContent = await page.getTextContent();
  const fontStyles = textContent.styles || {};
  // Build all spans off-DOM and attach them in one operation.
  const fragment = document.createDocumentFragment();

  for (const item of textContent.items) {
    if (!item.str) continue;

    // viewport.transform already maps PDF space (origin bottom-left) to
    // viewport space (origin top-left), so tx[4]/tx[5] is the glyph
    // baseline origin measured from the top-left corner of the page.
    const tx = pdfjsLib.Util.transform(viewport.transform, item.transform);
    const fontHeight = Math.hypot(tx[2], tx[3]);

    const span = document.createElement('span');
    span.textContent = item.str;
    span.style.position = 'absolute';
    span.style.left = tx[4] + 'px';
    // CSS `top` addresses the top of the box while tx[5] is the baseline:
    // shift up by the font height (used as an approximation of the ascent).
    span.style.top = (tx[5] - fontHeight) + 'px';
    span.style.fontSize = fontHeight + 'px';
    // item.fontName is an internal font id; the usable CSS family is
    // published in the text content's style table under that id.
    span.style.fontFamily = fontStyles[item.fontName]?.fontFamily || 'sans-serif';
    span.style.transformOrigin = '0% 0%';

    // Width matching
    if (item.width) {
      span.style.width = (item.width * scale) + 'px';
      span.style.display = 'inline-block';
    }
    fragment.appendChild(span);
  }

  textLayerDiv.appendChild(fragment);
};
|
||||
|
||||
// Id of the most recent loadPdf() call. A call whose id no longer matches
// has been superseded by a newer one and must not touch shared state.
let latestLoadId = 0;

/**
 * Loads `props.src` with pdf.js, publishes the page list and renders every
 * page in order.
 *
 * The previously opened document is destroyed before the new one is loaded,
 * and a call that has been superseded by a newer `loadPdf()` stops quietly
 * instead of overwriting the newer call's document, loading flag or error.
 * Failures are reported through `error` and the console, never thrown.
 */
const loadPdf = async () => {
  const loadId = ++latestLoadId;
  const isSuperseded = () => loadId !== latestLoadId;

  loading.value = true;
  error.value = '';
  try {
    // Release the document that is being replaced (it is otherwise only
    // destroyed when the component unmounts).
    if (pdfDoc) {
      const previousDoc = pdfDoc;
      pdfDoc = null;
      await previousDoc.destroy();
    }

    // NOTE(review): a string `src` is forwarded as `data` (raw PDF bytes),
    // not as a URL — confirm callers never pass a URL here.
    const loadingTask = pdfjsLib.getDocument({
      data: props.src instanceof ArrayBuffer ? new Uint8Array(props.src) : props.src
    });
    const loadedDoc = await loadingTask.promise;
    if (isSuperseded()) {
      // A newer load owns the component now; discard this result.
      await loadedDoc.destroy();
      return;
    }

    pdfDoc = loadedDoc;
    const numPages = loadedDoc.numPages;
    pages.value = Array.from({ length: numPages }, (_, i) => i + 1);

    // Wait for the canvases / text layers of the new page list to mount.
    await nextTick();
    for (let i = 1; i <= numPages; i++) {
      if (isSuperseded()) return;
      await renderPage(i);
    }
  } catch (e: unknown) {
    // Errors from a superseded load are irrelevant to what is on screen.
    if (isSuperseded()) return;
    const reason = e instanceof Error && e.message ? e.message : String(e);
    error.value = 'PDF 加载失败: ' + reason;
    console.error('PDF load error:', e);
  } finally {
    if (!isSuperseded()) {
      loading.value = false;
    }
  }
};
|
||||
|
||||
// Reload whenever the parent hands over a different, non-empty source.
watch(
  () => props.src,
  (nextSrc) => {
    if (!nextSrc) return;
    loadPdf();
  }
);
|
||||
|
||||
// Initial load: only when a source was already supplied at mount time.
onMounted(() => {
  if (!props.src) return;
  loadPdf();
});
|
||||
|
||||
// Destroy the open document, if any, when the viewer goes away.
onBeforeUnmount(() => {
  if (!pdfDoc) return;
  pdfDoc.destroy();
  pdfDoc = null;
});
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.pdf-viewer {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
overflow-y: auto;
|
||||
background: #f5f5f5;
|
||||
}
|
||||
|
||||
.pdf-page-wrapper {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.pdf-page {
|
||||
position: relative;
|
||||
background: white;
|
||||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
|
||||
}
|
||||
|
||||
.pdf-page canvas {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.text-layer {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
overflow: hidden;
|
||||
opacity: 0.3;
|
||||
line-height: 1;
|
||||
}
|
||||
|
||||
.text-layer span {
|
||||
color: transparent;
|
||||
position: absolute;
|
||||
white-space: pre;
|
||||
cursor: text;
|
||||
}
|
||||
|
||||
.text-layer span::selection {
|
||||
background: rgba(0, 78, 160, 0.3);
|
||||
color: transparent;
|
||||
}
|
||||
|
||||
.pdf-loading, .pdf-error {
|
||||
text-align: center;
|
||||
padding: 40px;
|
||||
color: #999;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.pdf-error {
|
||||
color: #c00;
|
||||
}
|
||||
</style>
|
||||
@@ -103,7 +103,12 @@
|
||||
<div class="center-header">
|
||||
<span class="center-title" :title="selectedFile.fileName">{{ selectedFile.fileName }}</span>
|
||||
</div>
|
||||
<div class="file-content" ref="fileContent" id="file-content">
|
||||
<!-- PDF 原生渲染 -->
|
||||
<div v-if="fileType === 'pdf'" class="file-content" ref="fileContent" id="file-content">
|
||||
<PdfViewer v-if="pdfData" :src="pdfData" :scale="1.3" />
|
||||
</div>
|
||||
<!-- 其他文件类型:HTML 渲染 -->
|
||||
<div v-else class="file-content" ref="fileContent" id="file-content">
|
||||
<div class="view-md" id="file-html-content" v-html="docHtml"></div>
|
||||
<div id="note-content" :title="noteContent" class="file-note"></div>
|
||||
</div>
|
||||
@@ -207,6 +212,7 @@ import {withLoading} from "@/utils/loading";
|
||||
import {copyToClip, getGlobalSelectionPosition} from "@/utils";
|
||||
import {transforMd} from "@/utils/markdown";
|
||||
import ReadingBox from "@/components/ReadingBox.vue";
|
||||
import PdfViewer from "@/components/PdfViewer.vue";
|
||||
import Loading from "@/components/Loading.vue";
|
||||
import {UploadFilled} from '@element-plus/icons-vue';
|
||||
import {ElMessage, ElMessageBox, type UploadFile, type UploadFiles} from "element-plus";
|
||||
@@ -271,6 +277,11 @@ provide('selectedFile', selectedFile);
|
||||
const docHtml = ref('');
|
||||
const fileContent = ref(null);
|
||||
const readingBox = ref(null);
|
||||
const pdfData = ref<ArrayBuffer | null>(null);
|
||||
// Lower-cased text after the last '.' of the selected file's name
// ('' when no file is selected or the name is empty).
const fileType = computed(() => {
  const nameParts = (selectedFile.value?.fileName || '').split('.');
  const lastPart = nameParts.pop();
  return lastPart?.toLowerCase() || '';
});
|
||||
|
||||
// ===================== 笔记 =====================
|
||||
const fileNote = reactive({ notes: [] as any[] });
|
||||
@@ -412,7 +423,55 @@ const handleNodeClick = async (data: any) => {
|
||||
articleParagraph: doc.articleParagraph || '暂无内容,请重试',
|
||||
fullContent: doc.context
|
||||
};
|
||||
// 根据文件类型加载内容
|
||||
const ext = doc.filename?.split('.').pop()?.toLowerCase() || '';
|
||||
if (ext === 'pdf') {
|
||||
await loadPdfFile();
|
||||
} else {
|
||||
pdfData.value = null;
|
||||
await loadFileContent();
|
||||
}
|
||||
};
|
||||
|
||||
// Shared mouseup handler for the PDF container. Keeping one stable function
// reference means re-registering it is a no-op, so repeated PDF loads cannot
// stack duplicate listeners.
const handlePdfSelectionMouseUp = (event: MouseEvent) => {
  // Read the selection on the next task rather than inside the mouseup itself.
  setTimeout(() => {
    const sel = window.getSelection();
    if (!sel) return;
    selectText.value = sel.toString();
    if (selectText.value && shortMenuDom.value) {
      shortMenuShow.value = true;
      const menu = shortMenuDom.value as HTMLElement;
      menu.style.left = event.clientX + 'px';
      menu.style.top = event.clientY + 'px';
    }
  });
};

/**
 * Loads the currently selected file as a PDF.
 *
 * 1. Downloads the file and exposes its bytes through `pdfData`.
 * 2. Fetches the file's content record to populate `fileNote.notes`.
 * 3. Wires text selection inside the file container to the shortcut menu.
 *
 * The selected file is captured on entry. If the selection changes while a
 * request is in flight, the outdated result is dropped instead of
 * overwriting the state of the newly selected file.
 */
const loadPdfFile = async () => {
  const file = selectedFile.value;
  if (!file) return;
  const isOutdated = () => selectedFile.value?.fileId !== file.fileId;

  docHtml.value = '';
  try {
    const blob = await downloadFile({ fileId: file.fileId });
    const arrayBuffer = await (blob as Blob).arrayBuffer();
    if (isOutdated()) return;
    pdfData.value = arrayBuffer;
  } catch (e: unknown) {
    if (isOutdated()) return;
    console.error('PDF download failed:', e);
    pdfData.value = null;
    // NOTE(review): docHtml appears to be rendered only in the non-PDF
    // branch of the template, so this message may never be visible for a
    // PDF — confirm.
    docHtml.value = '<p style="color:#999;text-align:center;margin-top:40px;">PDF 文件加载失败</p>';
  }

  // Also fetch the file's content record; only its notes are used here.
  try {
    const res = await getFileContent({
      fileId: file.fileId,
      embeddingId: file.embeddingId,
      knowledgeBaseId: file.folderId
    });
    if (isOutdated()) return;
    if (res?.code === 200 && res.data) {
      fileNote.notes = res.data.notes || [];
    }
  } catch (e: unknown) {
    // Notes are best-effort: the PDF stays usable without them.
    console.warn('Failed to load notes for PDF:', e);
  }

  // Bind the selection handler for the PDF text layer once the container exists.
  await nextTick();
  setTimeout(() => {
    const container = fileContent.value as HTMLElement | null;
    if (!container) return;
    container.addEventListener('mouseup', handlePdfSelectionMouseUp);
  }, 500);
};
|
||||
|
||||
const handleCheckChange = () => {
|
||||
|
||||
@@ -1070,13 +1070,43 @@ class FileConverter:
|
||||
def xlsx_to_html(self, input_path: str, output_path: Optional[str] = None) -> str:
|
||||
try:
|
||||
import openpyxl
|
||||
from openpyxl.utils import get_column_letter
|
||||
wb = openpyxl.load_workbook(input_path, data_only=True)
|
||||
style = '''<style>.excel-table{border-collapse:collapse;width:100%;margin:1em 0;} .excel-table td,.excel-table th{padding:8px;border:1px solid #ddd;}</style>'''
|
||||
style = '''<style>
|
||||
.excel-table-wrapper{overflow-x:auto;margin:1em 0;}
|
||||
.excel-table{border-collapse:collapse;margin:0;table-layout:fixed;}
|
||||
.excel-table td,.excel-table th{padding:6px 10px;border:1px solid #d0d0d0;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font-size:13px;vertical-align:middle;}
|
||||
.excel-table tr:nth-child(even){background:#fafbff;}
|
||||
</style>'''
|
||||
html = []
|
||||
for idx, sheet in enumerate(wb.worksheets):
|
||||
html.append(f'<h3>Sheet {idx+1}: {sheet.title}</h3>')
|
||||
html.append('<div class="table-container">')
|
||||
html.append('<table class="excel-table" border="1" style="border-collapse:collapse;width:100%;margin:1em 0;">')
|
||||
# 计算列宽
|
||||
col_widths = {}
|
||||
for col_idx in range(1, sheet.max_column + 1):
|
||||
col_letter = get_column_letter(col_idx)
|
||||
dim = sheet.column_dimensions.get(col_letter)
|
||||
if dim and dim.width and dim.width > 0:
|
||||
# openpyxl width 以字符数为单位,约 7px/字符
|
||||
col_widths[col_idx] = max(60, int(dim.width * 7.5))
|
||||
else:
|
||||
# 根据内容估算宽度
|
||||
max_len = 8
|
||||
for row_idx in range(1, min(sheet.max_row + 1, 50)):
|
||||
cell = sheet.cell(row=row_idx, column=col_idx)
|
||||
if cell.value is not None:
|
||||
max_len = max(max_len, len(str(cell.value)))
|
||||
col_widths[col_idx] = max(60, min(300, max_len * 9))
|
||||
|
||||
html.append('<div class="excel-table-wrapper">')
|
||||
html.append('<table class="excel-table" border="1">')
|
||||
# colgroup 设置列宽
|
||||
html.append('<colgroup>')
|
||||
for col_idx in range(1, sheet.max_column + 1):
|
||||
w = col_widths.get(col_idx, 80)
|
||||
html.append(f'<col style="width:{w}px;min-width:{w}px;">')
|
||||
html.append('</colgroup>')
|
||||
|
||||
merged_map = {}
|
||||
for r in sheet.merged_cells.ranges:
|
||||
min_row, min_col, max_row, max_col = r.min_row, r.min_col, r.max_row, r.max_col
|
||||
@@ -1099,6 +1129,8 @@ class FileConverter:
|
||||
td_attrs += f' rowspan="{rowspan}"'
|
||||
if colspan > 1:
|
||||
td_attrs += f' colspan="{colspan}"'
|
||||
# 合并单元格允许换行
|
||||
style_str += 'white-space:normal;word-wrap:break-word;'
|
||||
html.append(f'<td{td_attrs} style="{style_str}">{cell_value}</td>')
|
||||
html.append('</tr>')
|
||||
html.append('</table></div>')
|
||||
|
||||
Reference in New Issue
Block a user