[前端+RAG] 修复PDF文字重叠;上传异步化(LLM+向量化后台执行);摘要关键词模型改为deepseek-v3

This commit is contained in:
2026-04-02 14:10:08 +08:00
parent 0e25154468
commit 5158753b94
3 changed files with 122 additions and 116 deletions

View File

@@ -1,12 +1,12 @@
<template>
<div class="pdf-viewer" ref="containerRef">
<div v-for="page in pages" :key="page" class="pdf-page-wrapper">
<div class="pdf-page" :id="'pdf-page-' + page" :style="{ position: 'relative' }">
<div class="pdf-page" :style="{ width: pageWidths[page] + 'px', height: pageHeights[page] + 'px' }">
<canvas :ref="el => setCanvasRef(el, page)"></canvas>
<div class="text-layer" :ref="el => setTextLayerRef(el, page)"></div>
</div>
</div>
<div v-if="loading" class="pdf-loading">加载中...</div>
<div v-if="loading" class="pdf-loading">PDF 加载中...</div>
<div v-if="error" class="pdf-error">{{ error }}</div>
</div>
</template>
@@ -14,8 +14,8 @@
<script setup lang="ts">
import { ref, onMounted, onBeforeUnmount, nextTick, watch } from 'vue';
import * as pdfjsLib from 'pdfjs-dist';
import { renderTextLayer } from 'pdfjs-dist';
// Set worker
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL(
'pdfjs-dist/build/pdf.worker.min.js',
import.meta.url
@@ -28,18 +28,16 @@ const props = defineProps<{
// Component state for the PDF viewer.
// Template ref for the root `.pdf-viewer` element (bound via ref="containerRef").
const containerRef = ref<HTMLElement | null>(null);
// 1-based page numbers (filled as 1..numPages by loadPdf); drives the v-for in the template.
const pages = ref<number[]>([]);
// Per-page viewport width/height in px, keyed by page number; written by renderPage
// and read by the template to size each `.pdf-page` container.
const pageWidths = ref<Record<number, number>>({});
const pageHeights = ref<Record<number, number>>({});
// Starts true; loadPdf sets it to false in its finally block.
const loading = ref(true);
// Message shown in the `.pdf-error` element; empty string means no error.
const error = ref('');
// Plain (non-reactive) lookup tables from page number to DOM element,
// populated by the function-ref callbacks used in the template.
const canvasRefs: Record<number, HTMLCanvasElement> = {};
const textLayerRefs: Record<number, HTMLElement> = {};
// Document object resolved from the pdf.js loading task; null until a PDF has loaded
// and again after the unmount hook has destroyed it.
let pdfDoc: any = null;
// Function-ref callback for a page's <canvas>: stores the element under its page number.
// A falsy `el` is ignored, so an existing entry is left in place rather than removed.
// NOTE(review): Vue documents that function refs are called with null on unmount;
// entries are never deleted here — confirm whether stale elements matter when `src` changes.
const setCanvasRef = (el: any, page: number) => {
if (el) canvasRefs[page] = el;
};
// Function-ref callback for a page's text-layer <div>; same store-if-truthy behaviour
// as setCanvasRef, writing into textLayerRefs.
const setTextLayerRef = (el: any, page: number) => {
if (el) textLayerRefs[page] = el;
};
// Function-ref callbacks used by the template: each stores the element under its page
// number and ignores a falsy `el`, so an existing entry is left in place, not removed.
// NOTE(review): Vue documents that function refs are called with null on unmount;
// entries are never deleted here — confirm whether stale elements matter when `src` changes.
const setCanvasRef = (el: any, page: number) => { if (el) canvasRefs[page] = el; };
const setTextLayerRef = (el: any, page: number) => { if (el) textLayerRefs[page] = el; };
const renderPage = async (pageNum: number) => {
if (!pdfDoc) return;
@@ -47,43 +45,28 @@ const renderPage = async (pageNum: number) => {
const scale = props.scale || 1.5;
const viewport = page.getViewport({ scale });
// Canvas rendering
const canvas = canvasRefs[pageNum];
if (!canvas) return;
const context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
pageWidths.value[pageNum] = viewport.width;
pageHeights.value[pageNum] = viewport.height;
await page.render({ canvasContext: context, viewport }).promise;
// Text layer for text selection
// Text layer using pdfjs built-in API
const textLayerDiv = textLayerRefs[pageNum];
if (textLayerDiv) {
textLayerDiv.style.width = viewport.width + 'px';
textLayerDiv.style.height = viewport.height + 'px';
textLayerDiv.innerHTML = '';
const textContent = await page.getTextContent();
const textItems = textContent.items;
for (const item of textItems) {
if (!item.str) continue;
const tx = pdfjsLib.Util.transform(viewport.transform, item.transform);
const span = document.createElement('span');
span.textContent = item.str;
span.style.position = 'absolute';
span.style.left = tx[4] + 'px';
span.style.top = (viewport.height - tx[5]) + 'px';
span.style.fontSize = Math.abs(tx[0]) + 'px';
span.style.fontFamily = item.fontName || 'sans-serif';
span.style.transformOrigin = '0% 0%';
// Width matching
if (item.width) {
const textWidth = item.width * scale;
span.style.width = textWidth + 'px';
span.style.display = 'inline-block';
}
textLayerDiv.appendChild(span);
}
renderTextLayer({
textContentSource: textContent,
container: textLayerDiv,
viewport: viewport,
textDivs: []
});
}
};
@@ -97,33 +80,20 @@ const loadPdf = async () => {
pdfDoc = await loadingTask.promise;
const numPages = pdfDoc.numPages;
pages.value = Array.from({ length: numPages }, (_, i) => i + 1);
await nextTick();
for (let i = 1; i <= numPages; i++) {
await renderPage(i);
}
} catch (e: any) {
error.value = 'PDF 加载失败: ' + (e.message || e);
console.error('PDF load error:', e);
} finally {
loading.value = false;
}
};
// Reload the document whenever `props.src` changes; a falsy `src` is ignored.
watch(() => props.src, () => {
if (props.src) loadPdf();
});
// Initial load once the component is mounted, if a source was provided.
onMounted(() => {
if (props.src) loadPdf();
});
// Before unmount: call destroy() on the loaded document (if any) and clear the reference.
onBeforeUnmount(() => {
if (pdfDoc) {
pdfDoc.destroy();
pdfDoc = null;
}
});
// Reload the document whenever `props.src` changes; a falsy `src` is ignored.
watch(() => props.src, () => { if (props.src) loadPdf(); });
// Initial load once the component is mounted, if a source was provided.
onMounted(() => { if (props.src) loadPdf(); });
// Before unmount: call destroy() on the loaded document (if any) and clear the reference.
onBeforeUnmount(() => { if (pdfDoc) { pdfDoc.destroy(); pdfDoc = null; } });
</script>
<style scoped>
@@ -131,54 +101,51 @@ onBeforeUnmount(() => {
width: 100%;
height: 100%;
overflow-y: auto;
background: #f5f5f5;
background: #e8e8e8;
padding: 16px 0;
}
.pdf-page-wrapper {
display: flex;
justify-content: center;
margin-bottom: 16px;
margin-bottom: 12px;
}
.pdf-page {
position: relative;
background: white;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
}
.pdf-page canvas {
display: block;
}
.text-layer {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
overflow: hidden;
opacity: 0.3;
line-height: 1;
}
.text-layer span {
/* pdfjs renderTextLayer creates spans with absolute positioning */
.text-layer :deep(span) {
color: transparent;
position: absolute;
white-space: pre;
cursor: text;
transform-origin: 0% 0%;
}
.text-layer span::selection {
.text-layer :deep(span::selection) {
background: rgba(0, 78, 160, 0.3);
color: transparent;
}
.text-layer :deep(br) {
display: none;
}
.pdf-loading, .pdf-error {
text-align: center;
padding: 40px;
color: #999;
font-size: 14px;
}
.pdf-error {
color: #c00;
}
.pdf-error { color: #c00; }
</style>