[前端+RAG] 添加PDF阅读模式切换(预览/笔记);改进PDF→HTML用get_text(html)保留格式;修复长文件名操作菜单不显示

This commit is contained in:
2026-04-02 14:21:41 +08:00
parent 5158753b94
commit 379187f675
2 changed files with 52 additions and 14 deletions

View File

@@ -102,12 +102,17 @@
<div v-else class="center-content">
<div class="center-header">
<span class="center-title" :title="selectedFile.fileName">{{ selectedFile.fileName }}</span>
<div v-if="fileType === 'pdf'" class="view-mode-toggle">
<span :class="{ active: !readingMode }" @click="readingMode = false">预览</span>
<span class="mode-sep">|</span>
<span :class="{ active: readingMode }" @click="switchToReadingMode">阅读(笔记)</span>
</div>
</div>
<!-- PDF 原生渲染 -->
<div v-if="fileType === 'pdf'" class="file-content" ref="fileContent" id="file-content">
<div v-if="fileType === 'pdf' && !readingMode" class="file-content" ref="fileContent" id="file-content">
<PdfViewer v-if="pdfData" :src="pdfData" :scale="1.3" />
</div>
<!-- 其他文件类型:HTML 渲染 -->
<!-- HTML 阅读模式:PDF 阅读模式 + 非 PDF 文件 -->
<div v-else class="file-content" ref="fileContent" id="file-content">
<div class="view-md" id="file-html-content" v-html="docHtml"></div>
<div id="note-content" :title="noteContent" class="file-note"></div>
@@ -279,11 +284,23 @@ const docHtml = ref('');
// Template ref for the scrollable `.file-content` container (bound via ref="fileContent" in the template).
const fileContent = ref(null);
// NOTE(review): presumably a template ref for the reading area; its binding is not visible in this chunk — confirm.
const readingBox = ref(null);
// Data handed to <PdfViewer :src="pdfData">; the viewer is only rendered once this is non-null.
const pdfData = ref<ArrayBuffer | null>(null);
// For PDF files: false shows the PdfViewer preview, true shows the HTML reading (notes) view.
const readingMode = ref(false);
/**
 * Lower-cased text after the last '.' of the selected file's name.
 * Yields '' when no file is selected or the name is empty; a name that
 * contains no '.' yields the whole name in lower case.
 */
const fileType = computed(() => {
  const fileName = selectedFile.value?.fileName || '';
  const segments = fileName.split('.');
  const lastSegment = segments.pop();
  return lastSegment?.toLowerCase() || '';
});
/**
 * Switch the current document to the HTML reading (notes) view.
 *
 * Turns `readingMode` on, loads the HTML content if none is cached yet,
 * waits for the next DOM update, and then re-runs the event-binding helpers
 * (`bindFileContentEvents` and `handelNoteFlagMouseEvent`, defined elsewhere
 * in this component).
 */
const switchToReadingMode = async () => {
  readingMode.value = true;

  // Only fetch the HTML content when nothing has been stored in docHtml yet.
  const htmlMissing = !docHtml.value;
  if (htmlMissing) {
    await loadFileContent();
  }

  // Let Vue flush the pending render before the binding helpers run.
  await nextTick();
  bindFileContentEvents();
  handelNoteFlagMouseEvent();
};
// ===================== Notes =====================
// Reactive holder for the current file's notes; `notes` is replaced with the list returned by the server when a file is loaded.
const fileNote = reactive({ notes: [] as any[] });
// Text bound as the `title` attribute of the #note-content element in the template.
const noteContent = ref('');
@@ -425,6 +442,7 @@ const handleNodeClick = async (data: any) => {
fullContent: doc.context
};
// 根据文件类型加载内容
readingMode.value = false;
const ext = doc.filename?.split('.').pop()?.toLowerCase() || '';
if (ext === 'pdf') {
await loadPdfFile();
@@ -456,6 +474,14 @@ const loadPdfFile = async () => {
});
if (res?.code === 200 && res.data) {
fileNote.notes = res.data.notes || [];
if (res.data.content) {
// 保存 HTML 内容供阅读模式使用
let content = res.data.content;
content = content.replace(pattern, (match: string, _cg: string, offset: number) => {
return transforMd(match);
});
docHtml.value = content.replace(/<p>(.*?<span class="katex">.*?<\/span>.*?)<\/p>/g, '$1');
}
}
} catch {}
// 绑定 PDF text layer 的选择事件
@@ -836,7 +862,7 @@ onMounted(async () => {
.tree-file-icon { font-size: 13px; margin-right: 5px; }
.tree-label { font-size: 13px; color: #333; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; cursor: pointer; }
}
.tree-node-actions { opacity: 0; margin-left: 4px; .tree-operate-icon { width: 10px; cursor: pointer; padding: 4px; } }
.tree-node-actions { opacity: 0; margin-left: 4px; flex-shrink: 0; .tree-operate-icon { width: 10px; cursor: pointer; padding: 4px; } }
}
}
@@ -867,8 +893,15 @@ onMounted(async () => {
.center-content {
flex: 1; display: flex; flex-direction: column; height: 100%; overflow: hidden;
.center-header {
padding: 12px 20px; border-bottom: 1px solid #E6EDFF;
.center-title { font-weight: bold; font-size: 15px; color: #000; }
padding: 10px 20px; border-bottom: 1px solid #E6EDFF;
display: flex; justify-content: space-between; align-items: center;
.center-title { font-weight: bold; font-size: 15px; color: #000; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; min-width: 0; }
.view-mode-toggle {
flex-shrink: 0; margin-left: 12px; font-size: 13px; color: #999;
span { cursor: pointer; padding: 2px 6px; border-radius: 3px; }
span.active { color: #004EA0; font-weight: bold; background: #E8F0FE; }
.mode-sep { cursor: default; color: #ddd; padding: 0 2px; }
}
}
.file-content {
flex: 1; overflow: auto; position: relative; padding: 0;

View File

@@ -916,7 +916,7 @@ class FileConverter:
return "".join(parts) if parts else '<p><em>(本页无文本内容)</em></p>'
def pdf_to_html(self, input_path: str, output_path: Optional[str] = None) -> str:
"""PDF 预览:本进程内 PyMuPDF 抽文本生成 HTML不调用外部 /convert 微服务"""
"""PDF 预览:使用 PyMuPDF 的 get_text("html") 保留格式、字体、图片"""
allowed_pdf_root = os.path.abspath(PDF_CONVERT_KB_ROOT)
abs_input = os.path.abspath(input_path)
if abs_input != allowed_pdf_root and not abs_input.startswith(allowed_pdf_root + os.sep):
@@ -935,27 +935,32 @@ class FileConverter:
with fitz.open(abs_input) as doc:
for i in range(len(doc)):
page = doc.load_page(i)
raw = (page.get_text() or "").strip()
if raw:
# 使用 get_text("html") 保留格式和图片(图片以 base64 内嵌)
page_html = (page.get_text("html") or "").strip()
if page_html:
any_text = True
inner = self._pdf_plain_text_to_html(raw)
sections.append(
f'<section class="pdf-page" data-page="{i + 1}" '
'style="margin-bottom:1.5em;padding-bottom:1em;border-bottom:1px solid #e5e5e5;">'
f'<div style="font-size:12px;color:#888;margin-bottom:8px;">第 {i + 1} 页</div>'
f"{inner}</section>"
f"{page_html}</section>"
)
css = '''<style>
.pdf-preview { font-family: system-ui, -apple-system, sans-serif; line-height: 1.6; max-width: 100%; }
.pdf-preview img { max-width: 100%; height: auto; }
.pdf-preview p { margin: 0.3em 0; }
.pdf-preview span { line-height: 1.5; }
</style>'''
if not any_text:
wrapper = (
'<div class="pdf-preview" style="font-family:system-ui,-apple-system,Segoe UI,Roboto,sans-serif;'
'line-height:1.6;max-width:900px;">'
f'{css}<div class="pdf-preview">'
"<p><em>(未能从 PDF 提取到文本,可能是扫描件或加密文档。)</em></p></div>"
)
else:
wrapper = (
'<div class="pdf-preview" style="font-family:system-ui,-apple-system,Segoe UI,Roboto,sans-serif;'
'line-height:1.6;max-width:900px;">'
f'{css}<div class="pdf-preview">'
f"{''.join(sections)}</div>"
)
return self._save_html(f"<body>{wrapper}</body>", output_path)