diff --git a/chat_web_front/src/views/reading/index.vue b/chat_web_front/src/views/reading/index.vue index bdd6371..6526d50 100644 --- a/chat_web_front/src/views/reading/index.vue +++ b/chat_web_front/src/views/reading/index.vue @@ -102,12 +102,17 @@
{{ selectedFile.fileName }} +
+ 预览 + | + 阅读(笔记) +
-
+
- +
@@ -279,11 +284,23 @@ const docHtml = ref(''); const fileContent = ref(null); const readingBox = ref(null); const pdfData = ref(null); +const readingMode = ref(false); const fileType = computed(() => { const name = selectedFile.value?.fileName || ''; return name.split('.').pop()?.toLowerCase() || ''; }); +const switchToReadingMode = async () => { + readingMode.value = true; + // 如果还没加载 HTML 内容,加载一下 + if (!docHtml.value) { + await loadFileContent(); + } + await nextTick(); + bindFileContentEvents(); + handelNoteFlagMouseEvent(); +}; + // ===================== 笔记 ===================== const fileNote = reactive({ notes: [] as any[] }); const noteContent = ref(''); @@ -425,6 +442,7 @@ const handleNodeClick = async (data: any) => { fullContent: doc.context }; // 根据文件类型加载内容 + readingMode.value = false; const ext = doc.filename?.split('.').pop()?.toLowerCase() || ''; if (ext === 'pdf') { await loadPdfFile(); @@ -456,6 +474,14 @@ const loadPdfFile = async () => { }); if (res?.code === 200 && res.data) { fileNote.notes = res.data.notes || []; + if (res.data.content) { + // 保存 HTML 内容供阅读模式使用 + let content = res.data.content; + content = content.replace(pattern, (match: string, _cg: string, offset: number) => { + return transforMd(match); + }); + docHtml.value = content.replace(/

(.*?.*?<\/span>.*?)<\/p>/g, '$1'); + } } } catch {} // 绑定 PDF text layer 的选择事件 @@ -836,7 +862,7 @@ onMounted(async () => { .tree-file-icon { font-size: 13px; margin-right: 5px; } .tree-label { font-size: 13px; color: #333; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; cursor: pointer; } } - .tree-node-actions { opacity: 0; margin-left: 4px; .tree-operate-icon { width: 10px; cursor: pointer; padding: 4px; } } + .tree-node-actions { opacity: 0; margin-left: 4px; flex-shrink: 0; .tree-operate-icon { width: 10px; cursor: pointer; padding: 4px; } } } } @@ -867,8 +893,15 @@ onMounted(async () => { .center-content { flex: 1; display: flex; flex-direction: column; height: 100%; overflow: hidden; .center-header { - padding: 12px 20px; border-bottom: 1px solid #E6EDFF; - .center-title { font-weight: bold; font-size: 15px; color: #000; } + padding: 10px 20px; border-bottom: 1px solid #E6EDFF; + display: flex; justify-content: space-between; align-items: center; + .center-title { font-weight: bold; font-size: 15px; color: #000; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; min-width: 0; } + .view-mode-toggle { + flex-shrink: 0; margin-left: 12px; font-size: 13px; color: #999; + span { cursor: pointer; padding: 2px 6px; border-radius: 3px; } + span.active { color: #004EA0; font-weight: bold; background: #E8F0FE; } + .mode-sep { cursor: default; color: #ddd; padding: 0 2px; } + } } .file-content { flex: 1; overflow: auto; position: relative; padding: 0; diff --git a/langchain-chat/server/knowledge_base/file_converter.py b/langchain-chat/server/knowledge_base/file_converter.py index b3c8634..9fca692 100644 --- a/langchain-chat/server/knowledge_base/file_converter.py +++ b/langchain-chat/server/knowledge_base/file_converter.py @@ -916,7 +916,7 @@ class FileConverter: return "".join(parts) if parts else '

(本页无文本内容)

' def pdf_to_html(self, input_path: str, output_path: Optional[str] = None) -> str: - """PDF 预览:本进程内 PyMuPDF 抽文本生成 HTML,不调用外部 /convert 微服务。""" + """PDF 预览:使用 PyMuPDF 的 get_text("html") 保留格式、字体、图片。""" allowed_pdf_root = os.path.abspath(PDF_CONVERT_KB_ROOT) abs_input = os.path.abspath(input_path) if abs_input != allowed_pdf_root and not abs_input.startswith(allowed_pdf_root + os.sep): @@ -935,27 +935,32 @@ class FileConverter: with fitz.open(abs_input) as doc: for i in range(len(doc)): page = doc.load_page(i) - raw = (page.get_text() or "").strip() - if raw: + # 使用 get_text("html") 保留格式和图片(base64内嵌) + page_html = (page.get_text("html") or "").strip() + if page_html: any_text = True - inner = self._pdf_plain_text_to_html(raw) sections.append( f'
' f'
第 {i + 1} 页
' - f"{inner}
" + f"{page_html}" ) + css = '''''' + if not any_text: wrapper = ( - '
' + f'{css}
' "

(未能从 PDF 提取到文本,可能是扫描件或加密文档。)

" ) else: wrapper = ( - '
' + f'{css}
' f"{''.join(sections)}
" ) return self._save_html(f"{wrapper}", output_path)