diff --git a/chat_web_front/package.json b/chat_web_front/package.json
index 7a8dc29..1b4533f 100644
--- a/chat_web_front/package.json
+++ b/chat_web_front/package.json
@@ -35,6 +35,7 @@
"markdown-it-sub": "^2.0.0",
"markdown-it-sup": "^2.0.0",
"normalize.css": "^8.0.1",
+ "pdfjs-dist": "^3.11.174",
"pinia": "^2.2.6",
"sa-sdk-javascript": "1.27.2",
"sass": "^1.81.0",
@@ -50,8 +51,8 @@
},
"devDependencies": {
"@tsconfig/node22": "^22.0.0",
- "@types/markdown-it": "^14.1.2",
"@types/mark.js": "^8.11.12",
+ "@types/markdown-it": "^14.1.2",
"@types/node": "^22.9.0",
"@vitejs/plugin-vue": "^5.1.4",
"@vue/tsconfig": "^0.5.1",
diff --git a/chat_web_front/src/components/PdfViewer.vue b/chat_web_front/src/components/PdfViewer.vue
new file mode 100644
index 0000000..6c3680d
--- /dev/null
+++ b/chat_web_front/src/components/PdfViewer.vue
@@ -0,0 +1,184 @@
+
+
+
+
加载中...
+
{{ error }}
+
+
+
+
+
+
diff --git a/chat_web_front/src/views/reading/index.vue b/chat_web_front/src/views/reading/index.vue
index b5b62d9..612851a 100644
--- a/chat_web_front/src/views/reading/index.vue
+++ b/chat_web_front/src/views/reading/index.vue
@@ -103,7 +103,12 @@
-
+
+
+
+
@@ -207,6 +212,7 @@ import {withLoading} from "@/utils/loading";
import {copyToClip, getGlobalSelectionPosition} from "@/utils";
import {transforMd} from "@/utils/markdown";
import ReadingBox from "@/components/ReadingBox.vue";
+import PdfViewer from "@/components/PdfViewer.vue";
import Loading from "@/components/Loading.vue";
import {UploadFilled} from '@element-plus/icons-vue';
import {ElMessage, ElMessageBox, type UploadFile, type UploadFiles} from "element-plus";
@@ -271,6 +277,11 @@ provide('selectedFile', selectedFile);
const docHtml = ref('');
const fileContent = ref(null);
const readingBox = ref(null);
+// Raw bytes of the currently displayed PDF. It is set from the downloaded blob in
+// loadPdfFile and reset to null when the download fails or a non-PDF file is opened.
+const pdfData = ref<ArrayBuffer | null>(null);
+// Lower-cased extension of the selected file's name.
+// Yields '' when no file is selected or when the name contains no dot, so an
+// extension-less file that happens to be called "pdf" is not treated as a PDF.
+const fileType = computed(() => {
+  const name = selectedFile.value?.fileName ?? '';
+  const dot = name.lastIndexOf('.');
+  return dot === -1 ? '' : name.slice(dot + 1).toLowerCase();
+});
// ===================== 笔记 =====================
const fileNote = reactive({ notes: [] as any[] });
@@ -412,7 +423,55 @@ const handleNodeClick = async (data: any) => {
articleParagraph: doc.articleParagraph || '暂无内容,请重试',
fullContent: doc.context
};
- await loadFileContent();
+ // 根据文件类型加载内容
+ const ext = doc.filename?.split('.').pop()?.toLowerCase() || '';
+ if (ext === 'pdf') {
+ await loadPdfFile();
+ } else {
+ pdfData.value = null;
+ await loadFileContent();
+ }
+};
+
+/**
+ * mouseup handler for the reading area while a PDF is shown: copies the current
+ * text selection into `selectText` and, when it is non-empty, shows the shortcut
+ * menu at the pointer position.
+ *
+ * This is deliberately a single stable function: registering the same function
+ * on the same element more than once does not add duplicate listeners, so
+ * opening several PDFs in a row cannot stack handlers.
+ */
+const handlePdfTextMouseUp = (event: MouseEvent) => {
+  // Read the selection on the next macrotask instead of synchronously in mouseup.
+  setTimeout(() => {
+    const sel = window.getSelection();
+    if (!sel) return;
+    selectText.value = sel.toString();
+    if (selectText.value && shortMenuDom.value) {
+      shortMenuShow.value = true;
+      const menu = shortMenuDom.value as HTMLElement;
+      menu.style.left = event.clientX + 'px';
+      menu.style.top = event.clientY + 'px';
+    }
+  });
+};
+
+/**
+ * Loads the currently selected file as a PDF.
+ *
+ * Steps:
+ *  1. Downloads the file and stores its bytes in `pdfData`; on failure clears
+ *     `pdfData` and puts an error message into `docHtml`.
+ *  2. Fetches the file content record to populate `fileNote.notes` (best effort:
+ *     a failure is logged and otherwise ignored).
+ *  3. After the next DOM update plus a 500 ms delay, attaches the text-selection
+ *     handler to the `fileContent` element.
+ *
+ * If the user selects a different file while a request is in flight, the stale
+ * result is discarded instead of overwriting the newer selection's state.
+ */
+const loadPdfFile = async () => {
+  const file = selectedFile.value;
+  if (!file) return;
+  // Snapshot the identifiers so later awaits cannot pick up a different selection.
+  const { fileId, embeddingId, folderId } = file;
+  const isStale = () => selectedFile.value?.fileId !== fileId;
+
+  docHtml.value = '';
+  try {
+    const blob = await downloadFile({ fileId });
+    const arrayBuffer = await (blob as Blob).arrayBuffer();
+    if (isStale()) return;
+    pdfData.value = arrayBuffer;
+  } catch (e: unknown) {
+    if (isStale()) return;
+    console.error('Failed to load PDF file', e);
+    pdfData.value = null;
+    // NOTE(review): the wrapper markup of this message was not recoverable from
+    // the reviewed diff; a plain <p> is assumed — confirm the intended styling.
+    docHtml.value = '<p>PDF 文件加载失败</p>';
+  }
+  // Also fetch the content record in the background; only its notes are used here.
+  try {
+    const res = await getFileContent({
+      fileId,
+      embeddingId,
+      knowledgeBaseId: folderId
+    });
+    if (isStale()) return;
+    if (res?.code === 200 && res.data) {
+      fileNote.notes = res.data.notes || [];
+    }
+  } catch (e: unknown) {
+    // Notes are optional for viewing the PDF, so do not fail the whole load.
+    console.warn('Failed to load notes for PDF file', e);
+  }
+  // Bind the selection handler for the PDF text layer once the view has updated.
+  await nextTick();
+  setTimeout(() => {
+    if (fileContent.value) {
+      fileContent.value.addEventListener('mouseup', handlePdfTextMouseUp);
+    }
+  }, 500);
};
const handleCheckChange = () => {
diff --git a/langchain-chat/server/knowledge_base/file_converter.py b/langchain-chat/server/knowledge_base/file_converter.py
index b738573..b3c8634 100644
--- a/langchain-chat/server/knowledge_base/file_converter.py
+++ b/langchain-chat/server/knowledge_base/file_converter.py
@@ -1070,13 +1070,43 @@ class FileConverter:
def xlsx_to_html(self, input_path: str, output_path: Optional[str] = None) -> str:
try:
import openpyxl
+ from openpyxl.utils import get_column_letter
wb = openpyxl.load_workbook(input_path, data_only=True)
- style = ''''''
+ style = ''''''
html = []
for idx, sheet in enumerate(wb.worksheets):
html.append(f'Sheet {idx+1}: {sheet.title}
')
- html.append('')
- html.append('
')
+ # 计算列宽
+ col_widths = {}
+ for col_idx in range(1, sheet.max_column + 1):
+ col_letter = get_column_letter(col_idx)
+ dim = sheet.column_dimensions.get(col_letter)
+ if dim and dim.width and dim.width > 0:
+ # openpyxl width 以字符数为单位,约 7px/字符
+ col_widths[col_idx] = max(60, int(dim.width * 7.5))
+ else:
+ # 根据内容估算宽度
+ max_len = 8
+ for row_idx in range(1, min(sheet.max_row + 1, 50)):
+ cell = sheet.cell(row=row_idx, column=col_idx)
+ if cell.value is not None:
+ max_len = max(max_len, len(str(cell.value)))
+ col_widths[col_idx] = max(60, min(300, max_len * 9))
+
+ html.append('')
+ html.append('
')
+ # colgroup 设置列宽
+ html.append('')
+ for col_idx in range(1, sheet.max_column + 1):
+ w = col_widths.get(col_idx, 80)
+ html.append(f'')
+ html.append('')
+
merged_map = {}
for r in sheet.merged_cells.ranges:
min_row, min_col, max_row, max_col = r.min_row, r.min_col, r.max_row, r.max_col
@@ -1099,6 +1129,8 @@ class FileConverter:
td_attrs += f' rowspan="{rowspan}"'
if colspan > 1:
td_attrs += f' colspan="{colspan}"'
+ # 合并单元格允许换行
+ style_str += 'white-space:normal;word-wrap:break-word;'
html.append(f'| {cell_value} | ')
html.append('')
html.append('
')