[前端+RAG] 异步上传+前端轮询自动刷新导读;PDF阅读模式合并行消除留白
This commit is contained in:
@@ -58,8 +58,8 @@
|
|||||||
<script setup lang='ts'>
|
<script setup lang='ts'>
|
||||||
import {copyToClip} from "@/utils";
|
import {copyToClip} from "@/utils";
|
||||||
import {ElMessage} from "element-plus";
|
import {ElMessage} from "element-plus";
|
||||||
import {fileGuidInfo} from "@/api";
|
import {fileGuidInfo, getFileGuide} from "@/api";
|
||||||
import {computed, inject, ref, type Ref} from "vue";
|
import {inject, onBeforeUnmount, ref, watch, type Ref} from "vue";
|
||||||
import MarkdownIt from "markdown-it";
|
import MarkdownIt from "markdown-it";
|
||||||
import {transforMd} from "@/utils/markdown";
|
import {transforMd} from "@/utils/markdown";
|
||||||
|
|
||||||
@@ -78,9 +78,51 @@ const articleAbstract=ref('');
|
|||||||
const articleKeywords=ref('');
|
const articleKeywords=ref('');
|
||||||
const articleParagraph=ref('');
|
const articleParagraph=ref('');
|
||||||
|
|
||||||
// 监听选中文件变化,更新导读内容
|
// 轮询定时器
|
||||||
import {watch} from "vue";
|
// Handle of the active polling interval, or null when no poll is running.
let pollTimer: ReturnType<typeof setInterval> | null = null;
// Prefix of the placeholder text shown while the guide is still being generated.
const PLACEHOLDER = '导读生成中';

/**
 * Tells whether a guide field still holds the "generation in progress" placeholder.
 *
 * Always returns a real boolean: empty, null or undefined input yields `false`
 * instead of leaking the falsy input value to the caller.
 *
 * @param text - Guide field text (abstract, keywords or paragraph); may be missing.
 * @returns `true` only when `text` is a string starting with the placeholder prefix.
 */
const isPending = (text?: string | null): boolean =>
  typeof text === 'string' && text.startsWith(PLACEHOLDER);
|
||||||
|
|
||||||
|
/**
 * Returns the text that follows the "关键词:" label (ASCII or full-width colon).
 * When the label is absent the input is returned unchanged, so nothing is chopped off.
 */
const stripKeywordLabel = (raw: string): string => {
  const label = /关键词[::]/.exec(raw);
  return label ? raw.slice(label.index + label[0].length) : raw;
};

/**
 * Polls the guide endpoint every 5 seconds for the currently selected file and,
 * once none of the guide fields is a "generation in progress" placeholder,
 * copies the result into the local refs and into `selectedFile`, then stops.
 *
 * Any previously running poll is cancelled first. A failed request is logged and
 * retried on the next tick; it never stops the poll by itself.
 */
const startPolling = () => {
  stopPolling();

  // Prevents a slow request from overlapping with the next tick.
  let inFlight = false;

  const timer = setInterval(async () => {
    const fileId = selectedFile.value?.fileId;
    if (!fileId) { stopPolling(); return; }
    if (inFlight) return;

    inFlight = true;
    try {
      const res = await getFileGuide(fileId);

      // While awaiting, the user may have selected another file or the poll may
      // have been stopped/restarted; a stale response must not be applied.
      if (pollTimer !== timer || selectedFile.value?.fileId !== fileId) return;
      if (res?.code !== 200 || !res.data) return;

      const d = res.data;
      // Same readiness rule as the selection watcher: every field must be final.
      if (isPending(d.articleAbstract) || isPending(d.articleKeywords) || isPending(d.articleParagraph)) {
        return;
      }

      articleAbstract.value = d.articleAbstract || '';
      articleKeywords.value = stripKeywordLabel(d.articleKeywords || '');
      articleParagraph.value = d.articleParagraph || '';

      // Mirror the fresh values onto selectedFile so other components see them too.
      if (selectedFile.value) {
        selectedFile.value.articleAbstract = d.articleAbstract;
        selectedFile.value.articleKeywords = d.articleKeywords;
        selectedFile.value.articleParagraph = d.articleParagraph;
      }
      stopPolling();
    } catch (e: unknown) {
      // Best effort: keep polling, but do not hide the failure.
      console.warn('[guide] polling request failed, will retry on next tick', e);
    } finally {
      inFlight = false;
    }
  }, 5000);

  pollTimer = timer;
};
|
||||||
|
|
||||||
|
/** Cancels the active polling interval, if there is one, and clears the stored handle. */
const stopPolling = () => {
  if (!pollTimer) return;
  clearInterval(pollTimer);
  pollTimer = null;
};
|
||||||
|
|
||||||
|
// Cancel the polling interval right before the component unmounts so the timer does not outlive it.
onBeforeUnmount(() => stopPolling());
|
||||||
|
|
||||||
|
// 监听选中文件变化
|
||||||
watch(() => selectedFile.value, (newFile) => {
|
watch(() => selectedFile.value, (newFile) => {
|
||||||
|
stopPolling();
|
||||||
if (newFile) {
|
if (newFile) {
|
||||||
articleAbstract.value = newFile.articleAbstract || '';
|
articleAbstract.value = newFile.articleAbstract || '';
|
||||||
let kw = newFile.articleKeywords || '';
|
let kw = newFile.articleKeywords || '';
|
||||||
@@ -90,6 +132,10 @@ watch(() => selectedFile.value, (newFile) => {
|
|||||||
}
|
}
|
||||||
articleKeywords.value = kw;
|
articleKeywords.value = kw;
|
||||||
articleParagraph.value = newFile.articleParagraph || '';
|
articleParagraph.value = newFile.articleParagraph || '';
|
||||||
|
// 如果是占位文字,启动轮询
|
||||||
|
if (isPending(articleAbstract.value) || isPending(articleKeywords.value) || isPending(articleParagraph.value)) {
|
||||||
|
startPolling();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
articleAbstract.value = '';
|
articleAbstract.value = '';
|
||||||
articleKeywords.value = '';
|
articleKeywords.value = '';
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ import {onMounted, onUnmounted, ref, reactive, provide, nextTick, computed, watc
|
|||||||
import {
|
import {
|
||||||
getKnowledgeBaseList, addKnowledgeBase, editKnowledgeBase, delKnowledgeBase,
|
getKnowledgeBaseList, addKnowledgeBase, editKnowledgeBase, delKnowledgeBase,
|
||||||
getKnowledgeBaseContent, uploadFile, editFile, delFile, delFiles,
|
getKnowledgeBaseContent, uploadFile, editFile, delFile, delFiles,
|
||||||
downloadFile, getFileContent, addFileNote, getSize
|
downloadFile, getFileContent, addFileNote, getSize, getFileGuide
|
||||||
} from "@/api";
|
} from "@/api";
|
||||||
import {withLoading} from "@/utils/loading";
|
import {withLoading} from "@/utils/loading";
|
||||||
import {copyToClip, getGlobalSelectionPosition} from "@/utils";
|
import {copyToClip, getGlobalSelectionPosition} from "@/utils";
|
||||||
@@ -441,6 +441,18 @@ const handleNodeClick = async (data: any) => {
|
|||||||
articleParagraph: doc.articleParagraph || '暂无内容,请重试',
|
articleParagraph: doc.articleParagraph || '暂无内容,请重试',
|
||||||
fullContent: doc.context
|
fullContent: doc.context
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 从 API 获取最新的导读数据(后台线程可能已更新 MySQL)
|
||||||
|
try {
|
||||||
|
const res = await getFileGuide(doc.id + '');
|
||||||
|
if (res?.code === 200 && res.data) {
|
||||||
|
const fresh = res.data;
|
||||||
|
if (fresh.articleAbstract) { selectedFile.value.articleAbstract = fresh.articleAbstract; data.raw.articleAbstract = fresh.articleAbstract; }
|
||||||
|
if (fresh.articleKeywords) { selectedFile.value.articleKeywords = fresh.articleKeywords; data.raw.articleKeywords = fresh.articleKeywords; }
|
||||||
|
if (fresh.articleParagraph) { selectedFile.value.articleParagraph = fresh.articleParagraph; data.raw.articleParagraph = fresh.articleParagraph; }
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
|
||||||
// 根据文件类型加载内容
|
// 根据文件类型加载内容
|
||||||
readingMode.value = false;
|
readingMode.value = false;
|
||||||
const ext = doc.filename?.split('.').pop()?.toLowerCase() || '';
|
const ext = doc.filename?.split('.').pop()?.toLowerCase() || '';
|
||||||
|
|||||||
@@ -957,22 +957,33 @@ class FileConverter:
|
|||||||
|
|
||||||
if text.strip():
|
if text.strip():
|
||||||
any_text = True
|
any_text = True
|
||||||
# 按行处理文本,识别标题
|
|
||||||
lines = text.split('\n')
|
lines = text.split('\n')
|
||||||
|
# 合并连续非空行为段落,空行分段,标题行独立
|
||||||
|
current_para = []
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.strip()
|
stripped = line.strip()
|
||||||
if not line:
|
if not stripped:
|
||||||
|
# 空行 → 结束当前段落
|
||||||
|
if current_para:
|
||||||
|
page_parts.append(f'<p>{self._escape_html("".join(current_para))}</p>')
|
||||||
|
current_para = []
|
||||||
continue
|
continue
|
||||||
# 简单的标题检测:短行 + 无标点结尾
|
# 标题检测
|
||||||
is_heading = (len(line) < 40 and not line.endswith(('。', ',', ';', '、', ':', ',', '.', ';'))
|
is_heading = (len(stripped) < 30
|
||||||
and not line.startswith(('(', '('))
|
and not stripped.endswith(('。', ',', ';', '、', ':', ',', '.', ';'))
|
||||||
and re.match(r'^[一二三四五六七八九十\d]+[、..]', line))
|
and not stripped.startswith(('(', '('))
|
||||||
|
and re.match(r'^[一二三四五六七八九十\d]+[、..]', stripped))
|
||||||
if is_heading:
|
if is_heading:
|
||||||
escaped = self._escape_html(line)
|
# 先输出累积的段落
|
||||||
page_parts.append(f'<h3>{escaped}</h3>')
|
if current_para:
|
||||||
|
page_parts.append(f'<p>{self._escape_html("".join(current_para))}</p>')
|
||||||
|
current_para = []
|
||||||
|
page_parts.append(f'<h3>{self._escape_html(stripped)}</h3>')
|
||||||
else:
|
else:
|
||||||
escaped = self._escape_html(line)
|
current_para.append(stripped)
|
||||||
page_parts.append(f'<p>{escaped}</p>')
|
# 输出最后一个段落
|
||||||
|
if current_para:
|
||||||
|
page_parts.append(f'<p>{self._escape_html("".join(current_para))}</p>')
|
||||||
|
|
||||||
# 渲染表格
|
# 渲染表格
|
||||||
for table in tables:
|
for table in tables:
|
||||||
|
|||||||
@@ -270,6 +270,74 @@ def upload_docs(
|
|||||||
return BaseResponse(code=200, msg="文件上传与向量化完成", data={"failed_files": failed_files})
|
return BaseResponse(code=200, msg="文件上传与向量化完成", data={"failed_files": failed_files})
|
||||||
|
|
||||||
|
|
||||||
|
def _write_guide_to_mysql(filename: str, guide: dict) -> None:
    """Write the three guide fields of ``filename`` into ``gpt_upload_file``.

    The row is matched on ``embedding_id = filename``. Missing keys in ``guide``
    fall back to the fixed failure texts. Any error is logged and swallowed so
    the caller can carry on with the remaining files.
    """
    guide = guide or {}
    try:
        import pymysql

        conn = pymysql.connect(**ck_mysql_config)
        try:
            with conn.cursor() as cursor:
                updated = cursor.execute(
                    "UPDATE gpt_upload_file SET article_abstract=%s, article_keywords=%s, article_paragraph=%s WHERE embedding_id=%s",
                    (
                        str(guide.get("article_abstract", "生成摘要失败")),
                        str(guide.get("article_keywords", "生成关键词失败")),
                        str(guide.get("article_paragraph", "生成章节速览失败")),
                        filename,
                    ),
                )
            conn.commit()
            logger.info(f"[后台] MySQL 更新成功: {filename}, affected rows: {updated}")
        finally:
            # Always release the connection, even when execute/commit raised.
            conn.close()
    except Exception as db_e:
        logger.error(f"[后台] MySQL 更新失败 {filename}: {db_e}", exc_info=True)


def _background_generate_and_update(
        knowledge_base_name: str,
        file_names: List[str],
        chunk_size: int,
        chunk_overlap: int,
        zh_title_enhance: bool,
        docs: dict,
        not_refresh_vs_cache: bool,
):
    """Background worker: generate the LLM guide per file, write it to MySQL, then vectorize.

    For every file name the LLM guide is produced on a private event loop and
    written straight to MySQL. If generation fails, the failure texts are
    written instead, so the row does not keep its "in progress" placeholder
    forever. Afterwards all files are vectorized in one ``_update_docs_impl``
    call and the vector store is saved unless ``not_refresh_vs_cache`` is set.

    Nothing is raised to the caller: every stage logs its own errors.
    """
    import time

    start = time.time()

    kb = KBServiceFactory.get_service_by_name(knowledge_base_name)

    for filename in file_names:
        try:
            knowledge_file = KnowledgeFile(filename=filename, knowledge_base_name=knowledge_base_name)
            new_loop = asyncio.new_event_loop()
            asyncio.set_event_loop(new_loop)
            try:
                llm_result = new_loop.run_until_complete(knowledge_file.get_llm_result())
            finally:
                new_loop.close()
                # Do not leave a closed loop registered as this thread's current loop.
                asyncio.set_event_loop(None)

            logger.info(f"[后台] LLM 导读生成完成: {filename}")
        except Exception as e:
            logger.error(f"[后台] LLM 生成失败 {filename}: {e}", exc_info=True)
            # An empty result makes the writer fall back to the failure texts.
            llm_result = {}

        # Direct MySQL update (matched on embedding_id, which equals the filename).
        _write_guide_to_mysql(filename, llm_result)

    # Vectorization
    try:
        _update_docs_impl(
            knowledge_base_name=knowledge_base_name,
            file_names=file_names,
            override_custom_docs=True,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            zh_title_enhance=zh_title_enhance,
            docs=docs,
            not_refresh_vs_cache=True,
        )
        if kb and not not_refresh_vs_cache:
            kb.save_vector_store()
    except Exception as e:
        logger.error(f"[后台] 向量化失败: {e}", exc_info=True)

    logger.info(f"[后台] 全部完成,耗时: {time.time() - start:.2f}s")
|
||||||
|
|
||||||
|
|
||||||
def upload_docs_new(
|
def upload_docs_new(
|
||||||
files: List[UploadFile] = File(..., description="上传文件,支持多文件"),
|
files: List[UploadFile] = File(..., description="上传文件,支持多文件"),
|
||||||
knowledge_base_name: str = Form(..., description="知识库名称", examples=["samples"]),
|
knowledge_base_name: str = Form(..., description="知识库名称", examples=["samples"]),
|
||||||
@@ -283,7 +351,7 @@ def upload_docs_new(
|
|||||||
not_refresh_vs_cache: bool = Form(False, description="暂不保存向量库(用于FAISS)"),
|
not_refresh_vs_cache: bool = Form(False, description="暂不保存向量库(用于FAISS)"),
|
||||||
) -> BaseResponse:
|
) -> BaseResponse:
|
||||||
"""
|
"""
|
||||||
API接口:上传文件,同步生成导读(模型已优化为deepseek-v3),然后向量化
|
API接口:上传文件,快速返回(仅提取全文),LLM导读+向量化后台异步执行并直连MySQL回写
|
||||||
"""
|
"""
|
||||||
import time
|
import time
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
@@ -305,64 +373,53 @@ def upload_docs_new(
|
|||||||
file_names = list(docs.keys())
|
file_names = list(docs.keys())
|
||||||
llm_results = {}
|
llm_results = {}
|
||||||
|
|
||||||
|
# 保存文件 + 提取全文(快速,不调 LLM)
|
||||||
for result in _save_files_in_thread(files, knowledge_base_name=knowledge_base_name, override=override):
|
for result in _save_files_in_thread(files, knowledge_base_name=knowledge_base_name, override=override):
|
||||||
filename = result["data"]["file_name"]
|
filename = result["data"]["file_name"]
|
||||||
if result["code"] != 200:
|
if result["code"] != 200:
|
||||||
failed_files[filename] = result["msg"]
|
failed_files[filename] = result["msg"]
|
||||||
|
|
||||||
if filename not in file_names:
|
if filename not in file_names:
|
||||||
file_names.append(filename)
|
file_names.append(filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
knowledge_file = KnowledgeFile(filename=filename, knowledge_base_name=knowledge_base_name)
|
knowledge_file = KnowledgeFile(filename=filename, knowledge_base_name=knowledge_base_name)
|
||||||
import concurrent.futures
|
full_text_data = knowledge_file.get_full_text()
|
||||||
def run_async_in_thread():
|
import json as _json
|
||||||
new_loop = asyncio.new_event_loop()
|
try:
|
||||||
asyncio.set_event_loop(new_loop)
|
full_text = _json.loads(full_text_data).get("full_text", "")
|
||||||
try:
|
except:
|
||||||
return new_loop.run_until_complete(knowledge_file.get_llm_result())
|
full_text = ""
|
||||||
finally:
|
|
||||||
new_loop.close()
|
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
|
||||||
future = executor.submit(run_async_in_thread)
|
|
||||||
llm_result = future.result()
|
|
||||||
llm_results[filename] = {
|
llm_results[filename] = {
|
||||||
"full_text": llm_result.get("full_text", "获取全文失败"),
|
"full_text": full_text,
|
||||||
"article_abstract": llm_result.get("article_abstract", "生成摘要失败"),
|
"article_abstract": "导读生成中,请稍后刷新...",
|
||||||
"article_keywords": llm_result.get("article_keywords", "生成关键词失败"),
|
"article_keywords": "导读生成中,请稍后刷新...",
|
||||||
"article_paragraph": llm_result.get("article_paragraph", "生成章节速览失败")
|
"article_paragraph": "导读生成中,请稍后刷新..."
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"生成LLM结果时出错:{e}", exc_info=e if log_verbose else None)
|
logger.error(f"提取全文失败 {filename}: {e}")
|
||||||
llm_results[filename] = {
|
llm_results[filename] = {
|
||||||
"article_abstract": "生成摘要失败",
|
"full_text": "",
|
||||||
"article_keywords": "生成关键词失败",
|
"article_abstract": "导读生成中,请稍后刷新...",
|
||||||
"article_paragraph": "生成章节速览失败"
|
"article_keywords": "导读生成中,请稍后刷新...",
|
||||||
|
"article_paragraph": "导读生成中,请稍后刷新..."
|
||||||
}
|
}
|
||||||
|
|
||||||
if to_vector_store:
|
# 后台线程:LLM 导读 + 向量化 + MySQL 回写
|
||||||
update_st = time.time()
|
import threading
|
||||||
result = _update_docs_impl(
|
threading.Thread(
|
||||||
knowledge_base_name=knowledge_base_name,
|
target=_background_generate_and_update,
|
||||||
file_names=file_names,
|
args=(knowledge_base_name, file_names, chunk_size, chunk_overlap,
|
||||||
override_custom_docs=True,
|
zh_title_enhance, docs, not_refresh_vs_cache),
|
||||||
chunk_size=chunk_size,
|
daemon=True
|
||||||
chunk_overlap=chunk_overlap,
|
).start()
|
||||||
zh_title_enhance=zh_title_enhance,
|
|
||||||
docs=docs,
|
logger.info(f"上传+全文提取: {time.time() - start_time:.2f}s,后台生成中")
|
||||||
not_refresh_vs_cache=True,
|
return BaseResponse(code=200, msg="文件上传完成", data={
|
||||||
)
|
|
||||||
failed_files.update(result.data["failed_files"])
|
|
||||||
if not not_refresh_vs_cache:
|
|
||||||
kb.save_vector_store()
|
|
||||||
logger.info(f'向量化用时:{time.time() - update_st}')
|
|
||||||
logger.info(f"总执行时间: {time.time() - start_time:.2f}s")
|
|
||||||
return BaseResponse(code=200, msg="文件上传与向量化完成", data={
|
|
||||||
"failed_files": failed_files,
|
"failed_files": failed_files,
|
||||||
"llm_results": llm_results
|
"llm_results": llm_results
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def delete_docs(
|
def delete_docs(
|
||||||
knowledge_base_name: str = Body(..., examples=["samples"]),
|
knowledge_base_name: str = Body(..., examples=["samples"]),
|
||||||
file_names: List[str] = Body(..., examples=[["file_name.md", "test.txt"]]),
|
file_names: List[str] = Body(..., examples=[["file_name.md", "test.txt"]]),
|
||||||
|
|||||||
Reference in New Issue
Block a user