[前端+RAG] 修复:异步上传+MySQL回写导读;PDF切换detached(Uint8Array复制);对话工具栏遮挡;阅读模式空白
This commit is contained in:
@@ -74,9 +74,17 @@ const loadPdf = async () => {
|
||||
loading.value = true;
|
||||
error.value = '';
|
||||
try {
|
||||
// 每次加载都复制一份,避免 pdfjs worker transfer 后 detach 原始数据
|
||||
const rawSrc = toRaw(props.src);
|
||||
const data = rawSrc instanceof ArrayBuffer ? new Uint8Array(rawSrc) : rawSrc;
|
||||
const loadingTask = pdfjsLib.getDocument({ data });
|
||||
let bytes: Uint8Array;
|
||||
if (rawSrc instanceof Uint8Array) {
|
||||
bytes = new Uint8Array(rawSrc); // 复制
|
||||
} else if (rawSrc instanceof ArrayBuffer) {
|
||||
bytes = new Uint8Array(new Uint8Array(rawSrc)); // 复制
|
||||
} else {
|
||||
bytes = rawSrc as any;
|
||||
}
|
||||
const loadingTask = pdfjsLib.getDocument({ data: bytes });
|
||||
pdfDoc = await loadingTask.promise;
|
||||
const numPages = pdfDoc.numPages;
|
||||
pages.value = Array.from({ length: numPages }, (_, i) => i + 1);
|
||||
|
||||
@@ -434,7 +434,7 @@ const handleStop = async () => {
|
||||
|
||||
<style lang="less" scoped>
|
||||
.message-content {
|
||||
height: calc(100% - 255px);
|
||||
height: calc(100% - 290px);
|
||||
overflow-y: auto;
|
||||
padding: 20px;
|
||||
|
||||
@@ -456,12 +456,9 @@ const handleStop = async () => {
|
||||
.tool-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
width: calc(100% - 15px);
|
||||
padding-top: 30px;
|
||||
height: 65px;
|
||||
position:absolute;
|
||||
bottom:275px;
|
||||
background: linear-gradient( 360deg, #C0D4FD 0%, rgba(199,219,255,0) 100%);
|
||||
width: 100%;
|
||||
padding-top: 8px;
|
||||
height: 40px;
|
||||
.label {
|
||||
display: flex;
|
||||
justify-content: space-around;
|
||||
|
||||
@@ -110,7 +110,7 @@
|
||||
</div>
|
||||
<!-- PDF 原生渲染 -->
|
||||
<div v-if="fileType === 'pdf' && !readingMode" class="file-content" ref="fileContent" id="file-content">
|
||||
<PdfViewer v-if="pdfData" :src="pdfData" :scale="1.3" />
|
||||
<PdfViewer v-if="pdfBytes" :src="pdfBytes" :scale="1.3" />
|
||||
</div>
|
||||
<!-- HTML 阅读模式(PDF 阅读模式 + 非PDF文件) -->
|
||||
<div v-else class="file-content" ref="fileContent" id="file-content">
|
||||
@@ -284,13 +284,6 @@ const docHtml = ref('');
|
||||
const fileContent = ref(null);
|
||||
const readingBox = ref(null);
|
||||
const pdfBytes = ref<Uint8Array | null>(null); // 存原始字节,不会被 detach
|
||||
const pdfData = computed(() => {
|
||||
// 每次访问时复制一份新的 ArrayBuffer 给 PdfViewer
|
||||
if (!pdfBytes.value) return null;
|
||||
const copy = new ArrayBuffer(pdfBytes.value.byteLength);
|
||||
new Uint8Array(copy).set(pdfBytes.value);
|
||||
return copy;
|
||||
});
|
||||
const readingMode = ref(false);
|
||||
const fileType = computed(() => {
|
||||
const name = selectedFile.value?.fileName || '';
|
||||
@@ -915,8 +908,12 @@ onMounted(async () => {
|
||||
.view-md {
|
||||
padding: 20px;
|
||||
// 覆盖 PyMuPDF get_text("html") 输出的固定宽度
|
||||
:deep(div[style*="width:"]) { width: 100% !important; max-width: 100% !important; }
|
||||
:deep(.pdf-page > div) { width: 100% !important; }
|
||||
:deep(div) { max-width: 100% !important; }
|
||||
:deep(div[style*="width:"]) { width: auto !important; max-width: 100% !important; }
|
||||
:deep(.pdf-page) { max-width: 100% !important; }
|
||||
:deep(.pdf-page > div) { width: auto !important; max-width: 100% !important; }
|
||||
:deep(.pdf-preview) { max-width: 100% !important; }
|
||||
:deep(section) { max-width: 100% !important; }
|
||||
:deep(p) { font-size: 15px; line-height: 1.8rem; margin-block-start: 0; }
|
||||
:deep(.highlight) { background: #D0EAC8; }
|
||||
:deep(.note-flag) { width: 23px; height: 28px; line-height: 28px; display: inline-block; text-align: center; font-weight: bold; font-size: 10px; margin-left: 8px; cursor: pointer; background: url("@/assets/images/reading/note.png"); color: #004EA0; background-size: contain !important; background-repeat: no-repeat !important; background-position: center bottom !important; }
|
||||
|
||||
@@ -269,6 +269,75 @@ def upload_docs(
|
||||
return BaseResponse(code=200, msg="文件上传与向量化完成", data={"failed_files": failed_files})
|
||||
|
||||
|
||||
def _run_llm_guide_sync(knowledge_file):
    """Run ``knowledge_file.get_llm_result()`` to completion on a private event loop.

    The background worker runs in a plain thread that has no event loop of its
    own, so a fresh loop is created for each call and always torn down.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(knowledge_file.get_llm_result())
    finally:
        # Unregister first so the thread is never left holding a closed loop
        # as its "current" loop.
        asyncio.set_event_loop(None)
        loop.close()


def _persist_llm_guide(embedding_id, llm_result: dict, filename: str) -> bool:
    """Write the abstract / keywords / paragraph guide of one file to MySQL.

    Missing keys in ``llm_result`` fall back to the "generation failed"
    texts, so passing an empty dict records a failure for the file.

    Returns:
        True when the UPDATE was executed and committed, False when any
        database error occurred (the error is logged, never raised).
    """
    try:
        import pymysql

        params = (
            str(llm_result.get("article_abstract", "生成摘要失败")),
            str(llm_result.get("article_keywords", "生成关键词失败")),
            str(llm_result.get("article_paragraph", "生成章节速览失败")),
            embedding_id,
        )
        conn = pymysql.connect(**ck_mysql_config)
        try:
            with conn.cursor() as cursor:
                # pymysql's execute() returns the number of affected rows.
                affected = cursor.execute(
                    "UPDATE gpt_upload_file SET article_abstract=%s, article_keywords=%s, article_paragraph=%s WHERE embedding_id=%s",
                    params,
                )
            conn.commit()
        finally:
            # Always release the connection, even if execute/commit raised.
            conn.close()
    except Exception as db_e:
        logger.error(f"[后台] MySQL 更新失败 {filename}: {db_e}", exc_info=True)
        return False

    if affected:
        logger.info(f"[后台] LLM 导读已更新到数据库: {filename}")
    else:
        # Zero affected rows: either no row has this embedding_id (yet) or the
        # stored values were already identical. Surface it instead of
        # reporting an unconditional success.
        logger.warning(
            f"[后台] MySQL 更新未影响任何行 {filename} (embedding_id={embedding_id})"
        )
    return True


def _background_llm_and_vectorize(
    knowledge_base_name: str,
    file_names: List[str],
    chunk_size: int,
    chunk_overlap: int,
    zh_title_enhance: bool,
    docs: dict,
    not_refresh_vs_cache: bool,
    embedding_ids: dict,
) -> None:
    """Background worker: generate LLM reading guides, then vectorize the files.

    For every file the LLM guide (abstract, keywords, paragraph overview) is
    generated and written straight to the ``gpt_upload_file`` MySQL table.
    Afterwards all files are vectorized in one ``_update_docs_impl`` call.
    The function is meant to run in its own thread: every failure is logged
    and swallowed so one bad file cannot abort the remaining work.

    Args:
        knowledge_base_name: Name of the knowledge base the files belong to.
        file_names: Names of the files to process.
        chunk_size: Forwarded to ``_update_docs_impl``.
        chunk_overlap: Forwarded to ``_update_docs_impl``.
        zh_title_enhance: Forwarded to ``_update_docs_impl``.
        docs: Forwarded to ``_update_docs_impl``.
        not_refresh_vs_cache: When true, the vector store is not saved after
            vectorization.
        embedding_ids: Maps a file name to the ``embedding_id`` of its row in
            ``gpt_upload_file``; the file name itself is used when missing.
    """
    import time

    start_time = time.time()

    kb = KBServiceFactory.get_service_by_name(knowledge_base_name)

    for filename in file_names:
        try:
            knowledge_file = KnowledgeFile(
                filename=filename, knowledge_base_name=knowledge_base_name
            )
            llm_result = _run_llm_guide_sync(knowledge_file)
            if not isinstance(llm_result, dict):
                raise TypeError(
                    f"unexpected LLM result type: {type(llm_result).__name__}"
                )
        except Exception as e:
            logger.error(f"[后台] LLM 导读生成失败 {filename}: {e}", exc_info=True)
            # Still write the failure texts below so the database row does not
            # keep a "generation in progress" placeholder forever.
            llm_result = {}

        _persist_llm_guide(embedding_ids.get(filename, filename), llm_result, filename)

    # Vectorize all files in one pass.
    try:
        result = _update_docs_impl(
            knowledge_base_name=knowledge_base_name,
            file_names=file_names,
            override_custom_docs=True,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            zh_title_enhance=zh_title_enhance,
            docs=docs,
            not_refresh_vs_cache=True,
        )
        # Nobody receives the return value in the background, so at least log
        # the per-file failures it reports.
        failed = (getattr(result, "data", None) or {}).get("failed_files")
        if failed:
            logger.error(f"[后台] 向量化失败文件: {failed}")
        if kb and not not_refresh_vs_cache:
            kb.save_vector_store()
    except Exception as e:
        logger.error(f"[后台] 向量化失败: {e}", exc_info=True)
    logger.info(f"[后台] 总耗时: {time.time() - start_time:.2f}s")
|
||||
|
||||
|
||||
def upload_docs_new(
|
||||
files: List[UploadFile] = File(..., description="上传文件,支持多文件"),
|
||||
knowledge_base_name: str = Form(..., description="知识库名称", examples=["samples"]),
|
||||
@@ -282,7 +351,7 @@ def upload_docs_new(
|
||||
not_refresh_vs_cache: bool = Form(False, description="暂不保存向量库(用于FAISS)"),
|
||||
) -> BaseResponse:
|
||||
"""
|
||||
API接口:上传文件,并/或向量化
|
||||
API接口:上传文件,提取全文后快速返回,LLM导读+向量化后台异步执行并直连MySQL更新结果
|
||||
"""
|
||||
import time
|
||||
start_time = time.time()
|
||||
@@ -303,64 +372,51 @@ def upload_docs_new(
|
||||
failed_files = {}
|
||||
file_names = list(docs.keys())
|
||||
llm_results = {}
|
||||
embedding_ids = {}
|
||||
|
||||
# 先将上传的文件保存到磁盘
|
||||
# 保存文件到磁盘 + 提取全文(快速)
|
||||
for result in _save_files_in_thread(files, knowledge_base_name=knowledge_base_name, override=override):
|
||||
filename = result["data"]["file_name"]
|
||||
if result["code"] != 200:
|
||||
failed_files[filename] = result["msg"]
|
||||
|
||||
if filename not in file_names:
|
||||
file_names.append(filename)
|
||||
embedding_ids[filename] = filename
|
||||
|
||||
# 生成摘要、关键词、章节速览(模型已优化为 deepseek-v3)
|
||||
try:
|
||||
knowledge_file = KnowledgeFile(filename=filename, knowledge_base_name=knowledge_base_name)
|
||||
import concurrent.futures
|
||||
def run_async_in_thread():
|
||||
new_loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(new_loop)
|
||||
try:
|
||||
return new_loop.run_until_complete(knowledge_file.get_llm_result())
|
||||
finally:
|
||||
new_loop.close()
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
future = executor.submit(run_async_in_thread)
|
||||
llm_result = future.result()
|
||||
full_text_data = knowledge_file.get_full_text()
|
||||
import json as _json
|
||||
try:
|
||||
full_text = _json.loads(full_text_data).get("full_text", "")
|
||||
except:
|
||||
full_text = ""
|
||||
llm_results[filename] = {
|
||||
"full_text": llm_result.get("full_text", "获取全文失败"),
|
||||
"article_abstract": llm_result.get("article_abstract", "生成摘要失败"),
|
||||
"article_keywords": llm_result.get("article_keywords", "生成关键词失败"),
|
||||
"article_paragraph": llm_result.get("article_paragraph", "生成章节速览失败")
|
||||
"full_text": full_text,
|
||||
"article_abstract": "导读生成中,请稍后刷新查看...",
|
||||
"article_keywords": "导读生成中,请稍后刷新查看...",
|
||||
"article_paragraph": "导读生成中,请稍后刷新查看..."
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"生成LLM结果时出错:{e}", exc_info=e if log_verbose else None)
|
||||
logger.error(f"提取全文失败 {filename}: {e}")
|
||||
llm_results[filename] = {
|
||||
"article_abstract": "生成摘要失败",
|
||||
"article_keywords": "生成关键词失败",
|
||||
"article_paragraph": "生成章节速览失败"
|
||||
"full_text": "",
|
||||
"article_abstract": "导读生成中,请稍后刷新查看...",
|
||||
"article_keywords": "导读生成中,请稍后刷新查看...",
|
||||
"article_paragraph": "导读生成中,请稍后刷新查看..."
|
||||
}
|
||||
|
||||
# 对保存的文件进行向量化
|
||||
if to_vector_store:
|
||||
update_st = time.time()
|
||||
result = _update_docs_impl(
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
file_names=file_names,
|
||||
override_custom_docs=True,
|
||||
chunk_size=chunk_size,
|
||||
chunk_overlap=chunk_overlap,
|
||||
zh_title_enhance=zh_title_enhance,
|
||||
docs=docs,
|
||||
not_refresh_vs_cache=True,
|
||||
)
|
||||
failed_files.update(result.data["failed_files"])
|
||||
if not not_refresh_vs_cache:
|
||||
kb.save_vector_store()
|
||||
logger.info(f'向量化用时:{time.time() - update_st}')
|
||||
logger.info(f"总执行时间: {time.time() - start_time:.2f}s")
|
||||
return BaseResponse(code=200, msg="文件上传与向量化完成", data={
|
||||
# 后台异步:LLM 导读 + 向量化,完成后直连 MySQL 更新
|
||||
import threading
|
||||
threading.Thread(
|
||||
target=_background_llm_and_vectorize,
|
||||
args=(knowledge_base_name, file_names, chunk_size, chunk_overlap,
|
||||
zh_title_enhance, docs, not_refresh_vs_cache, embedding_ids),
|
||||
daemon=True
|
||||
).start()
|
||||
|
||||
logger.info(f"文件上传+全文提取: {time.time() - start_time:.2f}s,LLM+向量化转后台")
|
||||
return BaseResponse(code=200, msg="文件上传完成,导读生成中", data={
|
||||
"failed_files": failed_files,
|
||||
"llm_results": llm_results
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user