def _run_in_fresh_event_loop(make_awaitable):
    """Run the awaitable returned by ``make_awaitable()`` on a new, private event loop.

    The factory is invoked only after the new loop has been installed as the
    current loop of the calling thread, mirroring the original call order.

    Args:
        make_awaitable: Zero-argument callable returning a coroutine/awaitable.

    Returns:
        Whatever the awaitable resolves to.

    Raises:
        Any exception raised by the factory or by the awaitable itself.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(make_awaitable())
    finally:
        try:
            # Finalize async generators the awaitable may have left open.
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            # Never leave a closed loop registered as this thread's current loop.
            asyncio.set_event_loop(None)
            loop.close()


def _background_llm_and_vectorize(
    knowledge_base_name: str,
    file_names: List[str],
    chunk_size: int,
    chunk_overlap: int,
    zh_title_enhance: bool,
    docs: dict,
    not_refresh_vs_cache: bool,
    to_vector_store: bool = True,
):
    """Background worker: generate LLM reading guides, then vectorize the files.

    Intended to run in its own thread so the upload response is not blocked.
    Every failure is logged (with traceback) instead of raised, because no
    caller is waiting on this function.

    Step 1 calls ``KnowledgeFile.get_llm_result()`` for each file and stores the
    result as ``<filename>.llm_result.json`` inside the knowledge base folder.
    According to the original author's note, a Java backend polls that file.

    Step 2 runs ``_update_docs_impl`` over all files and, unless
    ``not_refresh_vs_cache`` is set, saves the vector store.

    Args:
        knowledge_base_name: Name of the target knowledge base.
        file_names: Files to process.
        chunk_size: Forwarded to ``_update_docs_impl``.
        chunk_overlap: Forwarded to ``_update_docs_impl``.
        zh_title_enhance: Forwarded to ``_update_docs_impl``.
        docs: Custom docs mapping, forwarded to ``_update_docs_impl``.
        not_refresh_vs_cache: When true, skip saving the vector store.
        to_vector_store: When false, skip step 2 entirely. Defaults to ``True``,
            which matches the behavior before this parameter existed.

    Returns:
        None.
    """
    import json
    import time

    start_time = time.time()

    # 1. Generate the LLM reading guide (abstract, keywords, chapter overview).
    # The knowledge base folder is loop-invariant, so resolve it once.
    cache_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        "knowledge_base",
        knowledge_base_name,
    )
    for filename in file_names:
        try:
            knowledge_file = KnowledgeFile(filename=filename, knowledge_base_name=knowledge_base_name)
            llm_result = _run_in_fresh_event_loop(knowledge_file.get_llm_result)

            cache_file = os.path.join(cache_dir, f"{filename}.llm_result.json")
            # Use the file's own parent so a filename with a sub-directory works too.
            os.makedirs(os.path.dirname(cache_file), exist_ok=True)
            # Write to a sibling temp file and rename it into place, so a
            # concurrent reader never observes partially written JSON.
            tmp_file = f"{cache_file}.tmp"
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(llm_result, f, ensure_ascii=False)
            os.replace(tmp_file, cache_file)
            logger.info(f"[后台] LLM 导读生成完成: {filename}")
        except Exception as e:
            # One bad file must not stop the remaining files or the vectorization.
            logger.error(f"[后台] LLM 导读生成失败 {filename}: {e}", exc_info=True)

    if not to_vector_store:
        return

    # 2. Vectorize.
    try:
        result = _update_docs_impl(
            knowledge_base_name=knowledge_base_name,
            file_names=file_names,
            override_custom_docs=True,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            zh_title_enhance=zh_title_enhance,
            docs=docs,
            # Saving is handled explicitly below, once, after all files are done.
            not_refresh_vs_cache=True,
        )

        # Surface per-file failures; nobody reads the return value of a thread target.
        data = getattr(result, "data", None)
        failed = data.get("failed_files") if isinstance(data, dict) else None
        if failed:
            logger.warning(f"[后台] 部分文件向量化失败: {failed}")

        if not not_refresh_vs_cache:
            kb = KBServiceFactory.get_service_by_name(knowledge_base_name)
            if kb:
                kb.save_vector_store()
        logger.info(f"[后台] 向量化完成,总耗时: {time.time() - start_time:.2f}s")
    except Exception as e:
        logger.error(f"[后台] 向量化失败: {e}", exc_info=True)
new_loop.run_until_complete(knowledge_file.get_llm_result()) - finally: - new_loop.close() - - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_async_in_thread) - llm_result = future.result() + full_text_data = knowledge_file.get_full_text() + import json as _json + try: + full_text = _json.loads(full_text_data).get("full_text", "") + except: + full_text = "" llm_results[filename] = { - "full_text": llm_result.get("full_text", "获取全文失败"), - "article_abstract": llm_result.get("article_abstract", "生成摘要失败"), - "article_keywords": llm_result.get("article_keywords", "生成关键词失败"), - "article_paragraph": llm_result.get("article_paragraph", "生成章节速览失败") + "full_text": full_text, + "article_abstract": "导读生成中...", + "article_keywords": "导读生成中...", + "article_paragraph": "导读生成中..." } except Exception as e: - logger.error(f"生成LLM结果时出错:{e}", exc_info=e if log_verbose else None) + logger.error(f"提取全文失败 {filename}: {e}") llm_results[filename] = { - "article_abstract": "生成摘要失败", - "article_keywords": "生成关键词失败", - "article_paragraph": "生成章节速览失败" + "full_text": "", + "article_abstract": "导读生成中...", + "article_keywords": "导读生成中...", + "article_paragraph": "导读生成中..." 
} - # 对保存的文件进行向量化 - if to_vector_store: - update_st = time.time() - result = _update_docs_impl( - knowledge_base_name=knowledge_base_name, - file_names=file_names, - override_custom_docs=True, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - zh_title_enhance=zh_title_enhance, - docs=docs, - not_refresh_vs_cache=True, - ) - failed_files.update(result.data["failed_files"]) - if not not_refresh_vs_cache: - kb.save_vector_store() - logger.info(f'向量化用时:{time.time() - update_st}') - logger.info(f"总执行时间: {time.time() - start_time:.2f}s") - return BaseResponse(code=200, msg="文件上传与向量化完成", data={ + # 后台异步执行 LLM 导读 + 向量化(不阻塞响应) + import threading + bg_thread = threading.Thread( + target=_background_llm_and_vectorize, + args=(knowledge_base_name, file_names, chunk_size, chunk_overlap, + zh_title_enhance, docs, not_refresh_vs_cache), + daemon=True + ) + bg_thread.start() + + logger.info(f"文件上传+全文提取用时: {time.time() - start_time:.2f}s,LLM+向量化已转后台") + return BaseResponse(code=200, msg="文件上传完成,导读生成中", data={ "failed_files": failed_files, "llm_results": llm_results }) diff --git a/langchain-chat/server/knowledge_base/utils.py b/langchain-chat/server/knowledge_base/utils.py index 58fb3c3..514a3f6 100644 --- a/langchain-chat/server/knowledge_base/utils.py +++ b/langchain-chat/server/knowledge_base/utils.py @@ -390,7 +390,7 @@ class KnowledgeFile: llm_time = time.time() abstract_task = get_llm_model_response_async( strategy_name="gen_abstract", - llm_model_name=LLM_MODELS[1], + llm_model_name=LLM_MODELS[0], template_prompt_name="gen_abstract", prompt_param_dict={"context": full_text}, temperature=0.7, @@ -399,7 +399,7 @@ class KnowledgeFile: keywords_task = get_llm_model_response_async( strategy_name="gen_keywords", - llm_model_name=LLM_MODELS[1], + llm_model_name=LLM_MODELS[0], template_prompt_name="gen_keywords", prompt_param_dict={"context": full_text}, temperature=0.7,