[前端+后端+RAG] 检索范围切换(当前文件/整个知识库);联网搜索功能(SearXNG);搜索结果带网络链接;修复RAG检索source格式不匹配bug
This commit is contained in:
@@ -50,6 +50,7 @@ async def self_kb_chat(
|
||||
"content": "虎头虎脑"}]]
|
||||
),
|
||||
stream: bool = Body(True, description="流式输出"),
|
||||
web_search: bool = Body(False, description="是否开启联网搜索"),
|
||||
):
|
||||
"""
|
||||
个人知识库对话api\n
|
||||
@@ -149,7 +150,29 @@ async def self_kb_chat(
|
||||
except Exception as e:
|
||||
logger.error(f"个人知识库问答路由错误: {self_kb_route}", exc_info=True)
|
||||
docs = []
|
||||
logger.info(f"个人知识库问答source_documents: {docs}")
|
||||
logger.info(f"个人知识库问答source_documents: {len(docs)}条")
|
||||
|
||||
# 联网搜索
|
||||
web_search_context = ""
|
||||
web_search_results = [] # 保存搜索结果供后面引用
|
||||
if web_search:
|
||||
try:
|
||||
from server.chat.ZhipuSearchAPI import ZhipuSearchAPIWrapper
|
||||
searcher = ZhipuSearchAPIWrapper()
|
||||
web_results = searcher.zhipu_search(search_query)
|
||||
web_search_results = web_results[:5] if web_results else []
|
||||
if web_results:
|
||||
web_parts = []
|
||||
for i, r in enumerate(web_results[:5], 1):
|
||||
title = r.get("title", "")
|
||||
content = r.get("content", "")[:300]
|
||||
url = r.get("url", "")
|
||||
web_parts.append(f"[{i}] {title}\n{content}\n来源: {url}")
|
||||
web_search_context = "\n\n【联网搜索结果】\n" + "\n\n".join(web_parts)
|
||||
logger.info(f"联网搜索获取到 {len(web_results)} 条结果")
|
||||
except Exception as e:
|
||||
logger.error(f"联网搜索失败: {e}")
|
||||
|
||||
# if SELF_USE_RERANKER:
|
||||
# reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL,"BAAI/bge-reranker-large")
|
||||
# print("-----------------model path------------------")
|
||||
@@ -184,25 +207,28 @@ async def self_kb_chat(
|
||||
if '0' in self_kb_route:
|
||||
context = "\n".join([doc.page_content for doc in docs]).strip("xa0")
|
||||
logger.info(f"个人知识库问答 context 长度:{len(context)}")
|
||||
# context_70 = context if len(context)<30000 else TextRank(context,num_sentences=70)
|
||||
context = context[:40000] if len(context)>40000 else context
|
||||
logger.info(f"截取后个人知识库问答 context 长度:{len(context)}")
|
||||
context = context[:30000] if len(context)>30000 else context
|
||||
if web_search_context:
|
||||
context += web_search_context
|
||||
logger.info(f"最终 context 长度:{len(context)}")
|
||||
if history:
|
||||
history = history if len(history) < 20000 else TextRank(history,num_sentences=1)
|
||||
# logger.info(f"个人知识库问答 context 长度超过 30000,使用 TextRank 算法进行降维得到 context 长度:{len(context)}")
|
||||
chain = LLMChain(prompt=chat_prompt, llm=model1, verbose=True)
|
||||
task = asyncio.create_task(wrap_done(
|
||||
chain.acall({"context": context, "question": query, "history": history, "quote": quote, "fileName":fileNames}),
|
||||
callback.done),
|
||||
)
|
||||
elif '1' in self_kb_route:
|
||||
# 联网搜索结果作为额外文档加入
|
||||
if web_search_context:
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
docs.append(LCDocument(page_content=web_search_context, metadata={"source": "web_search"}))
|
||||
chain = load_qa_chain(
|
||||
model,
|
||||
chain_type="stuff",
|
||||
prompt=chat_prompt,
|
||||
model,
|
||||
chain_type="stuff",
|
||||
prompt=chat_prompt,
|
||||
verbose=True
|
||||
)
|
||||
# Begin a task that runs in the background.
|
||||
task = asyncio.create_task(wrap_done(
|
||||
chain.acall({"input_documents": docs, "question": query, "history": history, "quote": quote, "fileName":fileNames}),
|
||||
callback.done),
|
||||
@@ -235,14 +261,18 @@ async def self_kb_chat(
|
||||
yield json.dumps(response, ensure_ascii=False)
|
||||
await task
|
||||
source_documents = []
|
||||
if len(docs) == 0: # 没有找到相关文档
|
||||
if len(docs) == 0 and not web_search_context:
|
||||
source_documents.append(f"""暂未从本篇文献中找到答案,该回答为大模型自身能力解答!""")
|
||||
else:
|
||||
# 去除文件扩展名
|
||||
# fileNames_without_ext = [name.rsplit('.', 1)[0] for name in fileNames]
|
||||
# 连接文件名(如果有多个文件名)
|
||||
# joined_fileNames = ', '.join(fileNames_without_ext)
|
||||
source_documents.append(f"""[{len(source_documents) + 1}] [{docs[0].metadata.get("source")}]()\n""")
|
||||
if len(docs) > 0:
|
||||
source_documents.append(f"""[{len(source_documents) + 1}] [{docs[0].metadata.get("source")}]()\n""")
|
||||
# 联网搜索结果链接
|
||||
if web_search_results:
|
||||
for r in web_search_results:
|
||||
title = r.get("title", "").replace("\n", "")
|
||||
url = r.get("url", "")
|
||||
if title and url:
|
||||
source_documents.append(f"""[{len(source_documents) + 1}] [{title}]({url})\n""")
|
||||
yield json.dumps({"docs": source_documents}, ensure_ascii=False)
|
||||
|
||||
return EventSourceResponse(knowledge_base_chat_iterator(query))
|
||||
|
||||
Reference in New Issue
Block a user