[前端+后端+RAG] 检索范围切换(当前文件/整个知识库);联网搜索功能(SearXNG);搜索结果带网络链接;修复RAG检索source格式不匹配bug

This commit is contained in:
2026-04-07 15:02:54 +08:00
parent a5110da4e8
commit e1e5d4f30d
6 changed files with 158 additions and 36 deletions

View File

@@ -247,6 +247,7 @@ public class SmartChatController extends BaseController {
} }
talkDto.setKnowledgeBaseNameList(knowledgeBaseNameList); talkDto.setKnowledgeBaseNameList(knowledgeBaseNameList);
talkDto.setWebSearch(smartChatQueryDto.getWebSearch());
talkDto.setQuery(chatMessages.getContent().replaceAll("\n", "")); talkDto.setQuery(chatMessages.getContent().replaceAll("\n", ""));
talkDto.setStream(true); talkDto.setStream(true);

View File

@@ -35,4 +35,7 @@ public class SmartChatQueryDto {
/* 用户请求类型 **/ /* 用户请求类型 **/
private Integer chatType; private Integer chatType;
/* 是否开启联网搜索 **/
private Boolean webSearch;
} }

View File

@@ -46,6 +46,10 @@ public class SmartChatSelfDto {
/* 用户请求类型 **/ /* 用户请求类型 **/
private Integer chatType; private Integer chatType;
/* 是否开启联网搜索 **/
@JsonProperty("web_search")
private Boolean webSearch;
public String toJsonString() { public String toJsonString() {
StringBuffer str = new StringBuffer(); StringBuffer str = new StringBuffer();
extracted(knowledgeBaseNameList, str); extracted(knowledgeBaseNameList, str);
@@ -63,7 +67,7 @@ public class SmartChatSelfDto {
", \"fileNames\":" + fileNameList + ", \"fileNames\":" + fileNameList +
", \"quote\":\"" + ReplaceUtils.replaceHiddenChars(quote) + '\"' + ", \"quote\":\"" + ReplaceUtils.replaceHiddenChars(quote) + '\"' +
", \"prompt_name\":\"" + promptName + '\"' + ", \"prompt_name\":\"" + promptName + '\"' +
// ", \"use_model_self_response\":\"" + "False" + '\"' + ", \"web_search\":" + (webSearch != null && webSearch ? "true" : "false") +
'}'; '}';
} }

View File

@@ -9,16 +9,27 @@
<!-- 文字窗口--> <!-- 文字窗口-->
<div> <div>
<div class="tool-bar"> <div class="tool-bar">
<div class="label"> <div class="label"></div>
<!-- <img src="../assets/images/writing/start.png">
<div>AI写作助手</div>-->
</div>
<div class="clean" @click="cleanChat"> <div class="clean" @click="cleanChat">
<img src="../assets/images/writing/brush.png"> <img src="../assets/images/writing/brush.png">
<div>清除对话</div> <div>清除对话</div>
</div> </div>
</div> </div>
<div class="search-scope" v-if="selectedFile">
<div class="scope-label">检索范围</div>
<div class="scope-option" :class="{ active: searchScope === 'file' }" @click="searchScope = 'file'"
:title="'仅在「' + selectedFile.fileName + '」中检索'">
<span class="scope-icon">📄</span>
<span class="scope-name">{{ selectedFile.fileName }}</span>
</div>
<div class="scope-option" :class="{ active: searchScope === 'kb' }" @click="searchScope = 'kb'"
:title="'在「' + selectedFile.folderName + '」知识库的所有文件中检索'">
<span class="scope-icon">📁</span>
<span class="scope-name">{{ selectedFile.folderName }}</span>
</div>
</div>
<div class="text-box"> <div class="text-box">
<div class="quote-box" v-if="quoteMsg"> <div class="quote-box" v-if="quoteMsg">
<div class="vertical-line"></div> <div class="vertical-line"></div>
@@ -34,10 +45,17 @@
@input="handleInput" @input="handleInput"
@keydown.enter="keyDown" @keydown.enter="keyDown"
placeholder="请输入你想提的问题字数不能超过1000字"/> placeholder="请输入你想提的问题字数不能超过1000字"/>
<div> <div class="text-box-bottom">
<img v-if="textarea&&!sendStatus" style="width: 38px" src="../assets/images/writing/send-blue.png" @click="send('','0')"> <div class="web-search-toggle" :class="{ active: webSearchEnabled }" @click="webSearchEnabled = !webSearchEnabled"
<img v-if="!textarea&&!sendStatus" src="../assets/images/writing/send-gray.png"> :title="webSearchEnabled ? '联网搜索已开启,点击关闭' : '开启联网搜索,从互联网获取最新信息'">
<img v-if="sendStatus" src="../assets/images/chat/stopChat.png" @click="handleStop"></img> <span class="ws-icon">🌐</span>
<span class="ws-text">联网搜索</span>
</div>
<div class="send-btn">
<img v-if="textarea&&!sendStatus" style="width: 38px" src="../assets/images/writing/send-blue.png" @click="send('','0')">
<img v-if="!textarea&&!sendStatus" src="../assets/images/writing/send-gray.png">
<img v-if="sendStatus" src="../assets/images/chat/stopChat.png" @click="handleStop">
</div>
</div> </div>
</div> </div>
@@ -76,6 +94,8 @@ const clearQuote = () => {
} }
//const title = inject('aiboxTitle'); //const title = inject('aiboxTitle');
const searchScope = ref<'file' | 'kb'>('file');
const webSearchEnabled = ref(false);
const textarea = ref(""); const textarea = ref("");
const firstChat = ref(true); const firstChat = ref(true);
const sendStatus = ref(false); const sendStatus = ref(false);
@@ -198,12 +218,13 @@ const getFetchChatAPIProcess = async (type: string) => {
headers: headers, headers: headers,
signal: controller.signal, signal: controller.signal,
body: JSON.stringify({ body: JSON.stringify({
fileNames: [selectedFile.value?.embeddingId], fileNames: searchScope.value === 'file' ? [selectedFile.value?.embeddingId] : [],
conversationId: conversationId.value, conversationId: conversationId.value,
promptName: "default", promptName: "default",
knowledgeBaseIdList: [selectedFile.value?.folderId], knowledgeBaseIdList: [selectedFile.value?.folderId],
chatType: type, chatType: type,
quote: quoteMsg.value quote: quoteMsg.value,
webSearch: webSearchEnabled.value
}), }),
} }
); );
@@ -372,6 +393,7 @@ const loadChatHistory = async () => {
// 监听文件切换,重新加载对话历史 // 监听文件切换,重新加载对话历史
watch(() => selectedFile.value?.fileId, () => { watch(() => selectedFile.value?.fileId, () => {
searchScope.value = 'file';
loadChatHistory(); loadChatHistory();
}); });
@@ -434,7 +456,7 @@ const handleStop = async () => {
<style lang="less" scoped> <style lang="less" scoped>
.message-content { .message-content {
height: calc(100% - 290px); height: calc(100% - 320px);
overflow-y: auto; overflow-y: auto;
padding: 20px; padding: 20px;
@@ -505,28 +527,93 @@ const handleStop = async () => {
} }
} }
.search-scope {
display: flex;
align-items: center;
padding: 4px 12px;
gap: 6px;
.scope-label {
font-size: 13px;
color: #333;
flex-shrink: 0;
}
.scope-option {
display: flex;
align-items: center;
gap: 5px;
padding: 6px 14px;
border-radius: 14px;
border: 1px solid #E0E0E0;
cursor: pointer;
font-size: 13px;
color: #666;
transition: all 0.2s;
max-width: 45%;
overflow: hidden;
&:hover { border-color: #004EA0; color: #004EA0; }
&.active { border-color: #004EA0; color: #fff; background: #004EA0; }
.scope-icon { font-size: 14px; flex-shrink: 0; }
.scope-name { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
}
}
.text-box { .text-box {
//width: 100%;
height: 190px; height: 190px;
background: #FFFFFF; background: #FFFFFF;
border-radius: 8px; border-radius: 8px;
border: 1px solid #D5DDFF; border: 1px solid #D5DDFF;
margin: 12px 20px 12px 20px; margin: 12px 20px 12px 20px;
display: flex;
flex-direction: column;
.box-textarea { .box-textarea {
outline: none; outline: none;
border: none; border: none;
resize: none; resize: none;
width: 100%; width: 100%;
height: calc(100% - 54px); flex: 1;
padding: 16px; padding: 16px;
line-height: 24px; line-height: 24px;
border-radius: 8px; border-radius: 8px;
} }
.text-box-bottom {
display: flex;
justify-content: space-between;
align-items: center;
padding: 6px 12px;
.web-search-toggle {
display: flex;
align-items: center;
gap: 5px;
padding: 6px 14px;
border-radius: 14px;
border: 1px solid #E0E0E0;
cursor: pointer;
font-size: 13px;
color: #999;
transition: all 0.2s;
user-select: none;
&:hover { border-color: #10a37f; color: #10a37f; }
&.active { border-color: #10a37f; color: #fff; background: #10a37f; }
.ws-icon { font-size: 14px; }
.ws-text { font-size: 13px; }
}
.send-btn {
img { width: 38px; cursor: pointer; }
}
}
img { img {
cursor:pointer; cursor: pointer;
float: right;
margin-right: 16px;
} }
.quote-box { .quote-box {

View File

@@ -71,10 +71,7 @@ class ZhipuSearchAPIWrapper:
logging.info(f"Zhipu检索内容:{search_query}") logging.info(f"Zhipu检索内容:{search_query}")
url = "http://ywk3hvt4d:01Jp2V1tR9PdTsYSz919779Rb9_@134.122.191.214/search" url = "http://ywk3hvt4d:01Jp2V1tR9PdTsYSz919779Rb9_@134.122.191.214/search"
if "天气" in search_query: engines = "duckduckgo,bing"
engines = "google"
else:
engines = "baidu"
data = { data = {
"format":"json", "format":"json",
"q":search_query, "q":search_query,

View File

@@ -50,6 +50,7 @@ async def self_kb_chat(
"content": "虎头虎脑"}]] "content": "虎头虎脑"}]]
), ),
stream: bool = Body(True, description="流式输出"), stream: bool = Body(True, description="流式输出"),
web_search: bool = Body(False, description="是否开启联网搜索"),
): ):
""" """
个人知识库对话api\n 个人知识库对话api\n
@@ -149,7 +150,29 @@ async def self_kb_chat(
except Exception as e: except Exception as e:
logger.error(f"个人知识库问答路由错误: {self_kb_route}", exc_info=True) logger.error(f"个人知识库问答路由错误: {self_kb_route}", exc_info=True)
docs = [] docs = []
logger.info(f"个人知识库问答source_documents: {docs}") logger.info(f"个人知识库问答source_documents: {len(docs)}")
# 联网搜索
web_search_context = ""
web_search_results = [] # 保存搜索结果供后面引用
if web_search:
try:
from server.chat.ZhipuSearchAPI import ZhipuSearchAPIWrapper
searcher = ZhipuSearchAPIWrapper()
web_results = searcher.zhipu_search(search_query)
web_search_results = web_results[:5] if web_results else []
if web_results:
web_parts = []
for i, r in enumerate(web_results[:5], 1):
title = r.get("title", "")
content = r.get("content", "")[:300]
url = r.get("url", "")
web_parts.append(f"[{i}] {title}\n{content}\n来源: {url}")
web_search_context = "\n\n【联网搜索结果】\n" + "\n\n".join(web_parts)
logger.info(f"联网搜索获取到 {len(web_results)} 条结果")
except Exception as e:
logger.error(f"联网搜索失败: {e}")
# if SELF_USE_RERANKER: # if SELF_USE_RERANKER:
# reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL,"BAAI/bge-reranker-large") # reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL,"BAAI/bge-reranker-large")
# print("-----------------model path------------------") # print("-----------------model path------------------")
@@ -184,25 +207,28 @@ async def self_kb_chat(
if '0' in self_kb_route: if '0' in self_kb_route:
context = "\n".join([doc.page_content for doc in docs]).strip("xa0") context = "\n".join([doc.page_content for doc in docs]).strip("xa0")
logger.info(f"个人知识库问答 context 长度:{len(context)}") logger.info(f"个人知识库问答 context 长度:{len(context)}")
# context_70 = context if len(context)<30000 else TextRank(context,num_sentences=70) context = context[:30000] if len(context)>30000 else context
context = context[:40000] if len(context)>40000 else context if web_search_context:
logger.info(f"截取后个人知识库问答 context 长度:{len(context)}") context += web_search_context
logger.info(f"最终 context 长度:{len(context)}")
if history: if history:
history = history if len(history) < 20000 else TextRank(history,num_sentences=1) history = history if len(history) < 20000 else TextRank(history,num_sentences=1)
# logger.info(f"个人知识库问答 context 长度超过 30000使用 TextRank 算法进行降维得到 context 长度:{len(context)}")
chain = LLMChain(prompt=chat_prompt, llm=model1, verbose=True) chain = LLMChain(prompt=chat_prompt, llm=model1, verbose=True)
task = asyncio.create_task(wrap_done( task = asyncio.create_task(wrap_done(
chain.acall({"context": context, "question": query, "history": history, "quote": quote, "fileName":fileNames}), chain.acall({"context": context, "question": query, "history": history, "quote": quote, "fileName":fileNames}),
callback.done), callback.done),
) )
elif '1' in self_kb_route: elif '1' in self_kb_route:
# 联网搜索结果作为额外文档加入
if web_search_context:
from langchain.docstore.document import Document as LCDocument
docs.append(LCDocument(page_content=web_search_context, metadata={"source": "web_search"}))
chain = load_qa_chain( chain = load_qa_chain(
model, model,
chain_type="stuff", chain_type="stuff",
prompt=chat_prompt, prompt=chat_prompt,
verbose=True verbose=True
) )
# Begin a task that runs in the background.
task = asyncio.create_task(wrap_done( task = asyncio.create_task(wrap_done(
chain.acall({"input_documents": docs, "question": query, "history": history, "quote": quote, "fileName":fileNames}), chain.acall({"input_documents": docs, "question": query, "history": history, "quote": quote, "fileName":fileNames}),
callback.done), callback.done),
@@ -235,14 +261,18 @@ async def self_kb_chat(
yield json.dumps(response, ensure_ascii=False) yield json.dumps(response, ensure_ascii=False)
await task await task
source_documents = [] source_documents = []
if len(docs) == 0: # 没有找到相关文档 if len(docs) == 0 and not web_search_context:
source_documents.append(f"""暂未从本篇文献中找到答案,该回答为大模型自身能力解答!""") source_documents.append(f"""暂未从本篇文献中找到答案,该回答为大模型自身能力解答!""")
else: else:
# 去除文件扩展名 if len(docs) > 0:
# fileNames_without_ext = [name.rsplit('.', 1)[0] for name in fileNames] source_documents.append(f"""[{len(source_documents) + 1}] [{docs[0].metadata.get("source")}]()\n""")
# 连接文件名(如果有多个文件名) # 联网搜索结果链接
# joined_fileNames = ', '.join(fileNames_without_ext) if web_search_results:
source_documents.append(f"""[{len(source_documents) + 1}] [{docs[0].metadata.get("source")}]()\n""") for r in web_search_results:
title = r.get("title", "").replace("\n", "")
url = r.get("url", "")
if title and url:
source_documents.append(f"""[{len(source_documents) + 1}] [{title}]({url})\n""")
yield json.dumps({"docs": source_documents}, ensure_ascii=False) yield json.dumps({"docs": source_documents}, ensure_ascii=False)
return EventSourceResponse(knowledge_base_chat_iterator(query)) return EventSourceResponse(knowledge_base_chat_iterator(query))