[前端+后端+RAG] 检索范围切换(当前文件/整个知识库)；联网搜索功能(SearXNG)；搜索结果带网络链接；修复RAG检索source格式不匹配bug

2026-04-07 15:02:54 +08:00
parent a5110da4e8
commit e1e5d4f30d
6 changed files with 158 additions and 36 deletions
--- a/chat_web_backend/src/main/java/com/inspur/llm/chat/gpt/controller/gpt/SmartChatController.java
+++ b/chat_web_backend/src/main/java/com/inspur/llm/chat/gpt/controller/gpt/SmartChatController.java
@@ -247,6 +247,7 @@ public class SmartChatController extends BaseController {
        }
        talkDto.setKnowledgeBaseNameList(knowledgeBaseNameList);
        talkDto.setWebSearch(smartChatQueryDto.getWebSearch());
        talkDto.setQuery(chatMessages.getContent().replaceAll("\n", ""));
        talkDto.setStream(true);
--- a/chat_web_backend/src/main/java/com/inspur/llm/chat/gpt/pojo/dto/SmartChatQueryDto.java
+++ b/chat_web_backend/src/main/java/com/inspur/llm/chat/gpt/pojo/dto/SmartChatQueryDto.java
@@ -35,4 +35,7 @@ public class SmartChatQueryDto {
    /* 用户请求类型 **/
    private Integer chatType;
    /* 是否开启联网搜索 **/
    private Boolean webSearch;
 }
--- a/chat_web_backend/src/main/java/com/inspur/llm/chat/gpt/pojo/dto/SmartChatSelfDto.java
+++ b/chat_web_backend/src/main/java/com/inspur/llm/chat/gpt/pojo/dto/SmartChatSelfDto.java
@@ -46,6 +46,10 @@ public class SmartChatSelfDto {
    /* 用户请求类型 **/
    private Integer chatType;
    /* 是否开启联网搜索 **/
    @JsonProperty("web_search")
    private Boolean webSearch;
    public String toJsonString() {
        StringBuffer str = new StringBuffer();
        extracted(knowledgeBaseNameList, str);
@@ -63,7 +67,7 @@ public class SmartChatSelfDto {
                ", \"fileNames\":" + fileNameList +
                ", \"quote\":\"" + ReplaceUtils.replaceHiddenChars(quote) + '\"' +
                ", \"prompt_name\":\"" + promptName + '\"' +
-//                ", \"use_model_self_response\":\"" + "False" + '\"' +
+                ", \"web_search\":" + (webSearch != null && webSearch ? "true" : "false") +
                '}';
    }
--- a/chat_web_front/src/components/ReadingCreate.vue
+++ b/chat_web_front/src/components/ReadingCreate.vue
@@ -9,16 +9,27 @@
    <!-- 文字窗口-->
    <div>
      <div class="tool-bar">
-        <div class="label">
+        <div class="label"></div>
          <!--          <img src="../assets/images/writing/start.png">
                    <div>AI写作助手</div>-->
        </div>
        <div class="clean" @click="cleanChat">
          <img src="../assets/images/writing/brush.png">
          <div>清除对话</div>
        </div>
      </div>
      <div class="search-scope" v-if="selectedFile">
        <div class="scope-label">检索范围：</div>
        <div class="scope-option" :class="{ active: searchScope === 'file' }" @click="searchScope = 'file'"
             :title="'仅在「' + selectedFile.fileName + '」中检索'">
          <span class="scope-icon">📄</span>
          <span class="scope-name">{{ selectedFile.fileName }}</span>
        </div>
        <div class="scope-option" :class="{ active: searchScope === 'kb' }" @click="searchScope = 'kb'"
             :title="'在「' + selectedFile.folderName + '」知识库的所有文件中检索'">
          <span class="scope-icon">📁</span>
          <span class="scope-name">{{ selectedFile.folderName }}</span>
        </div>
      </div>
      <div class="text-box">
        <div class="quote-box" v-if="quoteMsg">
          <div class="vertical-line"></div>
@@ -34,10 +45,17 @@
            @input="handleInput"
            @keydown.enter="keyDown"
            placeholder="请输入你想提的问题，字数不能超过1000字"/>
-        <div>
+        <div class="text-box-bottom">
-          <img v-if="textarea&&!sendStatus" style="width: 38px" src="../assets/images/writing/send-blue.png" @click="send('','0')">
+          <div class="web-search-toggle" :class="{ active: webSearchEnabled }" @click="webSearchEnabled = !webSearchEnabled"
-          <img v-if="!textarea&&!sendStatus" src="../assets/images/writing/send-gray.png">
+               :title="webSearchEnabled ? '联网搜索已开启，点击关闭' : '开启联网搜索，从互联网获取最新信息'">
-          <img v-if="sendStatus" src="../assets/images/chat/stopChat.png" @click="handleStop"></img>
+            <span class="ws-icon">🌐</span>
            <span class="ws-text">联网搜索</span>
          </div>
          <div class="send-btn">
            <img v-if="textarea&&!sendStatus" style="width: 38px" src="../assets/images/writing/send-blue.png" @click="send('','0')">
            <img v-if="!textarea&&!sendStatus" src="../assets/images/writing/send-gray.png">
            <img v-if="sendStatus" src="../assets/images/chat/stopChat.png" @click="handleStop">
          </div>
        </div>
      </div>
@@ -76,6 +94,8 @@ const clearQuote = () => {
 }
 //const title = inject('aiboxTitle');
 const searchScope = ref<'file' | 'kb'>('file');
 const webSearchEnabled = ref(false);
 const textarea = ref("");
 const firstChat = ref(true);
 const sendStatus = ref(false);
@@ -198,12 +218,13 @@ const getFetchChatAPIProcess = async (type: string) => {
        headers: headers,
        signal: controller.signal,
        body: JSON.stringify({
-          fileNames: [selectedFile.value?.embeddingId],
+          fileNames: searchScope.value === 'file' ? [selectedFile.value?.embeddingId] : [],
          conversationId: conversationId.value,
          promptName: "default",
          knowledgeBaseIdList: [selectedFile.value?.folderId],
          chatType: type,
-          quote: quoteMsg.value
+          quote: quoteMsg.value,
          webSearch: webSearchEnabled.value
        }),
      }
  );
@@ -372,6 +393,7 @@ const loadChatHistory = async () => {
 // 监听文件切换，重新加载对话历史
 watch(() => selectedFile.value?.fileId, () => {
  searchScope.value = 'file';
  loadChatHistory();
 });
@@ -434,7 +456,7 @@ const handleStop = async () => {
 <style lang="less" scoped>
 .message-content {
-  height: calc(100% - 290px);
+  height: calc(100% - 320px);
  overflow-y: auto;
  padding: 20px;
@@ -505,28 +527,93 @@ const handleStop = async () => {
  }
 }
 .search-scope {
  display: flex;
  align-items: center;
  padding: 4px 12px;
  gap: 6px;
  .scope-label {
    font-size: 13px;
    color: #333;
    flex-shrink: 0;
  }
  .scope-option {
    display: flex;
    align-items: center;
    gap: 5px;
    padding: 6px 14px;
    border-radius: 14px;
    border: 1px solid #E0E0E0;
    cursor: pointer;
    font-size: 13px;
    color: #666;
    transition: all 0.2s;
    max-width: 45%;
    overflow: hidden;
    &:hover { border-color: #004EA0; color: #004EA0; }
    &.active { border-color: #004EA0; color: #fff; background: #004EA0; }
    .scope-icon { font-size: 14px; flex-shrink: 0; }
    .scope-name { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
  }
 }
 .text-box {
  //width: 100%;
  height: 190px;
  background: #FFFFFF;
  border-radius: 8px;
  border: 1px solid #D5DDFF;
  margin: 12px 20px 12px 20px;
  display: flex;
  flex-direction: column;
  .box-textarea {
    outline: none;
    border: none;
    resize: none;
    width: 100%;
-    height: calc(100% - 54px);
+    flex: 1;
    padding: 16px;
    line-height: 24px;
    border-radius: 8px;
  }
  .text-box-bottom {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 6px 12px;
    .web-search-toggle {
      display: flex;
      align-items: center;
      gap: 5px;
      padding: 6px 14px;
      border-radius: 14px;
      border: 1px solid #E0E0E0;
      cursor: pointer;
      font-size: 13px;
      color: #999;
      transition: all 0.2s;
      user-select: none;
      &:hover { border-color: #10a37f; color: #10a37f; }
      &.active { border-color: #10a37f; color: #fff; background: #10a37f; }
      .ws-icon { font-size: 14px; }
      .ws-text { font-size: 13px; }
    }
    .send-btn {
      img { width: 38px; cursor: pointer; }
    }
  }
  img {
-    cursor:pointer;
+    cursor: pointer;
    float: right;
    margin-right: 16px;
  }
  .quote-box {
--- a/langchain-chat/server/chat/ZhipuSearchAPI.py
+++ b/langchain-chat/server/chat/ZhipuSearchAPI.py
@@ -71,10 +71,7 @@ class ZhipuSearchAPIWrapper:
            logging.info(f"Zhipu检索内容:{search_query}")
            url = "http://ywk3hvt4d:01Jp2V1tR9PdTsYSz919779Rb9_@134.122.191.214/search"
-            if "天气" in search_query:
+            engines = "duckduckgo,bing"
                engines = "google"
            else:
                engines = "baidu"
            data = {
                "format":"json",
                "q":search_query,
--- a/langchain-chat/server/chat/self_kb_chat.py
+++ b/langchain-chat/server/chat/self_kb_chat.py
@@ -50,6 +50,7 @@ async def self_kb_chat(
                            "content": "虎头虎脑"}]]
                    ),
                    stream: bool = Body(True, description="流式输出"),
                    web_search: bool = Body(False, description="是否开启联网搜索"),
                    ):
    """
        个人知识库对话api\n
@@ -149,7 +150,29 @@ async def self_kb_chat(
        except Exception as e:
            logger.error(f"个人知识库问答路由错误: {self_kb_route}", exc_info=True)
            docs = []
-        logger.info(f"个人知识库问答source_documents: {docs}")
+        logger.info(f"个人知识库问答source_documents: {len(docs)}条")
        # 联网搜索
        web_search_context = ""
        web_search_results = []  # 保存搜索结果供后面引用
        if web_search:
            try:
                from server.chat.ZhipuSearchAPI import ZhipuSearchAPIWrapper
                searcher = ZhipuSearchAPIWrapper()
                web_results = searcher.zhipu_search(search_query)
                web_search_results = web_results[:5] if web_results else []
                if web_results:
                    web_parts = []
                    for i, r in enumerate(web_results[:5], 1):
                        title = r.get("title", "")
                        content = r.get("content", "")[:300]
                        url = r.get("url", "")
                        web_parts.append(f"[{i}] {title}\n{content}\n来源: {url}")
                    web_search_context = "\n\n【联网搜索结果】\n" + "\n\n".join(web_parts)
                    logger.info(f"联网搜索获取到 {len(web_results)} 条结果")
            except Exception as e:
                logger.error(f"联网搜索失败: {e}")
        # if SELF_USE_RERANKER:
        #     reranker_model_path = MODEL_PATH["reranker"].get(RERANKER_MODEL,"BAAI/bge-reranker-large")
        #     print("-----------------model path------------------")
@@ -184,25 +207,28 @@ async def self_kb_chat(
        if '0' in self_kb_route:
            context = "\n".join([doc.page_content for doc in docs]).strip("xa0")
            logger.info(f"个人知识库问答 context 长度：{len(context)}")
-            # context_70 = context if len(context)<30000 else TextRank(context,num_sentences=70)
+            context = context[:30000] if len(context)>30000 else context
-            context = context[:40000] if len(context)>40000 else context
+            if web_search_context:
-            logger.info(f"截取后个人知识库问答 context 长度：{len(context)}")
+                context += web_search_context
            logger.info(f"最终 context 长度：{len(context)}")
            if history:
                history = history if len(history) < 20000 else TextRank(history,num_sentences=1)
            # logger.info(f"个人知识库问答 context 长度超过 30000，使用 TextRank 算法进行降维得到 context 长度：{len(context)}")
            chain = LLMChain(prompt=chat_prompt, llm=model1, verbose=True)
            task = asyncio.create_task(wrap_done(
                chain.acall({"context": context, "question": query, "history": history, "quote": quote, "fileName":fileNames}),
                callback.done),
            )
        elif '1' in self_kb_route:
            # 联网搜索结果作为额外文档加入
            if web_search_context:
                from langchain.docstore.document import Document as LCDocument
                docs.append(LCDocument(page_content=web_search_context, metadata={"source": "web_search"}))
            chain = load_qa_chain(
                model,
                chain_type="stuff",
                prompt=chat_prompt,
                verbose=True
            )
            # Begin a task that runs in the background.
            task = asyncio.create_task(wrap_done(
                chain.acall({"input_documents": docs, "question": query, "history": history, "quote": quote, "fileName":fileNames}),
                callback.done),
@@ -235,14 +261,18 @@ async def self_kb_chat(
            yield json.dumps(response, ensure_ascii=False)
        await task
        source_documents = []
-        if len(docs) == 0:  # 没有找到相关文档
+        if len(docs) == 0 and not web_search_context:
            source_documents.append(f"""暂未从本篇文献中找到答案,该回答为大模型自身能力解答！""")
        else:
-            # 去除文件扩展名
+            if len(docs) > 0:
-            # fileNames_without_ext = [name.rsplit('.', 1)[0] for name in fileNames]
+                source_documents.append(f"""[{len(source_documents) + 1}] [{docs[0].metadata.get("source")}]()\n""")
-            # 连接文件名（如果有多个文件名）
+            # 联网搜索结果链接
-            # joined_fileNames = ', '.join(fileNames_without_ext)
+            if web_search_results:
-            source_documents.append(f"""[{len(source_documents) + 1}] [{docs[0].metadata.get("source")}]()\n""")    
+                for r in web_search_results:
                    title = r.get("title", "").replace("\n", "")
                    url = r.get("url", "")
                    if title and url:
                        source_documents.append(f"""[{len(source_documents) + 1}] [{title}]({url})\n""")
        yield json.dumps({"docs": source_documents}, ensure_ascii=False) 
    return EventSourceResponse(knowledge_base_chat_iterator(query))