[运维] 接入内网 searxng + 清理启动脚本 + 修 log-trim 权限

搜索接口：
- duckduckgo_search.py / ZhipuSearchAPI.py 切换到内网 searxng (原 43.251.225.121 / 134.122.191.214 已失效)

启动脚本清理：
- 删除废弃 backend/ 目录 (与 chat_web_backend/ 编译产物 jar MD5 相同，仅是改名副本)
- 删除 start_all.sh 与 langchain-chat/{start,stop,stop_quick,shutdown_all,restart}.sh (被 scripts/*-restart.sh 覆盖)
- 删除 chat_web_backend/{start,test_mysql}.sh

修复：
- scripts/backend-restart.sh 对齐当前实际在跑的 chat_web_backend.jar (profile=dev)
- scripts/log-trim-daemon.sh 把 LOCK 移到 /tmp 按用户命名，修复非首次用户跑时的 Permission denied

新增：
- scripts/start-all.sh：一键启动入口，串联 mysql/redis/milvus/langchain/backend/frontend，含端口自检
- chat_web_backend/application-local.yml.archived：原 backend/ 下 yj profile 覆盖配置的归档备份

其他：
- .gitignore 忽略 scripts/pptist-deploy/PPTist/ (323M 第三方源码树)
2026-04-20 15:59:11 +08:00
parent 279b104434
commit 0c3a393d04
18 changed files with 222 additions and 585 deletions
--- a/langchain-chat/server/agent/tools/duckduckgo_search.py
+++ b/langchain-chat/server/agent/tools/duckduckgo_search.py
@@ -1,186 +1,148 @@
 import asyncio
 import re
-import aiohttp
 import json
 import logging
+import requests
 from pydantic import BaseModel, Field

 from server.chat import utils

-# 配置日志记录器
 logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
 logger = logging.getLogger(__name__)

-async def duckduckgo_search_iter(query: str, uuid: str = "",time: str = "", resource_type: str = None, limit: int = 3):
-    # 定义三个API的URL
-    text_url = 'http://43.251.225.121/inspur/search_text'
-    video_url = 'http://43.251.225.121/inspur/search_video'
-    news_url = 'http://43.251.225.121/inspur/search_new'
+# 新接口：内网 searxng 服务（原 43.251.225.121 已下线）
+# aiohttp 与该 searxng 配合会 30s 超时（疑似 header/UA 被拦），所以改用 requests。
+SEARXNG_URL = 'http://118.196.92.255/searxng/search'
+SEARXNG_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) gangyan-langchain'}

-    payload = {
-        "query": query,
-        "time": time
-    }

-    async def fetch(session, url, json_payload,limit):
-        logger.info(f"从 {url} 获取数据，请求参数: {json_payload}")
-        try:
-            json_payload["limit"] = limit
-            async with session.post(url, json=json_payload) as response:
-                if response.status != 200:
-                    logger.error(f"向 {url} 请求失败，状态码 {response.status}")
-                data = await response.json()
-                logger.info(f"从 {url} 获取的资料数: {len(data) if isinstance(data, list) else '未知'}")
-                return data
-        except Exception as e:
-            logger.error(f"获取 {url} 数据时发生错误: {e}")
+def _searxng_results_to_items(results, mapping, limit):
+    """把 searxng 统一的 {url,title,content} 映射成老接口期望的字段格式"""
+    out = []
+    for r in results[:limit]:
+        title = r.get('title', '') or ''
+        url = r.get('url', '') or ''
+        content = r.get('content', '') or ''
+        item = {}
+        for dst_key, src in mapping.items():
+            if src == 'title':
+                item[dst_key] = title
+            elif src == 'url':
+                item[dst_key] = url
+            elif src == 'content':
+                item[dst_key] = content
+        out.append(item)
+    return out
+
+
+def _sync_fetch(params, limit_n, kind):
+    logger.info(f"searxng 请求 {kind}: params={params}")
+    try:
+        r = requests.get(SEARXNG_URL, params=params, headers=SEARXNG_HEADERS, timeout=15)
+        if r.status_code != 200:
+            logger.error(f"searxng {kind} HTTP {r.status_code}")
            return []
+        data = r.json()
+        results = data.get('results', []) if isinstance(data, dict) else []
+        logger.info(f"searxng {kind} 条数: {len(results)}")
+        if kind == 'text':
+            return _searxng_results_to_items(results, {'title': 'title', 'href': 'url', 'body': 'content'}, limit_n)
+        if kind == 'video':
+            return _searxng_results_to_items(results, {'title': 'title', 'content': 'url', 'description': 'content'}, limit_n)
+        if kind == 'news':
+            return _searxng_results_to_items(results, {'title': 'title', 'url': 'url', 'body': 'content'}, limit_n)
+        return []
+    except Exception as e:
+        logger.error(f"searxng {kind} 请求异常: {type(e).__name__}: {e}")
+        return []

-    # 根据 resource_type 确定要请求的 API
-    # 默认并发请求三个API
-    # 视频只请求 video_url
-    # 新闻只请求 news_url
-    # 其他类型只请求 text_url
-    async with aiohttp.ClientSession() as session:
-        logger.info("发起请求duckduckgo...")

-        n = limit % 3
-        limit1 = 0
-        limit2 = 0 
-        limit3 = 0
-        match n:
-            case 0:
-                limit1 = limit//3
-                limit2 = limit1
-                limit3 = limit1
-            case 1:
-                limit1 = limit//3 +1
-                limit2 = limit//3
-                limit3 = limit2
-            case 2:
-                limit1 = limit//3 +1
-                limit2 = limit1
-                limit2 = limit
+async def duckduckgo_search_iter(query: str, uuid: str = "", time: str = "", resource_type: str = None, limit: int = 3):
+    logger.info("发起 searxng 搜索请求...")

-        if resource_type is None or not resource_type == 'video':
-            text_task = asyncio.create_task(fetch(session, text_url, payload,limit1))
-            video_task = asyncio.create_task(fetch(session, video_url, payload, limit3))
-            news_task = asyncio.create_task(fetch(session, news_url, payload, limit2))
-            text_result, video_result, news_result = await asyncio.gather(text_task, video_task, news_task)
-            logger.info("合并结果...")
-            
-            logger.info("合并结果完成")
-            combined_result = {
-                "text": text_result,
-                "video": video_result,
-                "news": news_result
-            }
+    # 三类按 limit 平均分配
+    n = limit % 3
+    if n == 0:
+        limit1 = limit2 = limit3 = limit // 3
+    elif n == 1:
+        limit1 = limit // 3 + 1
+        limit2 = limit3 = limit // 3
+    else:
+        limit1 = limit2 = limit // 3 + 1
+        limit3 = limit // 3

-        else:
-            video_result = await fetch(session, video_url, payload, limit)
-            combined_result = {
-                "video": video_result
-            }
-        del limit1,limit2,limit3
-        # elif resource_type == 'news':
-        #     news_result = await fetch(session, news_url, payload)
-        #     combined_result = {
-        #         "news": news_result
-        #     }
+    if resource_type is None or resource_type != 'video':
+        text_task = asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'general'}, limit1, 'text')
+        news_task = asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'news'}, limit2, 'news')
+        video_task = asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'videos'}, limit3, 'video')
+        text_result, news_result, video_result = await asyncio.gather(text_task, news_task, video_task)
+        combined_result = {
+            "text": text_result,
+            "video": video_result,
+            "news": news_result,
+        }
+    else:
+        video_result = await asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'videos'}, limit, 'video')
+        combined_result = {"video": video_result}

-        # else:  # 其他类型
-        #     text_result = await fetch(session, text_url, payload)
-        #     combined_result = {
-        #         "text": text_result
-        #     }
+    logger.info("searxng 请求已完成")

-        logger.info("请求已完成")
-        res = []
-        source = []
-        info = utils.get_shared_variable(uuid)
-        index = info["num"]
-        if "text" in combined_result:
-            for item in combined_result["text"]:
-                index += 1
-                res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
-                source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')
-        if "video" in combined_result:
-            for item in combined_result["video"]:
-                index += 1
-                res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
-                source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')
-        if "news" in combined_result:
-            for item in combined_result["news"]:
-                index += 1
-                res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
-                source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')
-        info["source_docs"].extend(source)
-        utils.set_shared_variable(uuid, info)
-    return res,source
+    res = []
+    source = []
+    info = utils.get_shared_variable(uuid)
+    index = info["num"]
+    if "text" in combined_result:
+        for item in combined_result["text"]:
+            index += 1
+            res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
+            source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')
+    if "video" in combined_result:
+        for item in combined_result["video"]:
+            index += 1
+            res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
+            source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')
+    if "news" in combined_result:
+        for item in combined_result["news"]:
+            index += 1
+            res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
+            source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')
+    info["source_docs"].extend(source)
+    utils.set_shared_variable(uuid, info)
+    return res, source


 def duckduckgo_search(query: str, time: str = "", resource_type: str = None):
-    logger.info(f"模型输入: {query}")   
-    # 对传入的 query 字段进行解析
-    # 判断 query 是否包含 "}{"
-    # if "}{" in query:
-    #     # 将 query 分割为两个JSON字符串
-    #     split_index = query.find("}{")
-    #     json_part1 = query[:split_index+1]
-    #     json_part2 = query[split_index+1:]
-        
-    #     try:
-    #         obj1 = json.loads(json_part1)
-    #         obj2 = json.loads(json_part2)
-            
-    #         # 提取 query, resource_type, time, uuid
-    #         parsed_query = obj1.get("query", "")
-    #         parsed_resource_type = obj1.get("resource_type", None)
-    #         parsed_time = obj1.get("time", time)  # 如obj1未包含time则使用传入的默认值
-    #         parsed_uuid = obj2.get("uuid", "")
+    logger.info(f"模型输入: {query}")
    matches = re.findall(r'\{.*?\}', query)
-    if len(matches)>=2:
+    if len(matches) >= 2:
        query = matches[0]
    else:
        return "<关键指令>不需要再调用该工具了</关键指令>"
+    parsed_uuid = ""
+    parsed_limit = 3
    try:
-        obj1= json.loads(query)
+        obj1 = json.loads(query)
        parsed_query = obj1.get("query", "")
        parsed_limit = obj1.get("limit", 3)
        parsed_resource_type = obj1.get("resource_type", None)
-        parsed_time = obj1.get("time", time)  # 如obj1未包含time则使用传入的默认值
+        parsed_time = obj1.get("time", time)
        parsed_uuid = json.loads(matches[1])["uuid"]
-        # 将解析到的值覆盖原有的参数
        query = parsed_query if parsed_query else query
        resource_type = parsed_resource_type if parsed_resource_type else resource_type
        time = parsed_time if parsed_time else time
-                
        logger.info(f"解析完成，query: {query}, uuid: {parsed_uuid}, time: {time}, resource_type: {resource_type}, parsed_limit: {parsed_limit}")
    except json.JSONDecodeError as e:
        logger.error(f"解析JSON出错: {e}")

-    # 在同步环境中运行异步函数
    combined_result = asyncio.run(duckduckgo_search_iter(query, parsed_uuid, time, resource_type, parsed_limit))
-    # 以标准json格式输出
    logger.info("返回JSON格式的结果给到模型...")
    return combined_result
+
+
 class DuckduckgoInput(BaseModel):
    location: str = Field(description="网络搜索查询")

+
 if __name__ == "__main__":
-    # 测试调用
-    # 1. 默认请求三个API
-    # result_default = duckduckgo_search("粉末冶金", "m", "default")
-    # print("duckduckgo输出(默认):\n", result_default)
-
-    # # 2. 只请求视频
-    # result_video = duckduckgo_search("粉末冶金", "m", "video")
-    # print("duckduckgo输出(视频):\n", result_video)
-
-    # # 3. 只请求新闻
-    # result_news = duckduckgo_search("粉末冶金", "m", "news")
-    # print("duckduckgo输出(新闻):\n", result_news)
-
-    # 4. 其它类型只请求文本
-    result_other = duckduckgo_search("粉末冶金", "m", "other")
-    print("duckduckgo输出(其他):\n", result_other)
+    result_other = duckduckgo_search('{"query":"粉末冶金","limit":3}{"uuid":"test-uuid"}', "m", "other")
+    print("searxng输出(其他):\n", result_other)