Files
gangyan/langchain-chat/server/agent/tools/duckduckgo_search.py
liuguancen 0c3a393d04 [运维] 接入内网 searxng + 清理启动脚本 + 修 log-trim 权限
搜索接口:

- duckduckgo_search.py / ZhipuSearchAPI.py 切换到内网 searxng (原 43.251.225.121 / 134.122.191.214 已失效)

启动脚本清理:

- 删除废弃 backend/ 目录 (与 chat_web_backend/ 编译产物 jar MD5 相同,仅是改名副本)

- 删除 start_all.sh 与 langchain-chat/{start,stop,stop_quick,shutdown_all,restart}.sh (被 scripts/*-restart.sh 覆盖)

- 删除 chat_web_backend/{start,test_mysql}.sh

修复:

- scripts/backend-restart.sh 对齐当前实际在跑的 chat_web_backend.jar (profile=dev)

- scripts/log-trim-daemon.sh 把 LOCK 移到 /tmp 按用户命名,修复非首次用户跑时的 Permission denied

新增:

- scripts/start-all.sh:一键启动入口,串联 mysql/redis/milvus/langchain/backend/frontend,含端口自检

- chat_web_backend/application-local.yml.archived:原 backend/ 下 yj profile 覆盖配置的归档备份

其他:

- .gitignore 忽略 scripts/pptist-deploy/PPTist/ (323M 第三方源码树)
2026-04-20 15:59:11 +08:00

149 lines
6.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import re
import json
import logging
import requests
from pydantic import BaseModel, Field
from server.chat import utils
# NOTE: this configures the root logger as a side effect of importing the module.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)
# New endpoint: intranet searxng service (the former 43.251.225.121 has been taken offline).
# aiohttp against this searxng instance runs into a 30s timeout (suspected header/UA
# blocking), so the blocking `requests` library is used instead.
SEARXNG_URL = 'http://118.196.92.255/searxng/search'
# User-Agent header sent with every searxng request.
SEARXNG_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) gangyan-langchain'}
def _searxng_results_to_items(results, mapping, limit):
"""把 searxng 统一的 {url,title,content} 映射成老接口期望的字段格式"""
out = []
for r in results[:limit]:
title = r.get('title', '') or ''
url = r.get('url', '') or ''
content = r.get('content', '') or ''
item = {}
for dst_key, src in mapping.items():
if src == 'title':
item[dst_key] = title
elif src == 'url':
item[dst_key] = url
elif src == 'content':
item[dst_key] = content
out.append(item)
return out
def _sync_fetch(params, limit_n, kind):
    """Run one blocking searxng query and return its hits in the legacy layout.

    Args:
        params: Query-string parameters passed to ``requests.get``.
        limit_n: Maximum number of items to keep from the response.
        kind: ``'text'``, ``'video'`` or ``'news'``; selects the output field
            names. Any other value still performs the request but returns
            an empty list.

    Returns:
        A list of result dicts, or an empty list on a non-200 response, an
        unsupported ``kind`` or any exception (which is logged, not raised).
    """
    logger.info(f"searxng 请求 {kind}: params={params}")
    # Output-key -> searxng-field layout for each supported kind.
    layouts = {
        'text': {'title': 'title', 'href': 'url', 'body': 'content'},
        'video': {'title': 'title', 'content': 'url', 'description': 'content'},
        'news': {'title': 'title', 'url': 'url', 'body': 'content'},
    }
    try:
        response = requests.get(
            SEARXNG_URL,
            params=params,
            headers=SEARXNG_HEADERS,
            timeout=15,
        )
        if response.status_code != 200:
            logger.error(f"searxng {kind} HTTP {response.status_code}")
            return []
        payload = response.json()
        hits = payload.get('results', []) if isinstance(payload, dict) else []
        logger.info(f"searxng {kind} 条数: {len(hits)}")
        layout = layouts.get(kind)
        if layout is None:
            return []
        return _searxng_results_to_items(hits, layout, limit_n)
    except Exception as exc:
        # Best effort: a failed search must not break the caller, so log and
        # fall back to "no results".
        logger.error(f"searxng {kind} 请求异常: {type(exc).__name__}: {exc}")
        return []
async def duckduckgo_search_iter(query: str, uuid: str = "", time: str = "", resource_type: str = None, limit: int = 3):
    """Query searxng for ``query`` and format the hits as numbered reference strings.

    Unless ``resource_type`` is ``'video'``, the ``general``, ``news`` and
    ``videos`` categories are queried concurrently and ``limit`` is split
    across them as evenly as possible (text receives the first spare slot,
    news the second). For ``'video'`` only the ``videos`` category is queried,
    with the whole ``limit``. A category allotted zero results is not
    requested at all.

    Args:
        query: Search terms, sent as the searxng ``q`` parameter.
        uuid: Key handed to ``utils.get_shared_variable`` and
            ``utils.set_shared_variable``; the stored value is read through
            its ``"num"`` and ``"source_docs"`` entries.
        time: Accepted for interface compatibility; not used by this function.
        resource_type: ``'video'`` restricts the search to videos; any other
            value (including ``None``) searches all three categories.
        limit: Total number of results wanted. Negative values are treated
            as zero.

    Returns:
        A ``(res, source)`` tuple of lists of strings: ``res`` holds the full
        reference texts and ``source`` the matching markdown-style links.
        ``source`` is also appended to the shared ``"source_docs"`` list.
    """
    logger.info("发起 searxng 搜索请求...")
    # A negative limit would otherwise reach the result slicing as a negative bound.
    limit = max(limit, 0)

    async def _fetch(category, count, kind):
        # Skip the HTTP round trip for a category that was allotted nothing.
        if count <= 0:
            return []
        params = {'q': query, 'format': 'json', 'categories': category}
        return await asyncio.to_thread(_sync_fetch, params, count, kind)

    if resource_type != 'video':
        # Split the limit evenly across the three categories.
        base, spare = divmod(limit, 3)
        text_limit = base + (1 if spare >= 1 else 0)
        news_limit = base + (1 if spare == 2 else 0)
        video_limit = base
        text_result, news_result, video_result = await asyncio.gather(
            _fetch('general', text_limit, 'text'),
            _fetch('news', news_limit, 'news'),
            _fetch('videos', video_limit, 'video'),
        )
        combined_result = {
            "text": text_result,
            "video": video_result,
            "news": news_result,
        }
    else:
        combined_result = {"video": await _fetch('videos', limit, 'video')}
    logger.info("searxng 请求已完成")

    res = []
    source = []
    info = utils.get_shared_variable(uuid)
    # Numbering continues from the counter stored in the shared state.
    index = info["num"]
    for item in combined_result.get("text", []):
        index += 1
        res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
        source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')
    for item in combined_result.get("video", []):
        index += 1
        res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
        source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')
    for item in combined_result.get("news", []):
        index += 1
        res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
        source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')
    info["source_docs"].extend(source)
    utils.set_shared_variable(uuid, info)
    return res, source
def duckduckgo_search(query: str, time: str = "", resource_type: str = None):
    """Tool entry point: parse the model's input and run a searxng search.

    ``query`` is expected to contain at least two brace-delimited JSON
    objects. The first is the search request (``query`` and optionally
    ``limit``, ``resource_type``, ``time``); the second carries ``uuid``.
    Values found in the first object override the ``time`` and
    ``resource_type`` arguments when they are truthy.

    Args:
        query: Raw model input containing the two JSON objects.
        time: Fallback time filter, forwarded to ``duckduckgo_search_iter``.
        resource_type: Fallback resource type, forwarded likewise.

    Returns:
        The ``(res, source)`` tuple produced by ``duckduckgo_search_iter``, or
        a fixed instruction string when fewer than two JSON objects are found.

    Note:
        The search is driven with ``asyncio.run``, so this function must be
        called from synchronous code with no event loop already running.
    """
    logger.info(f"模型输入: {query}")
    matches = re.findall(r'\{.*?\}', query)
    if len(matches) < 2:
        return "<关键指令>不需要再调用该工具了</关键指令>"
    query = matches[0]

    parsed_uuid = ""
    parsed_limit = 3
    try:
        obj1 = json.loads(query)
        raw_limit = obj1.get("limit", 3)
        try:
            # The model may send the limit as a string, null, Infinity or a
            # negative number; normalise it to a non-negative int.
            parsed_limit = max(int(raw_limit), 0)
        except (TypeError, ValueError, OverflowError):
            logger.warning(f"limit 无法解析为整数, 使用默认值 3: {raw_limit!r}")
            parsed_limit = 3
        # A missing "uuid" key falls back to the empty string instead of
        # raising KeyError, which the handler below would not catch.
        parsed_uuid = json.loads(matches[1]).get("uuid", "")
        query = obj1.get("query") or query
        resource_type = obj1.get("resource_type") or resource_type
        time = obj1.get("time") or time
        logger.info(f"解析完成query: {query}, uuid: {parsed_uuid}, time: {time}, resource_type: {resource_type}, parsed_limit: {parsed_limit}")
    except json.JSONDecodeError as e:
        # Best effort: on malformed JSON, search with whatever was parsed so far.
        logger.error(f"解析JSON出错: {e}")

    combined_result = asyncio.run(duckduckgo_search_iter(query, parsed_uuid, time, resource_type, parsed_limit))
    logger.info("返回JSON格式的结果给到模型...")
    return combined_result
class DuckduckgoInput(BaseModel):
    """Pydantic model declaring a single string input field.

    NOTE(review): presumably the argument schema for the search tool; it is
    not referenced elsewhere in this file — confirm against the tool registry.
    """
    # Despite the name `location`, the field description reads "web search query".
    location: str = Field(description="网络搜索查询")
if __name__ == "__main__":
    # Manual smoke test: one request object followed by the uuid object,
    # with "m" as the time filter and "other" as the resource type.
    demo_input = '{"query":"粉末冶金","limit":3}{"uuid":"test-uuid"}'
    smoke_output = duckduckgo_search(demo_input, "m", "other")
    print("searxng输出(其他):\n", smoke_output)