2026-04-02 11:36:05 +08:00
|
|
|
|
import asyncio
|
|
|
|
|
|
import re
|
|
|
|
|
|
import json
|
|
|
|
|
|
import logging
|
2026-04-20 15:59:11 +08:00
|
|
|
|
import requests
|
2026-04-02 11:36:05 +08:00
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
|
|
from server.chat import utils
|
|
|
|
|
|
|
|
|
|
|
|
# Process-wide logging: INFO and above, formatted as "<timestamp> [LEVEL] message".
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# New endpoint: intranet searxng service (the former 43.251.225.121 has been
# taken offline).
# aiohttp hits a 30s timeout against this searxng instance (the headers/UA
# appear to be blocked), so the blocking `requests` client is used instead.
SEARXNG_URL = 'http://118.196.92.255/searxng/search'
SEARXNG_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) gangyan-langchain',
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _searxng_results_to_items(results, mapping, limit):
|
|
|
|
|
|
"""把 searxng 统一的 {url,title,content} 映射成老接口期望的字段格式"""
|
|
|
|
|
|
out = []
|
|
|
|
|
|
for r in results[:limit]:
|
|
|
|
|
|
title = r.get('title', '') or ''
|
|
|
|
|
|
url = r.get('url', '') or ''
|
|
|
|
|
|
content = r.get('content', '') or ''
|
|
|
|
|
|
item = {}
|
|
|
|
|
|
for dst_key, src in mapping.items():
|
|
|
|
|
|
if src == 'title':
|
|
|
|
|
|
item[dst_key] = title
|
|
|
|
|
|
elif src == 'url':
|
|
|
|
|
|
item[dst_key] = url
|
|
|
|
|
|
elif src == 'content':
|
|
|
|
|
|
item[dst_key] = content
|
|
|
|
|
|
out.append(item)
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sync_fetch(params, limit_n, kind):
    """Run one blocking searxng query and return its hits in the legacy layout.

    Args:
        params: Query-string parameters sent to ``SEARXNG_URL``.
        limit_n: Maximum number of hits to keep.
        kind: ``'text'``, ``'video'`` or ``'news'``; selects the output key
            names. Any other value produces an empty list (the request is
            still issued).

    Returns:
        A list of dicts keyed for ``kind``. ``[]`` is returned on a non-200
        response, on an unrecognised ``kind``, and on any exception, which is
        logged rather than raised.
    """
    logger.info(f"searxng 请求 {kind}: params={params}")
    try:
        response = requests.get(
            SEARXNG_URL,
            params=params,
            headers=SEARXNG_HEADERS,
            timeout=15,
        )
        if response.status_code != 200:
            logger.error(f"searxng {kind} HTTP {response.status_code}")
            return []

        payload = response.json()
        hits = payload.get('results', []) if isinstance(payload, dict) else []
        logger.info(f"searxng {kind} 条数: {len(hits)}")

        # output key -> searxng field, one layout per result kind
        if kind == 'text':
            field_map = {'title': 'title', 'href': 'url', 'body': 'content'}
        elif kind == 'video':
            field_map = {'title': 'title', 'content': 'url', 'description': 'content'}
        elif kind == 'news':
            field_map = {'title': 'title', 'url': 'url', 'body': 'content'}
        else:
            return []
        return _searxng_results_to_items(hits, field_map, limit_n)
    except Exception as exc:
        # Best-effort fetch: a failed category must not break the whole search.
        logger.error(f"searxng {kind} 请求异常: {type(exc).__name__}: {exc}")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _split_limit(limit):
    """Split ``limit`` into (text, news, video) quotas that differ by at most one."""
    base, extra = divmod(limit, 3)
    return base + int(extra >= 1), base + int(extra >= 2), base


async def _fetch_category(query, category, quota, kind):
    """Fetch one searxng category in a worker thread; skip the request when quota <= 0."""
    if quota <= 0:
        # Nothing would be kept from the response, so avoid the HTTP round-trip.
        return []
    params = {'q': query, 'format': 'json', 'categories': category}
    return await asyncio.to_thread(_sync_fetch, params, quota, kind)


async def duckduckgo_search_iter(query: str, uuid: str = "", time: str = "", resource_type: str = None, limit: int = 3):
    """Query searxng and format the hits as numbered reference strings.

    The function keeps its historical ``duckduckgo`` name; the requests go to
    searxng via ``_sync_fetch``.

    Args:
        query: Search text.
        uuid: Key passed to ``utils.get_shared_variable`` /
            ``utils.set_shared_variable`` to load and store the shared state.
        time: Accepted for interface compatibility; not used by this function.
        resource_type: ``'video'`` restricts the search to the video category
            with the full ``limit``. Any other value (including ``None``)
            searches text, news and video with ``limit`` split between them.
        limit: Total number of hits requested.

    Returns:
        ``(res, source)``: ``res`` holds one descriptive string per hit
        (title, link, content); ``source`` holds the matching markdown-style
        link strings. ``source`` is also appended to the shared state's
        ``"source_docs"`` list.
    """
    logger.info("发起 searxng 搜索请求...")

    if resource_type != 'video':
        # Spread the limit evenly over the three kinds; the remainder goes to
        # text first, then news.
        text_quota, news_quota, video_quota = _split_limit(limit)
        text_result, news_result, video_result = await asyncio.gather(
            _fetch_category(query, 'general', text_quota, 'text'),
            _fetch_category(query, 'news', news_quota, 'news'),
            _fetch_category(query, 'videos', video_quota, 'video'),
        )
        combined_result = {
            "text": text_result,
            "video": video_result,
            "news": news_result,
        }
    else:
        video_result = await _fetch_category(query, 'videos', limit, 'video')
        combined_result = {"video": video_result}

    logger.info("searxng 请求已完成")

    res = []
    source = []
    info = utils.get_shared_variable(uuid)
    # Numbering continues from the counter stored in the shared state.
    # NOTE(review): the advanced index is not written back to info["num"]
    # here — confirm whether a caller is expected to update it.
    index = info["num"]

    for item in combined_result.get("text", []):
        index += 1
        res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
        source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')

    for item in combined_result.get("video", []):
        index += 1
        res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
        source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')

    for item in combined_result.get("news", []):
        index += 1
        res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
        source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')

    info["source_docs"].extend(source)
    utils.set_shared_variable(uuid, info)
    return res, source
|
2026-04-02 11:36:05 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def duckduckgo_search(query: str, time: str = "", resource_type: str = None):
    """Parse the model's request text and run the searxng search synchronously.

    ``query`` is expected to contain at least two brace-delimited JSON
    fragments. The first carries the search request (``query`` and optionally
    ``limit``, ``resource_type``, ``time``); the second carries ``uuid``.
    Fragments are found with a non-greedy ``{...}`` pattern, so nested JSON
    objects are not supported.

    Args:
        query: Raw text produced by the model.
        time: Fallback time filter, used when the request carries none.
        resource_type: Fallback resource type, used when the request carries
            none.

    Returns:
        The ``(res, source)`` tuple from ``duckduckgo_search_iter``, or a
        fixed instruction string when fewer than two JSON fragments are found.
    """
    logger.info(f"模型输入: {query}")

    matches = re.findall(r'\{.*?\}', query)
    if len(matches) < 2:
        return "<关键指令>不需要再调用该工具了</关键指令>"

    # Until the request parses, the raw first fragment is used as the query.
    query = matches[0]
    parsed_uuid = ""
    parsed_limit = 3

    # The two fragments are parsed independently so that a problem in one
    # does not throw away what was successfully read from the other.
    try:
        request = json.loads(matches[0])
    except json.JSONDecodeError as e:
        logger.error(f"解析JSON出错: {e}")
    else:
        query = request.get("query") or query
        resource_type = request.get("resource_type") or resource_type
        time = request.get("time") or time
        try:
            # The model may send the limit as a string or null; the quota
            # arithmetic downstream needs an int.
            parsed_limit = int(request.get("limit", 3))
        except (TypeError, ValueError):
            parsed_limit = 3

    try:
        # A missing "uuid" falls back to "" instead of raising KeyError.
        parsed_uuid = json.loads(matches[1]).get("uuid", "")
    except json.JSONDecodeError as e:
        logger.error(f"解析JSON出错: {e}")

    logger.info(f"解析完成,query: {query}, uuid: {parsed_uuid}, time: {time}, resource_type: {resource_type}, parsed_limit: {parsed_limit}")

    # asyncio.run() starts its own event loop, so this function must not be
    # called from a thread that already has a running loop.
    combined_result = asyncio.run(duckduckgo_search_iter(query, parsed_uuid, time, resource_type, parsed_limit))
    logger.info("返回JSON格式的结果给到模型...")
    return combined_result
|
2026-04-20 15:59:11 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-02 11:36:05 +08:00
|
|
|
|
# Pydantic model with a single string field, described as "web search query".
# NOTE(review): the field is named `location` although its description talks
# about a search query — confirm which name the consumer of this schema expects.
# Documentation is kept in `#` comments on purpose: a class docstring on a
# pydantic model is emitted as the schema description.
class DuckduckgoInput(BaseModel):
    location: str = Field(description="网络搜索查询")
|
|
|
|
|
|
|
2026-04-20 15:59:11 +08:00
|
|
|
|
|
2026-04-02 11:36:05 +08:00
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: a request fragment followed by a uuid fragment, with a
    # resource type other than 'video' so all three categories are searched.
    _demo_request = '{"query":"粉末冶金","limit":3}{"uuid":"test-uuid"}'
    result_other = duckduckgo_search(_demo_request, "m", "other")
    print("searxng输出(其他):\n", result_other)
|