import asyncio
import re
import json
import logging

import requests
from pydantic import BaseModel, Field

from server.chat import utils

logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)

# New endpoint: internal-network searxng service (the former 43.251.225.121
# has been taken offline).
# aiohttp times out after 30s against this searxng instance (the header/UA is
# suspected of being blocked), so requests is used instead.
SEARXNG_URL = 'http://118.196.92.255/searxng/search'
SEARXNG_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) gangyan-langchain'}

# Field names searxng returns for every result, regardless of category.
_SEARXNG_FIELDS = ('title', 'url', 'content')

# For each result kind: legacy output key -> searxng source field.
_KIND_MAPPINGS = {
    'text': {'title': 'title', 'href': 'url', 'body': 'content'},
    'video': {'title': 'title', 'content': 'url', 'description': 'content'},
    'news': {'title': 'title', 'url': 'url', 'body': 'content'},
}

_DEFAULT_LIMIT = 3

# Non-greedy "{...}" matcher used to pull the JSON fragments out of the tool input.
_JSON_OBJECT_RE = re.compile(r'\{.*?\}')


def _normalize_limit(value, default=_DEFAULT_LIMIT):
    """Coerce a caller/model supplied limit into a non-negative int.

    The limit ultimately comes from model-generated JSON, so it may be a
    string, ``None`` or a negative number. Anything that cannot be read as a
    non-negative integer falls back to ``default``.
    """
    try:
        limit = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return limit if limit >= 0 else default


def _searxng_results_to_items(results, mapping, limit):
    """Map searxng's uniform {url, title, content} results onto legacy field names.

    Args:
        results: list of result dicts as returned by searxng.
        mapping: ``{output_key: source_field}``; entries whose source field is
            not one of title/url/content are left out of the output items.
        limit: maximum number of results to convert (slice bound).

    Returns:
        A list of dicts keyed by the output keys of ``mapping``. Missing or
        falsy source values become the empty string.
    """
    return [
        {
            dst_key: entry.get(src) or ''
            for dst_key, src in mapping.items()
            if src in _SEARXNG_FIELDS
        }
        for entry in results[:limit]
    ]


def _sync_fetch(params, limit_n, kind):
    """Run one blocking searxng query and return at most ``limit_n`` mapped items.

    Args:
        params: query-string parameters sent to ``SEARXNG_URL``.
        limit_n: maximum number of items to return.
        kind: one of ``'text'``, ``'video'``, ``'news'``; selects the output
            field mapping. Any other value yields an empty list.

    Returns:
        A list of item dicts, or ``[]`` on a non-200 response or any error
        (errors are logged, never raised).
    """
    # A zero quota can never produce items, so skip the HTTP round trip.
    if isinstance(limit_n, int) and limit_n <= 0:
        return []

    logger.info(f"searxng 请求 {kind}: params={params}")
    try:
        r = requests.get(SEARXNG_URL, params=params, headers=SEARXNG_HEADERS, timeout=15)
        if r.status_code != 200:
            logger.error(f"searxng {kind} HTTP {r.status_code}")
            return []
        data = r.json()
        results = data.get('results', []) if isinstance(data, dict) else []
        logger.info(f"searxng {kind} 条数: {len(results)}")
        mapping = _KIND_MAPPINGS.get(kind)
        if mapping is None:
            return []
        return _searxng_results_to_items(results, mapping, limit_n)
    except Exception as e:
        # Deliberate best-effort boundary: a failing category must not break
        # the other categories fetched alongside it.
        logger.error(f"searxng {kind} 请求异常: {type(e).__name__}: {e}")
        return []


async def duckduckgo_search_iter(query: str, uuid: str = "", time: str = "", resource_type: str = None, limit: int = 3):
    """Query searxng and format the hits as numbered reference strings.

    Unless ``resource_type`` is ``'video'``, the general, news and videos
    categories are queried concurrently and ``limit`` is split between them
    (text first, then news, then video receive the remainder). For
    ``'video'`` only the videos category is queried with the full ``limit``.

    Args:
        query: search terms.
        uuid: key passed to ``utils.get_shared_variable`` / ``set_shared_variable``.
        time: accepted for interface compatibility; it is not forwarded to searxng.
        resource_type: ``'video'`` restricts the search to videos.
        limit: total number of results wanted; invalid values fall back to 3.

    Returns:
        ``(res, source)``: the full reference strings for the model and the
        shorter citation strings, which are also appended to the shared
        ``source_docs`` list.
    """
    logger.info("发起 searxng 搜索请求...")
    limit = _normalize_limit(limit)

    def fetch(category, quota, kind):
        # requests is blocking, so each query runs in a worker thread.
        params = {'q': query, 'format': 'json', 'categories': category}
        return asyncio.to_thread(_sync_fetch, params, quota, kind)

    if resource_type != 'video':
        # Split limit evenly across the three categories; the remainder goes
        # to text first, then news.
        base, extra = divmod(limit, 3)
        text_quota, news_quota, video_quota = (
            base + (1 if slot < extra else 0) for slot in range(3)
        )
        text_result, news_result, video_result = await asyncio.gather(
            fetch('general', text_quota, 'text'),
            fetch('news', news_quota, 'news'),
            fetch('videos', video_quota, 'video'),
        )
        combined_result = {
            "text": text_result,
            "video": video_result,
            "news": news_result,
        }
    else:
        video_result = await fetch('videos', limit, 'video')
        combined_result = {"video": video_result}
    logger.info("searxng 请求已完成")

    res = []
    source = []
    info = utils.get_shared_variable(uuid)
    # Numbering continues from the count stored in the shared state.
    # NOTE(review): info["num"] is never written back here — confirm that the
    # caller advances it, otherwise later calls reuse the same numbers.
    index = info["num"]

    if "text" in combined_result:
        for item in combined_result["text"]:
            index += 1
            res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
            source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')

    if "video" in combined_result:
        for item in combined_result["video"]:
            index += 1
            res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
            source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')

    if "news" in combined_result:
        for item in combined_result["news"]:
            index += 1
            res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
            source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')

    info["source_docs"].extend(source)
    utils.set_shared_variable(uuid, info)
    return res, source


def duckduckgo_search(query: str, time: str = "", resource_type: str = None):
    """Tool entry point: parse the model's input and run the searxng search.

    ``query`` is expected to contain two ``{...}`` JSON fragments: the first
    with the search options (``query``, ``limit``, ``resource_type``,
    ``time``), the second with the session ``uuid``.

    Returns:
        The ``(res, source)`` tuple from ``duckduckgo_search_iter``, or a fixed
        instruction string when fewer than two JSON fragments are found.

    Raises:
        KeyError: if the second fragment is valid JSON but has no ``"uuid"`` key.
    """
    logger.info(f"模型输入: {query}")
    matches = _JSON_OBJECT_RE.findall(query)
    if len(matches) < 2:
        return "<关键指令>不需要再调用该工具了"
    query = matches[0]

    parsed_uuid = ""
    parsed_limit = _DEFAULT_LIMIT
    try:
        obj1 = json.loads(query)
        parsed_query = obj1.get("query", "")
        parsed_limit = obj1.get("limit", 3)
        parsed_resource_type = obj1.get("resource_type", None)
        parsed_time = obj1.get("time", time)
        parsed_uuid = json.loads(matches[1])["uuid"]

        # Parsed values win only when non-empty; otherwise keep the arguments.
        query = parsed_query if parsed_query else query
        resource_type = parsed_resource_type if parsed_resource_type else resource_type
        time = parsed_time if parsed_time else time
        logger.info(f"解析完成,query: {query}, uuid: {parsed_uuid}, time: {time}, resource_type: {resource_type}, parsed_limit: {parsed_limit}")
    except json.JSONDecodeError as e:
        # Best effort: fall through and search with whatever was parsed so far.
        logger.error(f"解析JSON出错: {e}")

    # asyncio.run starts a fresh event loop, so this function must be called
    # from synchronous code (it raises RuntimeError inside a running loop).
    combined_result = asyncio.run(duckduckgo_search_iter(query, parsed_uuid, time, resource_type, parsed_limit))
    logger.info("返回JSON格式的结果给到模型...")
    return combined_result


class DuckduckgoInput(BaseModel):
    """Input schema for the search tool."""

    # NOTE(review): the field is named "location" but is described as the web
    # search query — confirm the name is what callers expect.
    location: str = Field(description="网络搜索查询")


if __name__ == "__main__":
    result_other = duckduckgo_search('{"query":"粉末冶金","limit":3}{"uuid":"test-uuid"}', "m", "other")
    print("searxng输出(其他):\n", result_other)