[运维] 接入内网 searxng + 清理启动脚本 + 修 log-trim 权限

搜索接口:

- duckduckgo_search.py / ZhipuSearchAPI.py 切换到内网 searxng (原 43.251.225.121 / 134.122.191.214 已失效)

启动脚本清理:

- 删除废弃 backend/ 目录 (与 chat_web_backend/ 编译产物 jar MD5 相同,仅是改名副本)

- 删除 start_all.sh 与 langchain-chat/{start,stop,stop_quick,shutdown_all,restart}.sh (被 scripts/*-restart.sh 覆盖)

- 删除 chat_web_backend/{start,test_mysql}.sh

修复:

- scripts/backend-restart.sh 对齐当前实际在跑的 chat_web_backend.jar (profile=dev)

- scripts/log-trim-daemon.sh 把 LOCK 移到 /tmp 按用户命名,修复非首次用户跑时的 Permission denied

新增:

- scripts/start-all.sh:一键启动入口,串联 mysql/redis/milvus/langchain/backend/frontend,含端口自检

- chat_web_backend/application-local.yml.archived:原 backend/ 下 yj profile 覆盖配置的归档备份

其他:

- .gitignore 忽略 scripts/pptist-deploy/PPTist/ (323M 第三方源码树)
This commit is contained in:
2026-04-20 15:59:11 +08:00
parent 279b104434
commit 0c3a393d04
18 changed files with 222 additions and 585 deletions

View File

@@ -1,99 +0,0 @@
#!/bin/bash
# Restart the langchain-chat Python service.
#
# Steps: force-stop whatever listens on ports 7861 and 8501, force-stop any
# leftover `startup.py -a` process, then relaunch startup.py in the background
# with the python interpreter of the "gangyan" conda environment. Output is
# appended to nohup.out inside the application directory.

# Make sure we run under bash even when invoked through another shell.
if [ -z "$BASH_VERSION" ]; then
  exec bash "$0" "$@"
fi

# Application directory: holds startup.py and receives nohup.out.
APP_DIR="/home/gc/gangyan/langchain-chat"

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Print one message in the given colour.
print_yellow() { printf "${YELLOW}%s${NC}\n" "$1"; }
print_green() { printf "${GREEN}%s${NC}\n" "$1"; }
print_red() { printf "${RED}%s${NC}\n" "$1"; }

print_yellow "=== 停止 7861 和 8501 端口服务 ==="
for port in 7861 8501; do
  pids=$(lsof -t -i:"$port" 2>/dev/null)
  if [ -n "$pids" ]; then
    print_yellow "正在停止端口 $port 的进程: $pids"
    # $pids is deliberately unquoted: it may hold several PIDs.
    kill -9 $pids 2>/dev/null
    print_green "端口 $port 已停止"
  else
    echo "端口 $port 无运行中的服务"
  fi
done

# Also stop every startup.py process.
pids=$(ps aux | grep "[p]ython.*startup.py -a" | awk '{print $2}')
if [ -n "$pids" ]; then
  print_yellow "正在停止 startup.py 进程: $pids"
  # Deliberately unquoted, see above.
  kill -9 $pids 2>/dev/null
  print_green "startup.py 进程已停止"
fi

echo ""
print_yellow "=== 启动服务 ==="

# Abort when the application directory is unreachable; otherwise startup.py
# would be looked up, and nohup.out written, in whatever directory we are in.
cd "$APP_DIR" || {
  print_red "错误: 无法进入目录 $APP_DIR"
  exit 1
}

# Initialise conda so that its directories end up on PATH.
CONDA_INIT="/root/miniconda3/etc/profile.d/conda.sh"
if [ -f "$CONDA_INIT" ]; then
  # `source` is a bash synonym for `.`, so a single attempt is enough.
  . "$CONDA_INIT" 2>/dev/null
fi

# Locate the conda executable (in priority order).
if command -v conda &> /dev/null; then
  CONDA_EXE="conda"
elif [ -f "/root/miniconda3/bin/conda" ]; then
  CONDA_EXE="/root/miniconda3/bin/conda"
elif [ -f "/root/miniconda3/condabin/conda" ]; then
  CONDA_EXE="/root/miniconda3/condabin/conda"
else
  print_red "错误: 未找到 conda 命令"
  print_red "请检查 conda 是否已安装: /root/miniconda3"
  exit 1
fi

# Launch with the conda environment.
print_yellow "使用环境: gangyan"
print_yellow "日志文件: nohup.out"
print_yellow "Conda路径: $CONDA_EXE"

# Resolve the full path of the environment's python.
PYTHON_EXE="/root/miniconda3/envs/gangyan/bin/python"
if [ ! -f "$PYTHON_EXE" ]; then
  # Fall back to asking conda where the environment's python lives.
  PYTHON_EXE="$("$CONDA_EXE" run -n gangyan which python 2>/dev/null)"
  if [ -z "$PYTHON_EXE" ] || [ ! -f "$PYTHON_EXE" ]; then
    print_red "错误: 无法找到 python 可执行文件"
    print_red "请检查 conda 环境 gangyan 是否已安装"
    exit 1
  fi
fi
print_yellow "Python路径: $PYTHON_EXE"

# Start through the interpreter's full path (no dependency on `conda activate`).
# Prepend the app directory to PYTHONPATH so project modules import correctly;
# the ${VAR:+...} form avoids a trailing ':' (an implicit "current directory"
# entry) when PYTHONPATH was previously unset or empty.
export PYTHONPATH="$APP_DIR${PYTHONPATH:+:$PYTHONPATH}"

# Still inside $APP_DIR here (nothing above changes directory after the cd).
nohup "$PYTHON_EXE" startup.py -a >> nohup.out 2>&1 &
PID=$!
print_green "服务已启动PID: $PID"
print_yellow "日志文件: $APP_DIR/nohup.out"
print_yellow "查看日志: tail -f nohup.out"

# Give the service a moment, then show the tail of the log.
sleep 2
echo ""
print_yellow "=== 最近日志 ==="
tail -20 nohup.out

View File

@@ -1,186 +1,148 @@
import asyncio
import re
import aiohttp
import json
import logging
import requests
from pydantic import BaseModel, Field
from server.chat import utils
# 配置日志记录器
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)
async def duckduckgo_search_iter(query: str, uuid: str = "",time: str = "", resource_type: str = None, limit: int = 3):
# 定义三个API的URL
text_url = 'http://43.251.225.121/inspur/search_text'
video_url = 'http://43.251.225.121/inspur/search_video'
news_url = 'http://43.251.225.121/inspur/search_new'
# 新接口:内网 searxng 服务(原 43.251.225.121 已下线)
# aiohttp 与该 searxng 配合会 30s 超时(疑似 header/UA 被拦),所以改用 requests。
SEARXNG_URL = 'http://118.196.92.255/searxng/search'
SEARXNG_HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) gangyan-langchain'}
payload = {
"query": query,
"time": time
}
async def fetch(session, url, json_payload, limit):
    """POST ``json_payload`` plus a ``limit`` field to ``url`` and return the decoded JSON.

    Args:
        session: object exposing an aiohttp-style ``post(url, json=...)`` async
            context manager.
        url: endpoint to query.
        json_payload: base request body; it is NOT modified.
        limit: value sent as the ``"limit"`` field of the request body.

    Returns:
        The decoded JSON body on HTTP 200. An empty list on any non-200 status
        or exception, so callers that iterate over the result never receive
        ``None`` or an error document.
    """
    logger.info(f"{url} 获取数据,请求参数: {json_payload}")
    # Build a private copy: the same payload dict is handed to several
    # concurrently running fetch() calls, each with its own limit.
    request_body = {**json_payload, "limit": limit}
    try:
        async with session.post(url, json=request_body) as response:
            if response.status != 200:
                logger.error(f"{url} 请求失败,状态码 {response.status}")
                # Do not try to interpret an error body as search results.
                return []
            data = await response.json()
            logger.info(f"{url} 获取的资料数: {len(data) if isinstance(data, list) else '未知'}")
            return data
    except Exception as e:
        logger.error(f"获取 {url} 数据时发生错误: {e}")
        return []
def _searxng_results_to_items(results, mapping, limit):
"""把 searxng 统一的 {url,title,content} 映射成老接口期望的字段格式"""
out = []
for r in results[:limit]:
title = r.get('title', '') or ''
url = r.get('url', '') or ''
content = r.get('content', '') or ''
item = {}
for dst_key, src in mapping.items():
if src == 'title':
item[dst_key] = title
elif src == 'url':
item[dst_key] = url
elif src == 'content':
item[dst_key] = content
out.append(item)
return out
def _sync_fetch(params, limit_n, kind):
    """Blocking searxng query; returns at most ``limit_n`` items shaped for ``kind``.

    ``kind`` selects the output field layout ('text', 'video' or 'news').
    Any HTTP status other than 200, any exception, or an unrecognised ``kind``
    yields an empty list.
    """
    # Output key -> searxng source field, per result kind.
    field_maps = {
        'text': {'title': 'title', 'href': 'url', 'body': 'content'},
        'video': {'title': 'title', 'content': 'url', 'description': 'content'},
        'news': {'title': 'title', 'url': 'url', 'body': 'content'},
    }
    logger.info(f"searxng 请求 {kind}: params={params}")
    try:
        resp = requests.get(SEARXNG_URL, params=params, headers=SEARXNG_HEADERS, timeout=15)
        if resp.status_code != 200:
            logger.error(f"searxng {kind} HTTP {resp.status_code}")
            return []
        data = resp.json()
        if isinstance(data, dict):
            results = data.get('results', [])
        else:
            results = []
        logger.info(f"searxng {kind} 条数: {len(results)}")
        chosen = field_maps.get(kind)
        if chosen is None:
            return []
        return _searxng_results_to_items(results, chosen, limit_n)
    except Exception as e:
        logger.error(f"searxng {kind} 请求异常: {type(e).__name__}: {e}")
        return []
# 根据 resource_type 确定要请求的 API
# 默认并发请求三个API
# 视频只请求 video_url
# 新闻只请求 news_url
# 其他类型只请求 text_url
async with aiohttp.ClientSession() as session:
logger.info("发起请求duckduckgo...")
n = limit % 3
limit1 = 0
limit2 = 0
limit3 = 0
match n:
case 0:
limit1 = limit//3
limit2 = limit1
limit3 = limit1
case 1:
limit1 = limit//3 +1
limit2 = limit//3
limit3 = limit2
case 2:
limit1 = limit//3 +1
limit2 = limit1
limit2 = limit
async def duckduckgo_search_iter(query: str, uuid: str = "", time: str = "", resource_type: str = None, limit: int = 3):
logger.info("发起 searxng 搜索请求...")
if resource_type is None or not resource_type == 'video':
text_task = asyncio.create_task(fetch(session, text_url, payload,limit1))
video_task = asyncio.create_task(fetch(session, video_url, payload, limit3))
news_task = asyncio.create_task(fetch(session, news_url, payload, limit2))
text_result, video_result, news_result = await asyncio.gather(text_task, video_task, news_task)
logger.info("合并结果...")
logger.info("合并结果完成")
combined_result = {
"text": text_result,
"video": video_result,
"news": news_result
}
# 三类按 limit 平均分配
n = limit % 3
if n == 0:
limit1 = limit2 = limit3 = limit // 3
elif n == 1:
limit1 = limit // 3 + 1
limit2 = limit3 = limit // 3
else:
limit1 = limit2 = limit // 3 + 1
limit3 = limit // 3
else:
video_result = await fetch(session, video_url, payload, limit)
combined_result = {
"video": video_result
}
del limit1,limit2,limit3
# elif resource_type == 'news':
# news_result = await fetch(session, news_url, payload)
# combined_result = {
# "news": news_result
# }
if resource_type is None or resource_type != 'video':
text_task = asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'general'}, limit1, 'text')
news_task = asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'news'}, limit2, 'news')
video_task = asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'videos'}, limit3, 'video')
text_result, news_result, video_result = await asyncio.gather(text_task, news_task, video_task)
combined_result = {
"text": text_result,
"video": video_result,
"news": news_result,
}
else:
video_result = await asyncio.to_thread(_sync_fetch, {'q': query, 'format': 'json', 'categories': 'videos'}, limit, 'video')
combined_result = {"video": video_result}
# else: # 其他类型
# text_result = await fetch(session, text_url, payload)
# combined_result = {
# "text": text_result
# }
logger.info("searxng 请求已完成")
logger.info("请求已完成")
res = []
source = []
info = utils.get_shared_variable(uuid)
index = info["num"]
if "text" in combined_result:
for item in combined_result["text"]:
index += 1
res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')
if "video" in combined_result:
for item in combined_result["video"]:
index += 1
res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')
if "news" in combined_result:
for item in combined_result["news"]:
index += 1
res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')
info["source_docs"].extend(source)
utils.set_shared_variable(uuid, info)
return res,source
res = []
source = []
info = utils.get_shared_variable(uuid)
index = info["num"]
if "text" in combined_result:
for item in combined_result["text"]:
index += 1
res.append(f'资料[{index}] 资料标题{item["title"]}({item["href"]}) 资料内容为: {item["body"]}')
source.append(f'资料[{index}] [{item["title"]}]({item["href"]})')
if "video" in combined_result:
for item in combined_result["video"]:
index += 1
res.append(f'资料[{index}] 视频标题[{item["title"]}]({item["content"]}) 视频内容为: {item["description"]}')
source.append(f'视频资料[{index}] [{item["title"]}]({item["content"]})')
if "news" in combined_result:
for item in combined_result["news"]:
index += 1
res.append(f'资料[{index}] 新闻标题[{item["title"]}]({item["url"]}) 新闻内容为: {item["body"]}')
source.append(f'资料[{index}] [{item["title"]}]({item["url"]})')
info["source_docs"].extend(source)
utils.set_shared_variable(uuid, info)
return res, source
def duckduckgo_search(query: str, time: str = "", resource_type: str = None):
logger.info(f"模型输入: {query}")
# 对传入的 query 字段进行解析
# 判断 query 是否包含 "}{"
# if "}{" in query:
# # 将 query 分割为两个JSON字符串
# split_index = query.find("}{")
# json_part1 = query[:split_index+1]
# json_part2 = query[split_index+1:]
# try:
# obj1 = json.loads(json_part1)
# obj2 = json.loads(json_part2)
# # 提取 query, resource_type, time, uuid
# parsed_query = obj1.get("query", "")
# parsed_resource_type = obj1.get("resource_type", None)
# parsed_time = obj1.get("time", time) # 如obj1未包含time则使用传入的默认值
# parsed_uuid = obj2.get("uuid", "")
logger.info(f"模型输入: {query}")
matches = re.findall(r'\{.*?\}', query)
if len(matches)>=2:
if len(matches) >= 2:
query = matches[0]
else:
return "<关键指令>不需要再调用该工具了</关键指令>"
parsed_uuid = ""
parsed_limit = 3
try:
obj1= json.loads(query)
obj1 = json.loads(query)
parsed_query = obj1.get("query", "")
parsed_limit = obj1.get("limit", 3)
parsed_resource_type = obj1.get("resource_type", None)
parsed_time = obj1.get("time", time) # 如obj1未包含time则使用传入的默认值
parsed_time = obj1.get("time", time)
parsed_uuid = json.loads(matches[1])["uuid"]
# 将解析到的值覆盖原有的参数
query = parsed_query if parsed_query else query
resource_type = parsed_resource_type if parsed_resource_type else resource_type
time = parsed_time if parsed_time else time
logger.info(f"解析完成query: {query}, uuid: {parsed_uuid}, time: {time}, resource_type: {resource_type}, parsed_limit: {parsed_limit}")
except json.JSONDecodeError as e:
logger.error(f"解析JSON出错: {e}")
# 在同步环境中运行异步函数
combined_result = asyncio.run(duckduckgo_search_iter(query, parsed_uuid, time, resource_type, parsed_limit))
# 以标准json格式输出
logger.info("返回JSON格式的结果给到模型...")
return combined_result
# Pydantic model with a single string field; presumably the argument schema
# for the search tool defined in this file — TODO confirm against the caller.
# NOTE(review): the field is named `location`, yet its description text means
# "web search query". Looks like a leftover name; verify who passes it by name
# before renaming.
class DuckduckgoInput(BaseModel):
location: str = Field(description="网络搜索查询")
if __name__ == "__main__":
# 测试调用
# 1. 默认请求三个API
# result_default = duckduckgo_search("粉末冶金", "m", "default")
# print("duckduckgo输出(默认):\n", result_default)
# # 2. 只请求视频
# result_video = duckduckgo_search("粉末冶金", "m", "video")
# print("duckduckgo输出(视频):\n", result_video)
# # 3. 只请求新闻
# result_news = duckduckgo_search("粉末冶金", "m", "news")
# print("duckduckgo输出(新闻):\n", result_news)
# 4. 其它类型只请求文本
result_other = duckduckgo_search("粉末冶金", "m", "other")
print("duckduckgo输出(其他):\n", result_other)
result_other = duckduckgo_search('{"query":"粉末冶金","limit":3}{"uuid":"test-uuid"}', "m", "other")
print("searxng输出(其他):\n", result_other)

View File

@@ -70,7 +70,7 @@ class ZhipuSearchAPIWrapper:
)
logging.info(f"Zhipu检索内容:{search_query}")
url = "http://ywk3hvt4d:01Jp2V1tR9PdTsYSz919779Rb9_@134.122.191.214/search"
url = "http://118.196.92.255/searxng/search"
engines = "duckduckgo,bing"
data = {
"format":"json",

View File

@@ -1,2 +0,0 @@
# Force-kill every process whose command line mentions one of the service
# entry points (server/api.py, webui.py, fastchat.serve, langchain_chat).
# grep -E is used instead of grep -P: the pattern needs no PCRE-only feature,
# and BSD/macOS grep does not support -P.
pids=$(ps -eo pid,user,cmd | grep -E 'server/api.py|webui.py|fastchat.serve|langchain_chat' | grep -v grep | awk '{print $1}')
# Only call kill when something matched; piping an empty list into GNU xargs
# would still run a bare `kill -9` and fail with a usage error.
if [ -n "$pids" ]; then
  # $pids is deliberately unquoted: it may hold several PIDs.
  kill -9 $pids
fi

View File

@@ -1,60 +0,0 @@
#!/bin/bash
# Start the Python backend service (`startup.py -a`) in the background with
# nohup, inside the "gangyan" conda environment. If a backend process is
# already running, the user is asked whether to stop it first.

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Check whether a backend process is already running.
EXISTING=$(ps aux | grep "[p]ython.*startup.py -a" | awk '{print $2}')
if [ -n "$EXISTING" ]; then
  echo -e "${YELLOW}警告: 检测到已有 Python 后端进程在运行 (PID: $EXISTING)${NC}"
  read -p "是否先停止现有进程? (y/n): " -n 1 -r
  echo
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo -e "${YELLOW}正在停止现有进程...${NC}"
    # $EXISTING is deliberately unquoted: it may hold several PIDs.
    kill $EXISTING 2>/dev/null
    sleep 2
    # Still running after the grace period: force-terminate.
    if ps -p $EXISTING > /dev/null 2>&1; then
      kill -9 $EXISTING 2>/dev/null
    fi
    echo -e "${GREEN}已停止现有进程${NC}"
  else
    echo -e "${RED}请先停止现有进程或使用 stop.sh${NC}"
    exit 1
  fi
fi

# Resolve the directory this script lives in and work from there.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Abort instead of launching from an arbitrary directory. The emptiness test
# matters because `cd ""` can succeed silently as a no-op.
if [ -z "$SCRIPT_DIR" ] || ! cd "$SCRIPT_DIR"; then
  echo -e "${RED}错误: 无法进入脚本目录 $SCRIPT_DIR${NC}"
  exit 1
fi

# Environment variable for the PDF convert service (the original note says it
# must stay consistent with pdf-convert-service); an existing value wins.
export PDF_CONVERT_KB_ROOT="${PDF_CONVERT_KB_ROOT:-$SCRIPT_DIR/knowledge_base}"

# conda must be available on PATH.
if ! command -v conda &> /dev/null; then
  echo -e "${RED}错误: 未找到 conda 命令${NC}"
  exit 1
fi

# Run inside the conda environment.
echo -e "${YELLOW}正在启动 Python 后端...${NC}"
echo -e "${YELLOW}使用环境: gangyan${NC}"
echo -e "${YELLOW}日志文件: nohup.out${NC}"

# Launch with nohup; nohup.out is truncated on every start.
nohup conda run -n gangyan python startup.py -a > nohup.out 2>&1 &
PID=$!
echo -e "${GREEN}后端已启动PID: $PID${NC}"
echo -e "${YELLOW}查看日志: tail -f nohup.out${NC}"
echo -e "${YELLOW}停止服务: ./stop.sh${NC}"

# Give the service a moment, then show the tail of the log.
sleep 2
echo -e "\n${YELLOW}=== 最近日志 ===${NC}"
tail -20 nohup.out

View File

@@ -1,70 +0,0 @@
#!/bin/bash
# Stop the Python backend service (interactive: asks for confirmation).
#
# Target processes are looked up by the `startup.py -a` command line first
# and, when that finds nothing, by whoever holds DEFAULT_PORT. Each target
# receives a plain `kill` first; whatever is still alive three seconds later
# is force-killed.

# Default port (the original note says it comes from server_config.py)
DEFAULT_PORT=7861

# Colour definitions
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${YELLOW}正在查找运行中的 Python 后端进程...${NC}"

# Method 1: look up by process name.
PIDS=$(ps aux | grep "[p]ython.*startup.py -a" | awk '{print $2}')

# Method 2: nothing found by name, fall back to the port.
if [ -z "$PIDS" ]; then
  echo -e "${YELLOW}未找到 startup.py 进程,尝试通过端口 ${DEFAULT_PORT} 查找...${NC}"
  PIDS=$(lsof -ti:${DEFAULT_PORT} 2>/dev/null)
fi

if [ -z "$PIDS" ]; then
  echo -e "${RED}未找到运行中的 Python 后端进程${NC}"
  exit 0
fi

echo -e "${GREEN}找到以下进程:${NC}"
ps aux | grep "[p]ython.*startup.py -a" | grep -v grep
# PIDS is known to be non-empty here, so no extra test is needed.
echo -e "${YELLOW}进程 PID: $PIDS${NC}"

# Ask for confirmation before stopping anything.
read -p "是否停止这些进程? (y/n): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
  echo -e "${YELLOW}已取消${NC}"
  exit 0
fi

# Send the regular stop signal to every target.
for PID in $PIDS; do
  if [ -n "$PID" ]; then
    echo -e "${YELLOW}正在停止进程 $PID...${NC}"
    if kill "$PID" 2>/dev/null; then
      echo -e "${GREEN}进程 $PID 已发送停止信号${NC}"
    else
      echo -e "${RED}无法停止进程 $PID${NC}"
    fi
  fi
done

# Wait for the processes to exit.
echo -e "${YELLOW}等待进程结束...${NC}"
sleep 3

# Collect survivors. Besides re-scanning for startup.py, re-check the PIDs we
# actually signalled: targets found through the port fallback do not match the
# startup.py pattern and would otherwise never be force-killed.
REMAINING=""
for PID in $PIDS $(ps aux | grep "[p]ython.*startup.py -a" | awk '{print $2}'); do
  ps -p "$PID" > /dev/null 2>&1 || continue
  case " $REMAINING " in
    *" $PID "*) continue ;; # already listed
  esac
  REMAINING="$REMAINING $PID"
done

if [ -n "$REMAINING" ]; then
  echo -e "${YELLOW}仍有进程在运行,强制终止...${NC}"
  for PID in $REMAINING; do
    kill -9 "$PID" 2>/dev/null
    echo -e "${GREEN}已强制终止进程 $PID${NC}"
  done
fi

echo -e "${GREEN}所有进程已停止${NC}"

View File

@@ -1,53 +0,0 @@
#!/bin/bash
# Quickly stop the Python backend service (no confirmation prompt).
#
# Target processes are looked up by the `startup.py -a` command line first
# and, when that finds nothing, by whoever holds DEFAULT_PORT. Each target
# receives a plain `kill` first; whatever is still alive two seconds later is
# force-killed.

# Default port
DEFAULT_PORT=7861

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${YELLOW}正在查找并停止 Python 后端进程...${NC}"

# Method 1: look up by process name.
PIDS=$(ps aux | grep "[p]ython.*startup.py -a" | awk '{print $2}')

# Method 2: nothing found by name, fall back to the port.
if [ -z "$PIDS" ]; then
  PIDS=$(lsof -ti:${DEFAULT_PORT} 2>/dev/null)
fi

if [ -z "$PIDS" ]; then
  echo -e "${RED}未找到运行中的 Python 后端进程${NC}"
  exit 0
fi

# Show what was found.
echo -e "${GREEN}找到以下进程:${NC}"
ps aux | grep "[p]ython.*startup.py -a" | grep -v grep

# Send the regular stop signal to every target.
for PID in $PIDS; do
  if [ -n "$PID" ]; then
    echo -e "${YELLOW}正在停止进程 $PID...${NC}"
    kill "$PID" 2>/dev/null
  fi
done

# Wait for the processes to exit.
sleep 2

# Collect survivors. Besides re-scanning for startup.py, re-check the PIDs we
# actually signalled: targets found through the port fallback do not match the
# startup.py pattern and would otherwise never be force-killed.
REMAINING=""
for PID in $PIDS $(ps aux | grep "[p]ython.*startup.py -a" | awk '{print $2}'); do
  ps -p "$PID" > /dev/null 2>&1 || continue
  case " $REMAINING " in
    *" $PID "*) continue ;; # already listed
  esac
  REMAINING="$REMAINING $PID"
done

if [ -n "$REMAINING" ]; then
  echo -e "${YELLOW}强制终止剩余进程...${NC}"
  for PID in $REMAINING; do
    kill -9 "$PID" 2>/dev/null
    echo -e "${GREEN}已强制终止进程 $PID${NC}"
  done
fi

echo -e "${GREEN}✓ 所有进程已停止${NC}"