Files
gangyan/scripts/langchain-restart.sh

158 lines
6.0 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# Must run under bash; if started with sh/dash by mistake, re-exec under bash.
# ${1+"$@"} passes the original arguments through only when at least one exists.
[ -n "${BASH_VERSION:-}" ] || exec /usr/bin/env bash "$0" ${1+"$@"}
# Restart langchain-chat (startup.py --all-api, consistent with start_all.sh).
# Log file: gangyan/logs/langchain-chat.log
# Treat expansion of an unset variable as an error.
set -u
# Load the shared helpers that live next to this script.
# NOTE(review): presumably this defines LOG_DIR, GANGYAN_ROOT and log_tee, which
# are used below but not defined in this file — confirm in common-restart.sh.
source "$(cd "$(dirname "$0")" && pwd)/common-restart.sh"
LOG_FILE="$LOG_DIR/langchain-chat.log"
TRIM_LOG="$LOG_DIR/log-trim.log"
CONDA_SH="/opt/software/miniconda3/etc/profile.d/conda.sh"
LC_ROOT="$GANGYAN_ROOT/langchain-chat"
# Keep in sync with configs/server_config.py; used to free the ports when an
# old process has not exited.
LANGCHAIN_LISTEN_PORTS=(7861 20000 20101 20002)
# Signal every process that lsof reports as listening on a langchain port:
# a TERM pass first, then, two seconds later, a KILL pass for survivors.
# Globals:   LANGCHAIN_LISTEN_PORTS (read)
# Outputs:   one log_tee line per PID that was signalled successfully
_kill_by_listen_ports() {
  local sig listen_port victim
  for sig in TERM KILL; do
    # Grace period between the polite pass and the forced pass.
    if [[ "$sig" == KILL ]]; then
      sleep 2
    fi
    for listen_port in "${LANGCHAIN_LISTEN_PORTS[@]}"; do
      # Word splitting is intended here: lsof -t prints bare PIDs, one per line.
      for victim in $(lsof -ti :"$listen_port" -sTCP:LISTEN 2>/dev/null || true); do
        kill "-$sig" "$victim" 2>/dev/null \
          && log_tee "按端口 ${listen_port} ${sig} PID=${victim}"
      done
    done
  done
}
# Report whether any langchain port is still in LISTEN state, using ss.
# lsof alone is not enough: a non-root user cannot see the PID of a listener
# owned by root, so the port would be misjudged as released and the service
# started again, failing with "[Errno 98] address already in use".
# Globals:   LANGCHAIN_LISTEN_PORTS (read)
# Returns:   0 if at least one port is listening, 1 otherwise
_langchain_ports_still_listen() {
  local port listeners
  # Take a single snapshot of the listening sockets and match every port
  # against it, instead of spawning ss once per port.
  listeners=$(ss -tln 2>/dev/null) || true
  for port in "${LANGCHAIN_LISTEN_PORTS[@]}"; do
    # The trailing [[:space:]] keeps :7861 from matching :78610.
    if grep -qE ":${port}[[:space:]]" <<<"$listeners"; then
      return 0
    fi
  done
  return 1
}
# Free the langchain ports with "sudo fuser -k" when passwordless sudo works.
# Without passwordless sudo, log the commands the operator can run by hand.
# Globals:   LANGCHAIN_LISTEN_PORTS (read)
# Outputs:   hints and per-port results via log_tee
# Returns:   0 after the fuser pass; 1 if sudo is missing or needs a password
_sudo_fuser_kill_ports() {
  command -v sudo >/dev/null 2>&1 || return 1
  local port
  if ! sudo -n true 2>/dev/null; then
    # Build the fuser arguments from the port array so the hint cannot drift
    # away from the configured ports.
    local fuser_targets=""
    for port in "${LANGCHAIN_LISTEN_PORTS[@]}"; do
      fuser_targets+="${fuser_targets:+ }${port}/tcp"
    done
    log_tee "提示: 相关端口仍被占用(可能是 root 启动的进程)。当前用户无法用 lsof 看到其 PID。"
    log_tee "请在本机执行其一: sudo bash $0 或 sudo fuser -k ${fuser_targets}"
    return 1
  fi
  for port in "${LANGCHAIN_LISTEN_PORTS[@]}"; do
    # Re-check each port right before killing: an earlier kill may already
    # have released it.
    if ss -tln 2>/dev/null | grep -qE ":${port}[[:space:]]"; then
      sudo -n fuser -k "${port}/tcp" 2>/dev/null && log_tee "sudo fuser -k ${port}/tcp"
    fi
  done
  # Give the killed processes a moment to release their sockets.
  sleep 2
  return 0
}
#######################################
# Stop any running langchain-chat instance and wait until its ports are free.
# Steps: pkill by command-line patterns, TERM startup.py processes whose cwd
# is LC_ROOT, kill lsof-visible listeners, try passwordless sudo fuser, then
# wait up to 30 seconds for the ports to drain.
# Globals:   LC_ROOT, LANGCHAIN_LISTEN_PORTS (read)
# Outputs:   progress and error messages via log_tee
# Returns:   0 once no port is listening; otherwise exits the script with 1
#######################################
_stop_langchain() {
  # Everything is local so the function does not leak pat/pid into the caller.
  local pat pid cwd cmdl lc_root_real port port_list="" waited=0
  # Command-line shapes to match (-a / --all-api, conda path, plain python).
  local -a stop_patterns=(
    '/opt/software/miniconda3/envs/langchain-chat/bin/python.*startup.py'
    'conda run -n langchain-chat.*startup.py'
    'python.*startup.py --all-api'
    'python.*startup.py -a'
  )
  # Slash-separated port list for log messages, derived from the array so the
  # messages always agree with the ports that are actually checked.
  for port in "${LANGCHAIN_LISTEN_PORTS[@]}"; do
    port_list+="${port_list:+/}${port}"
  done

  log_tee "======== 停止 langchain-chat ========"
  for pat in "${stop_patterns[@]}"; do
    if pkill -f "$pat" 2>/dev/null; then
      log_tee "pkill 已匹配并发送信号: $pat"
    fi
  done

  # Fallback: any process whose command line mentions startup.py and whose cwd
  # is the langchain-chat root. /proc/<pid>/cwd is symlink-resolved, so also
  # compare against the resolved root; otherwise a symlinked LC_ROOT never
  # matches.
  lc_root_real=$(readlink -f "$LC_ROOT" 2>/dev/null || true)
  while read -r pid; do
    [ -z "${pid:-}" ] && continue
    [ ! -d "/proc/$pid" ] && continue
    cwd=$(readlink -f "/proc/$pid/cwd" 2>/dev/null || true)
    # 2>/dev/null comes first so a failed open of cmdline is silenced as well.
    cmdl=$(tr '\0' ' ' 2>/dev/null <"/proc/$pid/cmdline" || true)
    [[ "$cmdl" == *startup.py* ]] || continue
    if [[ "$cwd" == "$LC_ROOT" ]] \
      || [[ -n "$lc_root_real" && "$cwd" == "$lc_root_real" ]]; then
      kill -TERM "$pid" 2>/dev/null && log_tee "按 cwd+argv TERM PID=$pid"
    fi
  done < <(pgrep -f 'startup\.py' 2>/dev/null || true)
  sleep 2

  # pkill may have matched nothing while an old instance still holds the ports
  # (including one started by root that a non-root lsof cannot see).
  if _langchain_ports_still_listen; then
    log_tee "langchain 相关端口仍监听 (${port_list}),按 lsof 可见 PID 清理…"
    _kill_by_listen_ports
  else
    log_tee "相关端口已释放,跳过端口强杀"
  fi
  if _langchain_ports_still_listen; then
    log_tee "端口仍被占用,尝试免密 sudo fuser 清理…"
    _sudo_fuser_kill_ports || true
  fi

  # Poll once per second, for at most 30 seconds, until the ports are released.
  while _langchain_ports_still_listen && [ "$waited" -lt 30 ]; do
    sleep 1
    waited=$((waited + 1))
  done
  if _langchain_ports_still_listen; then
    log_tee "错误: ${port_list} 仍有端口在监听,无法安全启动第二个实例。"
    log_tee "请先释放端口后再执行本脚本(见上方 sudo 提示)。"
    exit 1
  fi
  sleep 1
}
_stop_langchain
# Stop any previous log-trim daemon for this log file so only one ever runs.
pkill -f "log-trim-daemon\.sh --file ${LOG_FILE}" 2>/dev/null || true
log_tee "======== 启动 langchain-chat (--all-api) ========"
if [ ! -f "$CONDA_SH" ]; then
  log_tee "错误: 未找到 $CONDA_SH"
  exit 1
fi
# shellcheck source=/dev/null
source "$CONDA_SH"
# Abort if the environment cannot be activated: otherwise startup.py would be
# launched with whatever "python" happens to be first on PATH.
if ! conda activate langchain-chat; then
  log_tee "错误: conda activate langchain-chat 失败"
  exit 1
fi
# Abort if the project directory is unreachable: startup.py is started with a
# relative path, so it must run from LC_ROOT.
if ! cd "$LC_ROOT"; then
  log_tee "错误: 无法进入目录 $LC_ROOT"
  exit 1
fi
PYTHONPATH="$LC_ROOT"
export PYTHONPATH
# PDF preview root, kept in line with kb_config.PDF_CONVERT_KB_ROOT (address
# normalisation itself happens on the Python side). A value that is already
# set and non-empty wins.
: "${PDF_CONVERT_KB_ROOT:=$LC_ROOT/knowledge_base}"
export PDF_CONVERT_KB_ROOT
# Force a requestable address: a plain ":-" default could not replace a value
# that is already set to 0.0.0.0, so both the empty and the 0.0.0.0 case are
# overridden here.
case "${PDF_CONVERT_API_URL:-}" in
  '' | *0.0.0.0*)
    export PDF_CONVERT_API_URL="http://127.0.0.1:6006/convert/"
    ;;
esac
# Drop stale bytecode that may still carry the hard-coded 0.0.0.0:6006.
for stale_module in \
  configs/__pycache__/kb_config \
  server/knowledge_base/__pycache__/file_converter \
  server/knowledge_base/__pycache__/cleanpdf \
  server/knowledge_base/__pycache__/pdf_convert_url; do
  rm -f "$LC_ROOT/$stale_module."*.pyc 2>/dev/null || true
done
# Cap the log size (keep the last 5 MB so langchain-chat.log cannot grow
# without bound). The argument order is kept exactly as before because the
# pkill pattern used on restart matches "log-trim-daemon.sh --file <log>".
trim_daemon_cmd=(
  bash "$GANGYAN_ROOT/scripts/log-trim-daemon.sh"
  --file "$LOG_FILE" --max-mb 5 --interval-sec 3
)
nohup "${trim_daemon_cmd[@]}" >> "$TRIM_LOG" 2>&1 &
log_tee "已启动日志裁剪守护,日志: $TRIM_LOG"
nohup python startup.py --all-api >> "$LOG_FILE" 2>&1 &
# Capture the PID of the job that was just put in the background.
langchain_pid=$!
log_tee "已后台启动PID=${langchain_pid} API 约 7861 ,日志: $LOG_FILE"
# Conversion microservice that local PDF preview depends on (:6006); it is
# started only when nothing is listening on that port yet.
if ss -tln 2>/dev/null | grep -qE ':6006[[:space:]]'; then
  log_tee "pdf-convert-service 端口 6006 已在监听,跳过"
else
  bash "$GANGYAN_ROOT/scripts/pdf-convert-service.sh" >> "$LOG_DIR/pdf-convert-service.log" 2>&1 \
    || log_tee "提示: pdf-convert-service 启动失败,见 $LOG_DIR/pdf-convert-service.log"
fi