三个独立修复 / 排查:
1. R1 思考过程不显示
- 根因: chat_test.py 等 <think> 开标签出现才进思考态,但 R1
流式输出本来就在 reasoning 态启动,永远不出 <think>,所有
reasoning 全部当 text 走到答案区
- 修法: 引入 r1_thinking_done 状态机,默认在思考态,
看到 </think> 切换;R1-70B 直连本地代理 deepseek-r1
(官方 deepseek-reasoner 把 reasoning 放独立字段,旧版
callback 取不到)
- 结果验证: "1+1" → 269 think + 40 text,思考与答案正确分流
2. 选题推荐场景拒答 + chat 模板标记泄漏
- 根因: prompt 写死了 "你只能回答有关选题推荐的问题"
+ 直接嵌入 <|im_start|>/<|im_end|> Qwen chat 标记
- 修法: 改写 Topic Recommend Assistant prompt,删 chat 标记,
行为准则改为"沾边查询用工具回答";agent_v2 增加 strip
防守层
- 结果验证: "钢铁行业研究重点方向" → agent 调工具,不再拒答
3. 知识库召回 0 排查(数据问题,未根治)
- 根因: kb_config.py 把所有 KB(政策库/钢铁库/报告库等)
都映射到 t_policy_total_bge_new_v2,但 Milvus 里根本没有
这个 collection(实际只有 11 个 p_* 个人库 + 1 个
t_journal_article_bge_v1)
- 临时改: search_tool.py 加诊断日志 [RAG诊断] 输出每个 KB
召回数;rag_search 内 for 循环里首个 KB 空就 return 的
bug 改 continue
- 待决策: kb_config 是否把默认 KB 映射到唯一存在的
t_journal_article_bge_v1,或重建对应集合
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
178 lines
7.2 KiB
Python
178 lines
7.2 KiB
Python
"""
|
||
LangGraph 版 Agent runner。
|
||
|
||
替代旧的 agent_chat_test 内核:
|
||
- 不再用 LLM 做 step routing(thinking/select_tool/answer),让模型 function-calling 自己决定
|
||
- 同一轮的多个 tool_calls 自动并行(ToolNode)
|
||
- 把 LangGraph 事件流映射到现有前端协议({"text":...}/{"docs":...}/{"detail":...})
|
||
|
||
输入:query + history + uuid + model_name
|
||
输出:和旧版 agent_chat_test 一样的 dict 序列("answer"/"docs"/"detail"/...)
|
||
"""
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import re
|
||
from typing import AsyncIterable, List, Optional
|
||
|
||
from langgraph.prebuilt import create_react_agent
|
||
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
|
||
from langchain_openai import ChatOpenAI
|
||
|
||
from configs import LLM_MODELS, prompt_config
|
||
from server.utils import get_prompt_template, get_model_worker_config
|
||
from server.chat import utils as shared_utils
|
||
from server.chat.tools_v2 import make_tools
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
_CHAT_MARKER_RE = re.compile(r"<\|im_(?:start|end)\|>")
|
||
|
||
|
||
def _strip_chat_markers(text: str) -> str:
|
||
"""剥掉 prompt 内嵌的 Qwen chat template 标记,避免模型 echo 泄漏到答案。"""
|
||
return _CHAT_MARKER_RE.sub("", text or "")
|
||
|
||
|
||
def _build_system_prompt(user_prompt_name: str, query: str, think_content: str) -> str:
|
||
"""复用旧版 Think Test Bak + 用户业务 prompt 的拼装逻辑,但简化为单条 system message。"""
|
||
user = get_prompt_template("llm_chat", user_prompt_name) if user_prompt_name else ""
|
||
user = _strip_chat_markers(user)
|
||
think_content = _strip_chat_markers(think_content)
|
||
|
||
parts = []
|
||
parts.append("你是浪潮开发的智能专家。回答用户问题前可以使用工具检索资料。")
|
||
parts.append("严格要求:")
|
||
parts.append("1. 优先使用工具获取资料后再回答,禁止虚构内容")
|
||
parts.append("2. 同一个工具同一参数禁止反复调用超过 2 次")
|
||
parts.append("3. 回答时必须基于工具返回的资料,引用要标注【】序号")
|
||
parts.append("4. 涉及国家政策优先用 知识库联想 + 政策库")
|
||
parts.append("5. 答案紧扣用户问题,不要主观臆想")
|
||
parts.append("")
|
||
parts.append(f"思考提示:{think_content}")
|
||
parts.append("")
|
||
if user:
|
||
parts.append(f"业务约束:{user}")
|
||
return "\n".join(parts)
|
||
|
||
|
||
def _convert_history(history: list) -> list:
|
||
"""把 chat_test.py 的 history list(dict role/content)转成 LangChain messages。"""
|
||
msgs = []
|
||
for h in history or []:
|
||
role = h.get("role")
|
||
content = h.get("content", "")
|
||
if role == "user":
|
||
msgs.append(("user", content))
|
||
elif role == "assistant":
|
||
msgs.append(("assistant", content))
|
||
return msgs
|
||
|
||
|
||
async def agent_run(
|
||
*,
|
||
query: str,
|
||
uuid: str,
|
||
history: Optional[list] = None,
|
||
model_name: str = None,
|
||
temperature: float = 0.3,
|
||
max_tokens: Optional[int] = None,
|
||
user_prompt_name: str = "",
|
||
think_content: str = "",
|
||
) -> AsyncIterable[str]:
|
||
"""运行 LangGraph agent,yield 事件 JSON 字符串。
|
||
|
||
yield 协议(向后兼容 chat_test.py 的消费逻辑):
|
||
{"text": str} → 思考框/答案框文本(按出现位置区分)
|
||
{"answer": str} → token 级答案流(chat_test 包装为 {"text":...})
|
||
{"docs": str} → 工具返回的资料文档(参考文献区)
|
||
{"detail": str} → 详细资料累积(detail_answer 用)
|
||
{"tool_start": dict} → 调试/日志:工具开始
|
||
{"tool_end": dict} → 调试/日志:工具结束
|
||
"""
|
||
model_name = model_name or LLM_MODELS[0]
|
||
# 必须用 langchain_openai.ChatOpenAI(支持现代 tool calling 协议)
|
||
# 不能用 server.utils.get_ChatOpenAI(返回 langchain_community 老版,不支持 bind_tools)
|
||
cfg = get_model_worker_config(model_name)
|
||
llm = ChatOpenAI(
|
||
model=model_name,
|
||
base_url=cfg.get("api_base_url"),
|
||
api_key=cfg.get("api_key", "EMPTY"),
|
||
temperature=temperature,
|
||
max_tokens=max_tokens,
|
||
streaming=True,
|
||
)
|
||
|
||
tools = make_tools(uuid)
|
||
|
||
# 用 Think Test Bak + user_prompt 构造 system message
|
||
system_prompt = _build_system_prompt(user_prompt_name, query, think_content)
|
||
agent = create_react_agent(llm, tools=tools, messages_modifier=system_prompt)
|
||
|
||
msgs = _convert_history(history)
|
||
msgs.append(("user", query))
|
||
inputs = {"messages": msgs}
|
||
config = {"recursion_limit": 12} # 最多 12 步(远小于旧版 11 次外层 × N 内层)
|
||
|
||
answer_buf = []
|
||
try:
|
||
async for ev in agent.astream_events(inputs, config=config, version="v1"):
|
||
# 检查停止信号
|
||
if not shared_utils.get_shared_variable(uuid).get("status", True):
|
||
logger.info("Agent 收到停止信号")
|
||
break
|
||
|
||
kind = ev["event"]
|
||
name = ev.get("name", "")
|
||
|
||
if kind == "on_chat_model_stream":
|
||
chunk = ev["data"]["chunk"]
|
||
content = chunk.content or ""
|
||
if content:
|
||
answer_buf.append(content)
|
||
yield json.dumps({"answer": content}, ensure_ascii=False)
|
||
|
||
elif kind == "on_tool_start":
|
||
tool_input = ev["data"].get("input", {})
|
||
logger.info(f"工具调用开始: {name}({tool_input})")
|
||
# 工具说明落到思考框(前端的 thinking 区域)
|
||
yield json.dumps(
|
||
{"think": f"\n→ 调用工具:{name}\n"},
|
||
ensure_ascii=False,
|
||
)
|
||
|
||
elif kind == "on_tool_end":
|
||
output = str(ev["data"].get("output", ""))
|
||
logger.info(f"工具调用结束: {name} → {len(output)} chars")
|
||
|
||
# 知识库联想 / 联网思索 → 提取 source_docs 给前端参考文献区
|
||
if name in ("知识库联想", "联网思索"):
|
||
source = shared_utils.get_shared_variable(uuid)
|
||
source_docs = source.get("source_docs", [])
|
||
if source_docs:
|
||
try:
|
||
docs_string = "\n" + "\n".join(f"{str(d)}\n" for d in source_docs)
|
||
yield json.dumps({"docs": docs_string}, ensure_ascii=False)
|
||
except Exception:
|
||
logger.exception("docs 序列化失败")
|
||
|
||
# detail(详细搜索内容)累积到 docs_detail,给后续幻觉校验用
|
||
if name in ("知识库联想", "联网思索"):
|
||
yield json.dumps({"detail": output}, ensure_ascii=False)
|
||
|
||
except asyncio.CancelledError:
|
||
logger.info("Agent 被取消")
|
||
raise
|
||
except Exception as e:
|
||
logger.exception(f"Agent 运行异常: {e}")
|
||
# 给前端一个兜底答案
|
||
yield json.dumps(
|
||
{"answer": f"\n\n[Agent 运行异常] 已尽力使用工具但未能完整生成答案,请重试或简化问题。"},
|
||
ensure_ascii=False,
|
||
)
|
||
|
||
# 终态收尾
|
||
full_answer = "".join(answer_buf)
|
||
logger.info(f"Agent 完成:答案长度 {len(full_answer)} chars")
|