主要变化:
- 新增 agent_v2.py: 用 LangGraph create_react_agent + astream_events 替代原 agent_chat_test 的 LLM step-routing 死循环
- 新增 tools_v2.py: 闭包工厂模式,每个请求按 uuid 生成工具列表,消除 toolinput 字符串拼 JSON 注入 uuid 的旧 hack
- chat_test.py:266-346: 删 11 次 count_process 重试外层和事件分发 spaghetti,换成 agent_run 单次调用 + 简单事件 dispatcher
- policy_fun_iast.py:168-187: 修 broken <think> filter。老代码把 start_flag 设反了(看见 <think> 才开始 yield),导致非 think 模型 yield 不出任何内容;改为正确跳过 <think>...</think> 块

模型函数调用通过 langchain_openai.ChatOpenAI(不能用旧版 langchain_community.chat_models.ChatOpenAI,没有现代 tool calling)。
依赖: langgraph==0.0.49 + langchain-core==0.1.53(已在服务器装好)。
非 stream 分支保留旧 agent_chat_test 路径(极少触发,回归风险低)。
旧版回滚: git checkout backup/pre-langgraph

实测对比:
- 旧版 30-60s,答案 0 字(filter 卡死后展示 11 次重试)
- 新版 25-40s,答案完整(含工具调用、参考文献、推荐问题、摘要)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
199 lines
8.3 KiB
Python
199 lines
8.3 KiB
Python
import asyncio
|
||
import time
|
||
from langchain.chains import LLMChain
|
||
from langchain_core.prompts import ChatPromptTemplate
|
||
from configs import KB_PROMPT, LLM_PROMPT, logger
|
||
|
||
from configs.prompt_config import AGENT_PROMPT, AGENT_WRITE_PROMPT, COMPARISON
|
||
from server.chat.utils import History
|
||
from server.utils import get_prompt_template, get_strategy_prompt_template, get_ChatOpenAI
|
||
import openai
|
||
from typing import Any, AsyncGenerator
|
||
from langchain.schema import HumanMessage
|
||
|
||
# Number of retries allowed after the first failed LLM call; the retry loops
# in this module run while ``retry_count <= MAX_RETRIES``.
MAX_RETRIES = 2
|
||
# Pause between two attempts, in seconds (passed to time.sleep / asyncio.sleep).
RETRY_DELAY = 1
|
||
# Upper bound that requested max_tokens values are clamped to before a model is built.
MAX_MAX_TOKENS = 8192  # Alibaba Cloud DashScope API limit
|
||
|
||
def _resolve_prompt_template(template_prompt_name: str) -> str:
    """Return the prompt text to use for *template_prompt_name*.

    The name is looked up in the prompt registries in a fixed order
    (KB_PROMPT, LLM_PROMPT, COMPARISON, AGENT_PROMPT, AGENT_WRITE_PROMPT);
    anything not found there falls back to the strategy templates of
    "knowledge_base_chat".
    """
    if template_prompt_name in KB_PROMPT:
        return get_prompt_template("knowledge_base_chat", template_prompt_name)
    if template_prompt_name in LLM_PROMPT:
        return get_prompt_template("llm_chat", template_prompt_name)
    if template_prompt_name in COMPARISON:
        return get_prompt_template("comparison_chat", template_prompt_name)

    # The two agent registries only apply to the new agent flow; when adding
    # other registries, check that their names do not clash with these.
    if template_prompt_name in AGENT_PROMPT:
        base_name = "Think Test Bak"
        standalone_names = ("Think Test Bak", "get_next_tip")
    elif template_prompt_name in AGENT_WRITE_PROMPT:
        base_name = "Write Test Bak"
        standalone_names = ("Write Test Bak", "get_next_write_tip")
    else:
        return get_strategy_prompt_template("knowledge_base_chat", template_prompt_name)

    if template_prompt_name in standalone_names:
        # The base template itself and the "next tip" template are used as-is.
        return get_prompt_template("agent_chat", template_prompt_name)

    # Every other agent template is prefixed with the shared base template.
    base_text = get_prompt_template("agent_chat", base_name)
    specific_text = get_prompt_template("agent_chat", template_prompt_name)
    return f"{base_text}{specific_text}"


def get_llm_model_response(
    strategy_name: str,
    llm_model_name: str,
    template_prompt_name: str,
    prompt_param_dict: dict,
    temperature: float,
    max_tokens: int,
    **kwargs: Any,
) -> str:
    """Call the LLM synchronously with the prompt selected by name.

    Args:
        strategy_name: Label of the calling strategy; not used by the body.
        llm_model_name: Model name passed to ``get_ChatOpenAI``.
        template_prompt_name: Name used to select the prompt template.
        prompt_param_dict: Values passed to ``LLMChain.run`` to fill the prompt.
        temperature: Sampling temperature passed to ``get_ChatOpenAI``.
        max_tokens: Requested token limit; values above ``MAX_MAX_TOKENS`` are
            clamped (with a warning). ``None`` is passed through unchanged.
        **kwargs: Extra keyword arguments forwarded to ``get_ChatOpenAI``.

    Returns:
        The text returned by ``LLMChain.run``.

    Raises:
        Exception: The error of the last attempt, re-raised once
            ``MAX_RETRIES`` retries have failed.
    """
    # Keep max_tokens within the API limit.
    if max_tokens is not None and max_tokens > MAX_MAX_TOKENS:
        logger.warning(f"max_tokens({max_tokens}) 超过 API 限制,已调整为 {MAX_MAX_TOKENS}")
        max_tokens = MAX_MAX_TOKENS

    # No callbacks here: they would add this intermediate model output to the
    # final answer.
    # NOTE(review): the first attempt does not force streaming=False (it was
    # commented out in the original); only retries do. Confirm this is intended.
    model = get_ChatOpenAI(
        model_name=llm_model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        callbacks=[],
        **kwargs,
    )

    prompt_template = _resolve_prompt_template(template_prompt_name)
    input_msg = History(role="system", content=prompt_template).to_msg_template(False)
    prompt = ChatPromptTemplate.from_messages([input_msg])

    # Retries always disable streaming. Merging into one dict (instead of
    # passing streaming=False next to **kwargs) avoids a "multiple values for
    # keyword argument" TypeError when the caller already supplied `streaming`.
    retry_kwargs = {**kwargs, "streaming": False}

    retry_count = 0
    while retry_count <= MAX_RETRIES:
        try:
            llm_chain = LLMChain(prompt=prompt, llm=model, verbose=True)
            return llm_chain.run(prompt_param_dict)
        except Exception as e:
            retry_count += 1
            if retry_count > MAX_RETRIES:
                logger.error(f"LLM调用失败,已达到最大重试次数 {MAX_RETRIES}: {e}")
                raise
            logger.warning(f"LLM调用第 {retry_count} 次失败,{RETRY_DELAY}秒后重试: {e}")
            time.sleep(RETRY_DELAY)
            # Build a fresh model for the next attempt, with streaming off.
            model = get_ChatOpenAI(
                model_name=llm_model_name,
                temperature=temperature,
                max_tokens=max_tokens,
                callbacks=[],
                **retry_kwargs,
            )
|
||
|
||
async def get_llm_model_response_async(
    strategy_name: str,
    llm_model_name: str,
    template_prompt_name: str,
    prompt_param_dict: dict,
    temperature: float,
    max_tokens: int,
    **kwargs: Any,
) -> str:
    """Run ``get_llm_model_response`` in the default executor and await it.

    The synchronous call (including its retries and sleeps) runs in the
    loop's default executor so it does not block the event loop.

    Args:
        strategy_name: Forwarded unchanged to ``get_llm_model_response``.
        llm_model_name: Forwarded unchanged.
        template_prompt_name: Forwarded unchanged.
        prompt_param_dict: Forwarded unchanged.
        temperature: Forwarded unchanged.
        max_tokens: Forwarded unchanged.
        **kwargs: Optional extra keyword arguments, forwarded so this wrapper
            accepts the same options as the synchronous function.

    Returns:
        Whatever ``get_llm_model_response`` returns.
    """
    from functools import partial

    # run_in_executor only accepts positional arguments, so bind everything
    # (keyword arguments included) into a single callable first.
    blocking_call = partial(
        get_llm_model_response,
        strategy_name,
        llm_model_name,
        template_prompt_name,
        prompt_param_dict,
        temperature,
        max_tokens,
        **kwargs,
    )
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # recommended accessor and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, blocking_call)
|
||
|
||
|
||
class _ThinkBlockFilter:
    """Incrementally remove ``<think>...</think>`` spans from chunked text.

    Unlike a per-chunk ``str.find``, this keeps a possible tag prefix at the
    end of a chunk (for example ``"<thi"``) until the next chunk arrives, so
    tags split across chunk boundaries are still recognised.
    """

    _OPEN = "<think>"
    _CLOSE = "</think>"

    def __init__(self) -> None:
        self._in_think = False  # True while inside a <think> block
        self._pending = ""      # held-back text that may be the start of a tag

    @staticmethod
    def _partial_tag_len(text: str, tag: str) -> int:
        """Return the length of the longest proper prefix of *tag* ending *text*."""
        for size in range(min(len(tag) - 1, len(text)), 0, -1):
            if text.endswith(tag[:size]):
                return size
        return 0

    def feed(self, chunk: str) -> str:
        """Consume *chunk* and return the text that is outside think blocks."""
        text = self._pending + chunk
        self._pending = ""
        visible = []
        while text:
            tag = self._CLOSE if self._in_think else self._OPEN
            idx = text.find(tag)
            if idx >= 0:
                if not self._in_think:
                    visible.append(text[:idx])
                text = text[idx + len(tag):]
                self._in_think = not self._in_think
                continue
            # No complete tag: hold back a trailing partial tag, if any.
            held = self._partial_tag_len(text, tag)
            if held:
                self._pending = text[-held:]
                text = text[:-held]
            if not self._in_think:
                visible.append(text)
            break
        return "".join(visible)

    def flush(self) -> str:
        """Return held-back text at end of stream (dropped if inside a think block)."""
        tail = "" if self._in_think else self._pending
        self._pending = ""
        return tail


async def get_llm_model_response_stream_openai(
    type: int,
    strategy_name: str,
    llm_model_name: str,
    template_prompt_name: str,
    prompt_param_dict: dict,
    temperature: float,
    max_tokens: int,
) -> AsyncGenerator[str, None]:
    """Stream the model's answer, skipping ``<think>...</think>`` blocks.

    Args:
        type: Prompt mode.
            0 - "Think Test Bak" agent template + the named agent template,
                with ``enable_thinking`` requested via ``extra_body``.
            1 - "Write Test Bak" agent template + the named agent template.
            2 - the named "llm_chat" template alone, with ``enable_thinking``
                requested via ``extra_body``.
        strategy_name: Label of the calling strategy; not used by the body.
        llm_model_name: Model name passed to ``get_ChatOpenAI``.
        template_prompt_name: Name of the template to fetch.
        prompt_param_dict: Mapping of placeholder name to value; every
            ``{{name}}`` in the prompt is replaced by ``str(value)``.
        temperature: Sampling temperature passed to ``get_ChatOpenAI``.
        max_tokens: Requested token limit, clamped to ``MAX_MAX_TOKENS``.

    Yields:
        Pieces of the answer text that lie outside think blocks.

    Raises:
        ValueError: If *type* is not 0, 1 or 2.
        Exception: The streaming error, re-raised after ``MAX_RETRIES``
            retries, or immediately when text was already yielded (a retry
            would restart the stream and duplicate that text).
    """
    # Fail fast on an unsupported mode instead of hitting an unbound local
    # inside the retry loop and sleeping through pointless retries.
    if type not in (0, 1, 2):
        raise ValueError(f"unsupported type {type!r}; expected 0, 1 or 2")

    # Keep max_tokens within the API limit.
    if max_tokens is not None and max_tokens > MAX_MAX_TOKENS:
        logger.warning(f"max_tokens({max_tokens}) 超过 API 限制,已调整为 {MAX_MAX_TOKENS}")
        max_tokens = MAX_MAX_TOKENS

    # Building the prompt is deterministic, so do it once, outside the retries.
    if type == 2:
        prompt_template = get_prompt_template("llm_chat", template_prompt_name)
    else:
        base_name = "Think Test Bak" if type == 0 else "Write Test Bak"
        prompt_template1 = get_prompt_template("agent_chat", base_name)
        prompt_template2 = get_prompt_template("agent_chat", template_prompt_name)
        prompt_template = f"{prompt_template1}{prompt_template2}"
    for key, value in prompt_param_dict.items():
        # str() so that non-string values do not make str.replace raise.
        prompt_template = prompt_template.replace(f"{{{{{key}}}}}", str(value))
    messages = [HumanMessage(content=prompt_template)]

    model_kwargs = {}
    if type in (0, 2):
        model_kwargs["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}

    retry_count = 0
    while retry_count <= MAX_RETRIES:
        emitted = False  # whether this attempt already yielded text
        try:
            model = get_ChatOpenAI(
                model_name=llm_model_name,
                temperature=temperature,
                max_tokens=max_tokens,
                callbacks=[],
                **model_kwargs,
            )
            think_filter = _ThinkBlockFilter()
            async for chunk in model.astream(messages):
                visible = think_filter.feed(chunk.content or "")
                if visible:
                    emitted = True
                    yield visible
            tail = think_filter.flush()
            if tail:
                yield tail
            return  # stream finished successfully
        except Exception as e:
            if emitted:
                # The consumer already received part of the answer; restarting
                # would send the beginning again, so surface the error instead.
                logger.error(f"流式LLM调用在输出部分内容后失败,不再重试以免重复输出: {e}")
                raise
            retry_count += 1
            if retry_count > MAX_RETRIES:
                logger.error(f"流式LLM调用失败,已达到最大重试次数 {MAX_RETRIES}: {e}")
                raise
            logger.warning(f"流式LLM调用第 {retry_count} 次失败,{RETRY_DELAY}秒后重试: {e}")
            await asyncio.sleep(RETRY_DELAY)
|