from fastapi import Body
from configs import LLM_MODELS, TEMPERATURE, MAX_TOKENS
from server.chat.policy_fun_iast import get_llm_model_response
from typing import Optional
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from server.chat.utils import History
from server.utils import wrap_done, get_ChatOpenAI, get_prompt_template
from langchain.callbacks import AsyncIteratorCallbackHandler
import asyncio
from server.knowledge_base.kb_service.base import TextRank
from configs.basic_config import *

# NOTE(review): `logger` is not imported by name anywhere in this file; it is
# presumably provided by the wildcard import from `configs.basic_config` above.

# How many independent candidate sentences are requested per call.
_NUM_CANDIDATES = 3

# Text returned in place of a generated sentence whenever generation fails.
_ERROR_MESSAGE = "出错了。。请重试。。"


async def sentence_reference(
        paragraph_content: str = Body(..., description="用户框选的内容,<=2句", examples=[""]),
        temperature: float = Body(0.9, description="LLM 采样温度", ge=0.0, le=2.0),
        # NOTE(review): the description below says the default is None, but the
        # actual default is 1024. The string is left untouched because it is
        # part of the published request schema.
        max_tokens: Optional[int] = Body(1024, description="限制LLM生成Token数量,默认None代表模型最大值"),
):
    """Generate several candidate sentences for the text the user selected.

    The "sentence_reference" strategy/prompt is run ``_NUM_CANDIDATES`` times
    concurrently against the first configured model (``LLM_MODELS[0]``), and
    the results are joined into one string, one labelled entry per candidate.

    Args:
        paragraph_content: The user-selected text passed to the prompt as
            ``paragraph_content``.
        temperature: Sampling temperature forwarded to the model call.
        max_tokens: Token limit forwarded to the model call.

    Returns:
        A string of the form ``"句子1:...\\n\\n句子2:...\\n\\n句子3:..."``.
        A candidate that fails is replaced by the fixed error message; if the
        concurrent run as a whole fails, the error message alone is returned.
    """
    logger.info("开始提示句子...")

    # NOTE: an earlier draft also accepted the full preceding document
    # (`context`) and condensed it with TextRank before prompting. That path
    # is disabled; only the selected text is sent to the model.

    async def _generate_candidate():
        """Run one model call in a worker thread; never raises ``Exception``."""
        try:
            # get_llm_model_response is called synchronously here, so it is
            # pushed to a thread to keep the event loop free and to let the
            # candidates actually overlap.
            return await asyncio.to_thread(
                get_llm_model_response,
                strategy_name="sentence_reference",
                llm_model_name=LLM_MODELS[0],
                template_prompt_name="sentence_reference",
                prompt_param_dict={"paragraph_content": paragraph_content},
                temperature=temperature,
                max_tokens=max_tokens,
            )
        except Exception as e:
            # logger.exception keeps the traceback, which a plain
            # logger.error("%s", e) would discard.
            logger.exception("生成提示句子内容时出错: %s", e)
            return _ERROR_MESSAGE

    try:
        # gather() preserves submission order, so numbering stays stable.
        responses = await asyncio.gather(
            *(_generate_candidate() for _ in range(_NUM_CANDIDATES))
        )
    except Exception as e:
        # Safety net: each candidate already handles its own failures.
        logger.exception("并行调用 LLM 模型时出错: %s", e)
        return _ERROR_MESSAGE

    final_output = "\n\n".join(
        f"句子{i + 1}:{response}" for i, response in enumerate(responses)
    )
    logger.info("生成的最终拼接内容: %s", final_output)
    return final_output