"""Concurrent load test for an OpenAI-compatible chat-completions endpoint.

Usage: ``python llm_test.py C N`` where ``C`` is the maximum number of
in-flight requests and ``N`` is the total number of requests to send.

Every request asks one randomly chosen question from ``questions``. Each
question/answer pair is appended to the output file, followed by a summary
of token counts, latency and throughput.
"""

import asyncio
import os
import random
import sys
import time

import aiohttp
from tqdm import tqdm

# --- Configuration ---------------------------------------------------------
# Alternative targets used in earlier runs:
#   LLM_MODEL = "Qwen2-72B-Instruct"; LLM_ENDPOINT = "http://192.168.56.123:8822/v1"
#   LLM_MODEL = "deepseek-chat";      LLM_ENDPOINT = "https://api.deepseek.com/v1"
LLM_MODEL = "qwen-max-2025-01-25"
LLM_ENDPOINT = "https://dashscope.aliyuncs.com/compatible-mode/v1"
TEMPERATURE = 0.7  # non-zero so that every response differs
MAX_TOKENS = 8192

# SECURITY: this credential is committed to source control. Prefer setting the
# LLM_API_KEY environment variable; rotate the key below and drop the fallback.
API_KEY = os.environ.get("LLM_API_KEY", "sk-8b498c0de2dc437aab8efa490d4021ba")

# File that receives the Q/A log and the final summary.
OUTPUT_FILE = 'A800_Qwen2.5-72Bint8_bench_.txt'

# Pool of prompts; one is drawn at random for every request.
questions = [
    "为什么鸟儿会唱歌?",
    "为什么我们有季节?",
    "为什么星星会闪烁?",
    "为什么我们会打哈欠?",
    "为什么太阳是热的?",
    "为什么猫会咕噜咕噜叫?",
    "为什么狗会吠?",
    "为什么鱼会游泳?",
    "为什么我们有指纹?",
    "为什么我们会打喷嚏?",
    "为什么我们有眉毛?",
    "为什么我们有头发?",
    "为什么我们有指甲?",
    "为什么我们有牙齿?",
    "为什么我们有骨头?",
    "为什么我们有肌肉?",
    "为什么我们有血液?",
    "为什么我们有心脏?",
    "为什么我们有肺?",
    "为什么我们有大脑?",
    "为什么我们有皮肤?",
    "为什么我们有耳朵?",
    "为什么我们有眼睛?",
    "为什么我们有鼻子?",
    "为什么我们有嘴巴?",
    "为什么我们有舌头?",
    "为什么我们有胃?",
    "为什么我们有肠子?",
    "为什么我们有肝脏?",
    "为什么我们有肾脏?",
    "为什么我们有膀胱?",
    "为什么我们有胰腺?",
    "为什么我们有脾脏?",
    "为什么我们有胆囊?",
    "为什么我们有甲状腺?",
    "为什么我们有肾上腺?",
    "为什么我们有垂体?",
    "为什么我们有下丘脑?",
    "为什么我们有胸腺?",
    "为什么我们有淋巴结?",
    "为什么我们有脊髓?",
    "为什么我们有神经?",
    "为什么我们有循环系统?",
    "为什么我们有呼吸系统?",
    "为什么我们有消化系统?",
    "为什么我们有免疫系统?",
]


def log_to_file(file, message):
    """Append ``message`` plus a trailing newline to the file at path ``file``."""
    with open(file, 'a', encoding='utf-8') as f:
        f.write(message + '\n')


async def fetch(session, url, file=None):
    """Send one chat-completion request and report its token usage and latency.

    Args:
        session: HTTP client session; only ``session.post`` is used.
        url: Full chat-completions URL to POST to.
        file: Optional path; when given, the question and answer are appended
            to it.

    Returns:
        ``(prompt_tokens, completion_tokens, request_time_seconds)``.
        ``(0, 0, 0)`` is returned for a non-200 response or when any
        exception occurs while sending or decoding the request.
    """
    start_time = time.perf_counter()
    question = random.choice(questions)

    # Swap the system prompt to benchmark other workloads (e.g. long-form
    # drafting or translation).
    json_payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": "你的任务是用专业学术语言,严谨的科学态度,全面的数据支持,完整详实地回答user的问题。"},
            {"role": "user", "content": question},
        ],
        "temperature": TEMPERATURE,
        "max_tokens": MAX_TOKENS,
        "stream": False,
    }
    headers = {
        "Content-Type": "application/json",
        # OpenAI-compatible endpoints expect the "Bearer" auth scheme.
        "Authorization": f"Bearer {API_KEY}",
    }

    try:
        async with session.post(url, json=json_payload, headers=headers) as response:
            if response.status != 200:
                print(f"错误: 收到响应码 {response.status}")
                return 0, 0, 0
            response_json = await response.json()
            request_time = time.perf_counter() - start_time

        # Both counters default to 0 so a response without 'usage' still
        # yields a valid result instead of raising on an unbound name.
        prompt_tokens = 0
        completion_tokens = 0
        usage = response_json.get('usage')
        if usage:
            completion_tokens = usage.get('completion_tokens', 0)
            prompt_tokens = usage.get('prompt_tokens', 0)
        else:
            print("警告: 响应中缺少 'usage' 字段。")

        answer = ""
        choices = response_json.get('choices')
        if choices:
            answer = choices[0]['message']['content']
            # NOTE: when a server omits usage, a rough estimate is
            # len(answer) / 1.5 for Qwen or len(answer) * 0.6 for DeepSeek.
        else:
            print("警告: 响应中缺少 'choices' 字段或内容为空。")
            completion_tokens = 0

        # Record the exchange; the log line format is kept stable on purpose.
        if file:
            log_to_file(file, f"Q: {question}\nA: {answer}\n")

        return prompt_tokens, completion_tokens, request_time
    except Exception as e:
        # Best effort: one failed request must not abort the whole benchmark.
        print(f"请求过程中发生异常: {e}")
        return 0, 0, 0


async def bound_fetch(sem, session, url, pbar, file=None):
    """Run ``fetch`` while holding ``sem`` and advance ``pbar`` by one.

    Returns whatever ``fetch`` returns.
    """
    async with sem:
        result = await fetch(session, url, file=file)
        pbar.update(1)
        return result


async def run(load_url, max_concurrent_requests, total_requests, output_file):
    """Issue ``total_requests`` requests with bounded concurrency.

    Args:
        load_url: Chat-completions URL passed to every request.
        max_concurrent_requests: Size of the semaphore limiting in-flight
            requests.
        total_requests: Number of requests to send.
        output_file: Path forwarded to ``fetch`` for Q/A logging.

    Returns:
        ``(total_input_tokens, total_output_tokens, response_times)`` where
        ``response_times`` has one entry per request (0 for failed ones).
    """
    sem = asyncio.Semaphore(max_concurrent_requests)
    timeout = aiohttp.ClientTimeout(total=6000, connect=6000, sock_read=6000, sock_connect=6000)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        with tqdm(total=total_requests) as pbar:
            tasks = [
                asyncio.create_task(bound_fetch(sem, session, load_url, pbar, file=output_file))
                for _ in range(total_requests)
            ]
            results = await asyncio.gather(*tasks)

    # Aggregate token counts and collect per-request latencies.
    total_input_tokens = sum(result[0] for result in results)
    total_output_tokens = sum(result[1] for result in results)
    response_times = [result[2] for result in results]
    return total_input_tokens, total_output_tokens, response_times


def main():
    """Parse ``C`` and ``N`` from the command line, run the benchmark, report."""
    if len(sys.argv) != 3:
        print("用法: python llm_test.py C N (C: 最大并发数, N: 请求总数)")
        sys.exit(1)

    try:
        C = int(sys.argv[1])  # maximum concurrency
        N = int(sys.argv[2])  # total number of requests
    except ValueError:
        print("错误: C 和 N 必须是整数。")
        sys.exit(1)

    # A zero-sized semaphore would block every request forever.
    if C < 1 or N < 0:
        print("错误: C 必须 >= 1,N 必须 >= 0。")
        sys.exit(1)

    url = f'{LLM_ENDPOINT}/chat/completions'
    output_file = OUTPUT_FILE

    # Start every run from an empty log file.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('')

    start_time = time.perf_counter()
    total_input_tokens, total_output_tokens, response_times = asyncio.run(run(url, C, N, output_file))
    total_time = time.perf_counter() - start_time

    # Failed requests report a latency of 0; leave them out of the average so
    # errors do not make the endpoint look faster than it is.
    successful_times = [t for t in response_times if t > 0]
    avg_time_per_request = sum(successful_times) / len(successful_times) if successful_times else 0
    tokens_per_second = total_output_tokens / total_time if total_time > 0 else 0

    final_output = (
        "最终表现:\n"
        f" 输入token数 : {total_input_tokens:.2f}\n"
        f" 输出token数 : {total_output_tokens:.2f}\n"
        f" 并发数 : {C}\n"
        f" 总请求数 : {N}\n"
        f" 总耗时 : {total_time:.2f} 秒\n"
        f" 平均耗时 : {avg_time_per_request:.2f} 秒\n"
        f" 吞吐(QPS) : {tokens_per_second:.2f} tokens/s"
    )
    print(final_output)
    log_to_file(output_file, final_output)  # persist the summary alongside the Q/A log


if __name__ == '__main__':
    main()