Files
gangyan/langchain-chat/configs/translate_config.py

101 lines
2.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ----------------
# 文件翻译相关配置
# ----------------
# 模型相关配置
from datetime import timedelta
import os
LLM_CONCURRENCY_LIMIT: int = 128  # number of concurrent requests sent to the model  FIXME
LLM_RETRIES: int = 5  # number of times a model request is retried
LLM_ENDPOINT: str = "https://api.deepseek.com/v1"
LLM_MODEL: str = "deepseek-chat"
# SECURITY: an API key is committed here in plain text. The LLM_API_KEY
# environment variable takes precedence when it is set and non-empty; the
# literal is kept only as a backward-compatible fallback and should be
# rotated and then removed from the source.
LLM_API_KEY: str = (
    os.environ.get("LLM_API_KEY") or "sk-26858b50690a49828766fcfcf3290de9"
)
# Supported file extensions (lowercase, without the leading dot).
SUPPORTED_FILE_EXTENSIONS = frozenset(["doc", "docx"])
# Chinese-to-Western punctuation table, stored as
# (chinese_symbol, western_symbol) pairs.
# NOTE(review): in this copy the first element of every pair is an empty
# string, and two pairs are empty on both sides, so several entries collapse
# into one inside the frozenset. The full-width characters were presumably
# lost in transit (the file is flagged as containing ambiguous Unicode
# characters) — TODO restore them from the original file.
# NOTE(review): "\\'" is a two-character string (backslash + apostrophe),
# not a bare apostrophe — confirm this is intended.
SYMBOLS_CN2WEST = frozenset(
[
("", ","),
("", "."),
("", "["),
("", "]"),
("", "("),
("", ")"),
("", " "),
("", " "),
("", "<"),
("", ">"),
("", ""),
("", ""),
("", "\\'"),
("", "\\'"),
("", "!"),
("", "?"),
("", ";"),
("", ":"),
("", ","),
]
)
# Set of all punctuation symbols: both sides of every SYMBOLS_CN2WEST pair,
# plus one extra literal.
# NOTE(review): the extra literal is an empty string in this copy —
# presumably a character that was lost in transit; confirm against the
# original file.
SYMBOLS = frozenset(
    {cn_side for cn_side, _ in SYMBOLS_CN2WEST}
    | {west_side for _, west_side in SYMBOLS_CN2WEST}
    | {""}
)
# Language code -> display name (in Chinese); used for language validation.
LANG_CODE_NAME = {
    "auto": "auto",
    "zh-cn": "中文",
    "en": "英语",
    "ja": "日语",
    "ko": "韩语",
    "fr": "法语",
    "de": "德语",
    "es": "西班牙语",
    "it": "意大利语",
    "pt": "葡萄牙语",
    "ru": "俄语",
    "ar": "阿拉伯语",
    "hi": "印地语",
    "bn": "孟加拉语",
    "pa": "旁遮普语",
    "jv": "爪哇语",
    "ms": "马来语",
    "vi": "越南语",
    "th": "泰语",
    "tr": "土耳其语",
    "fa": "波斯语",
    "pl": "波兰语",
    "uk": "乌克兰语",
    "ro": "罗马尼亚语",
    "nl": "荷兰语",
    "el": "希腊语",
}
# Default location of the translation database file: two directory levels
# above this file, then server/translator_service/translate_db/.
TRANS_DB_ROOT_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "server",
    "translator_service",
    "translate_db",
    "translate_info.db",
)
# Create the containing directory at import time if it is missing.
os.makedirs(os.path.split(TRANS_DB_ROOT_PATH)[0], exist_ok=True)
# SQLAlchemy database URI for the SQLite file above.
SQLALCHEMY_DATABASE_URI = "sqlite:///" + TRANS_DB_ROOT_PATH
MAX_RETRIES: int = 3  # maximum number of retries for a task
PROCESSING_TIMEOUT: timedelta = timedelta(hours=48)  # expiry duration for retried tasks (hours)
RECOVERY_INTERVAL: int = 30  # interval between task-recovery scans (minutes)
RETRY_DELAY: int = 5  # delay before a task is retried (seconds)