[全量] 初始化项目代码、配置、文档及Agent协同harness
This commit is contained in:
106
langchain-chat/server/chat/check_language.py
Normal file
106
langchain-chat/server/chat/check_language.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import re
|
||||
|
||||
from fastapi.responses import JSONResponse
|
||||
from configs import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
class CheckLanguage(BaseModel):
    # Request payload for check_language: the text whose language is inspected.
    query: str
|
||||
|
||||
def check_language(request: CheckLanguage) -> JSONResponse:
    '''
    Decide the translation target language for ``request.query``.

    Chinese characters and English words are counted with ``count_words``.
    When Chinese characters make up more than 50% of the counted units the
    text is treated as Chinese and the target is English ("en"); otherwise
    the target is Chinese ("zh-cn").

    Returns a JSONResponse:
      * HTTP 500 with ``code``/``message`` when the query holds nothing but
        digits and whitespace (or is empty);
      * otherwise ``code`` 200, ``message``, the normalized ``query``
        (digits and whitespace removed) and ``to_lang``;
      * HTTP 500 with an ``error`` field if building the response fails.
    '''
    raw_query = request.query

    # Normalized form (no digits, no whitespace): used for the emptiness
    # check and echoed back in the response, as before.
    query = re.sub(r'[\d\s]', '', raw_query)

    if not query:
        return JSONResponse(
            content={
                "code": 500,
                "message": "输入为空,请重试"
            },
            status_code=500)

    # Count on text that still contains its whitespace. Stripping whitespace
    # first would glue "hello world" into one token, undercounting English
    # words and inflating the Chinese ratio. Digits are still dropped.
    word_count = count_words(re.sub(r'\d', '', raw_query))
    chinese_count = word_count['chinese_chars']
    english_word_count = word_count['english_words']
    total_count = word_count['total_count']

    # total_count is 0 when the text has neither Chinese characters nor
    # English words (e.g. punctuation only); both ratios then fall back to 0.
    chinese_ratio = chinese_count / total_count if total_count > 0 else 0
    english_ratio = english_word_count / total_count if total_count > 0 else 0

    logger.info(f"🔍[语言检测] 中文字符比例: {chinese_ratio:.2f}, 英文单词比例: {english_ratio:.2f}")

    # Mostly-Chinese input is translated to English; everything else to Chinese.
    to_lang = "en" if chinese_ratio > 0.5 else "zh-cn"

    # Last-resort boundary: if building the success response raises, still
    # answer with a JSON 500 instead of propagating the exception.
    try:
        return JSONResponse(content={
            "code": 200,
            "message": "成功",
            "query": query,
            "to_lang": to_lang
        })
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
||||
|
||||
# Han ideographs: CJK Extension A, the base Unified Ideographs block,
# Compatibility Ideographs and Extension B.
_CHINESE_CHAR_RE = re.compile(
    r'[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\U00020000-\U0002a6df]'
)

# A run of ASCII letters, optionally continued across apostrophes so that a
# contraction such as "don't" is one word rather than two.
_ENGLISH_WORD_RE = re.compile(r"[a-zA-Z]+(?:['\u2019][a-zA-Z]+)*")


def count_words(query: str) -> dict:
    '''
    Count Chinese characters and English words in ``query``.

    Chinese is counted per character, English per word (a run of ASCII
    letters, with apostrophe contractions kept together). Anything else
    (digits, punctuation, other scripts) is ignored.

    Returns a dict with:
      * ``chinese_chars``: number of Chinese characters,
      * ``english_words``: number of English words,
      * ``total_count``: the sum of the two.
    '''
    chinese_chars = len(_CHINESE_CHAR_RE.findall(query))
    english_words = len(_ENGLISH_WORD_RE.findall(query))

    return {
        "total_count": chinese_chars + english_words,
        "chinese_chars": chinese_chars,
        "english_words": english_words,
    }
|
||||
|
||||
def get_supported_languages():
    '''
    Return the languages currently supported for translation.

    The response body has the form
    ``{"languages": [{"code": ..., "name": ...}, ...]}`` where

    * ``code`` is the value to pass as the ``to_lang`` parameter when
      translating, and
    * ``name`` is the language name to display.
    '''
    # (code, display name) pairs; the order here is the order in the response.
    supported = (
        ("zh-cn", "中文"),
        ("en", "英语"),
        ("ja", "日语"),
        ("ko", "韩语"),
        ("fr", "法语"),
        ("de", "德语"),
        ("es", "西班牙语"),
        ("it", "意大利语"),
        ("pt", "葡萄牙语"),
        ("ru", "俄语"),
        ("ar", "阿拉伯语"),
        ("hi", "印地语"),
        ("bn", "孟加拉语"),
        ("pa", "旁遮普语"),
        ("jv", "爪哇语"),
        ("ms", "马来语"),
        ("vi", "越南语"),
        ("th", "泰语"),
        ("tr", "土耳其语"),
        ("fa", "波斯语"),
        ("pl", "波兰语"),
        ("uk", "乌克兰语"),
        ("ro", "罗马尼亚语"),
        ("nl", "荷兰语"),
        ("el", "希腊语"),
    )

    entries = []
    for lang_code, display_name in supported:
        entries.append({"code": lang_code, "name": display_name})

    return JSONResponse(content={"languages": entries})
|
||||
Reference in New Issue
Block a user