106 lines
3.1 KiB
Python
106 lines
3.1 KiB
Python
|
|
import re
|
|||
|
|
|
|||
|
|
from fastapi.responses import JSONResponse
|
|||
|
|
from configs import logger
|
|||
|
|
from pydantic import BaseModel
|
|||
|
|
|
|||
|
|
class CheckLanguage(BaseModel):
    # Request body for the language-detection endpoint.
    # query: the raw text whose language should be detected.
    query: str
|
|||
|
|
|
|||
|
|
def check_language(request: CheckLanguage) -> JSONResponse:
    """Detect the dominant language of a query and choose the translation target.

    The share of Chinese characters among all counted units (Chinese
    characters plus English words, as reported by ``count_words``) decides the
    target: above 50% the text is treated as Chinese and ``to_lang`` is
    ``"en"``; otherwise ``to_lang`` is ``"zh-cn"``.

    Args:
        request: Request body carrying the raw text in ``query``.

    Returns:
        A JSON response with ``code``, ``message``, ``query`` (the input with
        digits and whitespace removed) and ``to_lang``. When nothing is left
        after removing digits and whitespace, a 500 response with an error
        message is returned instead.
    """
    raw_query = request.query

    # Digits and whitespace carry no language signal. This stripped form is
    # used for the emptiness check and is what the response echoes back.
    query = re.sub(r'[\d\s]', '', raw_query)

    if not query:  # nothing left to analyse: report an error
        return JSONResponse(
            content={
                "code": 500,
                "message": "输入为空,请重试"
            },
            status_code=500)

    # Count on text that still contains whitespace. Stripping whitespace first
    # would fuse neighbouring English words ("hello world" -> "helloworld")
    # into a single match and under-count English, skewing the ratio towards
    # Chinese. Digits are still removed, as before.
    countable_text = re.sub(r'\d', '', raw_query)
    word_count = count_words(countable_text)
    total_count = word_count['total_count']

    if total_count > 0:
        chinese_ratio = word_count['chinese_chars'] / total_count
        english_ratio = word_count['english_words'] / total_count
    else:
        # Only punctuation/symbols were supplied; avoid dividing by zero.
        chinese_ratio = 0
        english_ratio = 0

    logger.info(f"🔍[语言检测] 中文字符比例: {chinese_ratio:.2f}, 英文单词比例: {english_ratio:.2f}")

    # Mostly-Chinese input is translated to English; everything else to Chinese.
    to_lang = "en" if chinese_ratio > 0.5 else "zh-cn"

    try:
        return JSONResponse(content={
            "code": 200,
            "message": "成功",
            "query": query,
            "to_lang": to_lang
        })
    except Exception as e:
        # Endpoint boundary: report a response-construction failure as a 500.
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
|||
|
|
|
|||
|
|
def count_words(query: str) -> dict:
    """Count Chinese characters and English words in ``query``.

    A Chinese character is any single code point in U+4E00..U+9FFF; an
    English word is a maximal run of ASCII letters.

    Args:
        query: Text to analyse.

    Returns:
        A dict with ``total_count`` (sum of the two counts),
        ``chinese_chars`` and ``english_words``.
    """
    # Each code point in the range counts as one Chinese character.
    han_total = sum(1 for _ in re.finditer(r'[\u4e00-\u9fff]', query))

    # Each uninterrupted run of ASCII letters counts as one English word.
    latin_runs = re.findall(r'[a-zA-Z]+', query)
    latin_total = len(latin_runs)

    counts = {
        "total_count": han_total + latin_total,
        "chinese_chars": han_total,
        "english_words": latin_total,
    }
    return counts
|
|||
|
|
|
|||
|
|
def get_supported_languages():
    """Return the languages currently supported.

    The response body has a single ``languages`` list; each entry contains:

    code: the value to pass as the ``to_lang`` parameter when translating.
    name: the language name to display.
    """
    # (code, display name) pairs, kept in presentation order.
    supported = (
        ("zh-cn", "中文"),
        ("en", "英语"),
        ("ja", "日语"),
        ("ko", "韩语"),
        ("fr", "法语"),
        ("de", "德语"),
        ("es", "西班牙语"),
        ("it", "意大利语"),
        ("pt", "葡萄牙语"),
        ("ru", "俄语"),
        ("ar", "阿拉伯语"),
        ("hi", "印地语"),
        ("bn", "孟加拉语"),
        ("pa", "旁遮普语"),
        ("jv", "爪哇语"),
        ("ms", "马来语"),
        ("vi", "越南语"),
        ("th", "泰语"),
        ("tr", "土耳其语"),
        ("fa", "波斯语"),
        ("pl", "波兰语"),
        ("uk", "乌克兰语"),
        ("ro", "罗马尼亚语"),
        ("nl", "荷兰语"),
        ("el", "希腊语"),
    )

    languages = []
    for lang_code, display_name in supported:
        languages.append({"code": lang_code, "name": display_name})

    return JSONResponse(content={"languages": languages})
|