106 lines
3.1 KiB
Python
106 lines
3.1 KiB
Python
import re
|
||
|
||
from fastapi.responses import JSONResponse
|
||
from configs import logger
|
||
from pydantic import BaseModel
|
||
|
||
class CheckLanguage(BaseModel):
    """Request payload consumed by ``check_language``."""

    # Text whose dominant language is to be determined.
    query: str
|
||
|
||
def check_language(request: CheckLanguage) -> JSONResponse:
    """Pick the translation target language for the submitted text.

    Detection rule: if Chinese characters account for more than 50% of the
    counted units (Chinese characters plus English words), the text is
    treated as Chinese and the target is English ("en"); otherwise the
    target is Chinese ("zh-cn").

    Args:
        request: Payload whose ``query`` field holds the text to inspect.

    Returns:
        A JSONResponse. On success the body holds ``code`` 200, ``message``,
        ``query`` (the text with digits and whitespace removed) and
        ``to_lang``. If nothing remains after removing digits and
        whitespace, the body holds ``code`` 500 with an error message and
        the HTTP status is 500.
    """
    raw_query = request.query
    # Digits and whitespace carry no language signal. The stripped text is
    # used for the emptiness check and is echoed back in the response.
    query = re.sub(r'[\d\s]', '', raw_query)

    if not query:  # nothing left to analyse: report an error
        return JSONResponse(
            content={
                "code": 500,
                "message": "输入为空,请重试",
            },
            status_code=500,
        )

    # Count on the raw text, not the stripped one: removing whitespace and
    # digits first would fuse neighbouring English words into a single
    # match and under-count them, skewing the ratio towards Chinese.
    word_count = count_words(raw_query)
    chinese_count = word_count['chinese_chars']
    english_word_count = word_count['english_words']
    total_count = word_count['total_count']

    # total_count is 0 when the text has neither Chinese characters nor
    # ASCII letters (e.g. punctuation only); both ratios then fall back to 0.
    chinese_ratio = chinese_count / total_count if total_count > 0 else 0
    english_ratio = english_word_count / total_count if total_count > 0 else 0

    logger.info(f"🔍[语言检测] 中文字符比例: {chinese_ratio:.2f}, 英文单词比例: {english_ratio:.2f}")

    # Mostly-Chinese input is translated to English; everything else
    # (including text with no countable units) is translated to Chinese.
    to_lang = "en" if chinese_ratio > 0.5 else "zh-cn"

    try:
        return JSONResponse(content={
            "code": 200,
            "message": "成功",
            "query": query,
            "to_lang": to_lang,
        })
    except Exception as e:
        # Boundary handler kept from the original: surface the failure to
        # the client instead of letting it propagate.
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
||
|
||
def count_words(query: str) -> dict:
    """Count Chinese characters and English words in ``query``.

    A Chinese character is any single code point in U+4E00..U+9FFF; an
    English word is a maximal run of ASCII letters.

    Args:
        query: Text to analyse.

    Returns:
        A dict with ``total_count`` (sum of the two counts),
        ``chinese_chars`` and ``english_words``.
    """
    # One match per Chinese character.
    han_total = sum(1 for _ in re.finditer(r'[\u4e00-\u9fff]', query))
    # One match per uninterrupted run of ASCII letters.
    latin_total = sum(1 for _ in re.finditer(r'[a-zA-Z]+', query))

    return dict(
        total_count=han_total + latin_total,
        chinese_chars=han_total,
        english_words=latin_total,
    )
|
||
|
||
def get_supported_languages():
    """Return the languages currently supported for translation.

    The response body has a single ``languages`` key holding a list of
    entries, each with:

    code: the value to pass as the ``to_lang`` parameter when translating.
    name: the language name to display.
    """
    # (code, display name) pairs, in the order they are returned.
    supported = (
        ("zh-cn", "中文"),
        ("en", "英语"),
        ("ja", "日语"),
        ("ko", "韩语"),
        ("fr", "法语"),
        ("de", "德语"),
        ("es", "西班牙语"),
        ("it", "意大利语"),
        ("pt", "葡萄牙语"),
        ("ru", "俄语"),
        ("ar", "阿拉伯语"),
        ("hi", "印地语"),
        ("bn", "孟加拉语"),
        ("pa", "旁遮普语"),
        ("jv", "爪哇语"),
        ("ms", "马来语"),
        ("vi", "越南语"),
        ("th", "泰语"),
        ("tr", "土耳其语"),
        ("fa", "波斯语"),
        ("pl", "波兰语"),
        ("uk", "乌克兰语"),
        ("ro", "罗马尼亚语"),
        ("nl", "荷兰语"),
        ("el", "希腊语"),
    )
    entries = [
        {"code": lang_code, "name": display_name}
        for lang_code, display_name in supported
    ]
    return JSONResponse(content={"languages": entries})