174 lines
6.5 KiB
Python
174 lines
6.5 KiB
Python
|
|
import os
|
|||
|
|
import asyncio
|
|||
|
|
import shutil
|
|||
|
|
import logging
|
|||
|
|
from pathlib import Path
|
|||
|
|
from pydantic import BaseModel, Field
|
|||
|
|
from fastapi.responses import FileResponse
|
|||
|
|
from fastapi import FastAPI, BackgroundTasks, UploadFile, File, Query, HTTPException
|
|||
|
|
from configs.translate_config import LANG_CODE_NAME, SUPPORTED_FILE_EXTENSIONS
|
|||
|
|
from server.translator_service.task_manager import TaskManager, TaskStatusEnum
|
|||
|
|
from server.translator_service.utils import get_storage_abspath, task_to_dict
|
|||
|
|
from server.translator_service.converter import doc, docx
|
|||
|
|
|
|||
|
|
# app = FastAPI(lifespan=lifespan)
|
|||
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|||
|
|
|
|||
|
|
class TranslateResponse(BaseModel):
    # Response body describing a submitted translation task.
    # Documented with comments on purpose: a class docstring on a pydantic
    # model is published as the schema description, which would change the
    # generated API schema.

    # Identifier of the translation task.
    task_id: str = Field(
        ...,
        description="翻译任务 ID",
    )

    # State of the task at the time the response is built.
    status: TaskStatusEnum = Field(
        ...,
        description="任务状态",
    )
|
|||
|
|
|
|||
|
|
async def translator(task_id: str, task, cancel_event: asyncio.Event) -> TaskStatusEnum:
    """
    ORM-flavoured translation routine: run the translation described by a
    TranslationTask instance and update attributes on that task.

    A scratch directory named ``tmp`` is (re)created next to the input file
    for the duration of the call and removed again on every exit path. The
    result is written next to the input file as ``translated<ext>`` and that
    path is stored on ``task.output_path`` before processing starts.

    Args:
        task_id: Task identifier, used only in log messages here.
        task: Task object; reads ``file_path``, ``src_lang``, ``dst_lang`` and
            ``is_dual``, writes ``output_path`` and ``progress``.
        cancel_event: Event checked on every progress report; also forwarded
            to the document processor.

    Returns:
        ``TaskStatusEnum.COMPLETED`` on success, ``TaskStatusEnum.FAILED`` when
        any ``Exception`` is raised (including an unsupported file type).

    Raises:
        asyncio.CancelledError: Re-raised after ``cancel_event`` has been set.
    """
    # Directory layout: everything lives beside the uploaded file.
    file_dir = os.path.dirname(task.file_path)
    tmp_dir = os.path.join(file_dir, "tmp")
    try:
        logger.info(f"开始翻译任务: {task_id}")

        # Start from an empty scratch directory, discarding leftovers.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        os.makedirs(tmp_dir)

        # Derive the output path from the input extension.
        ext = Path(task.file_path).suffix.lower()
        output_path = os.path.join(file_dir, f"translated{ext}")
        task.output_path = output_path

        def progress_callback(progress: float):
            # Progress reports double as cancellation checkpoints.
            if cancel_event.is_set():
                raise asyncio.CancelledError("任务已被取消")
            task.progress = progress
            logger.info(f"{task_id} 翻译进度: {progress:.2f}")

        # Type-specific preparation; both supported types then share the
        # single processor call below.
        if ext == ".doc":
            converted = await doc.convert_doc_to_docx(task.file_path)
            if not converted:
                raise ValueError(f"无法转换 DOC 文件: {task.file_path}")
            # NOTE(review): the conversion result is only checked for
            # truthiness and the processor is still given the original .doc
            # path - confirm that convert_doc_to_docx leaves the converted
            # document where the processor expects it.
        elif ext != ".docx":
            raise ValueError(f"不支持的文件类型: {ext}")

        await docx.processor(
            input_path=task.file_path,
            output_path=output_path,
            lang_in=task.src_lang,
            lang_out=task.dst_lang,
            is_dual_language=task.is_dual,
            work_dir=tmp_dir,
            progress_callback=progress_callback,
            cancel_event=cancel_event,
        )

        logger.info(f"翻译完成: {task_id}")
        return TaskStatusEnum.COMPLETED

    except asyncio.CancelledError:
        logger.info(f"任务被取消: {task_id}")
        # Make sure the event is set even when the cancellation did not
        # originate from it, then let the cancellation propagate.
        cancel_event.set()
        raise

    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback, which
        # the plain error message alone would drop.
        logger.exception(f"翻译任务失败: {task_id}, 错误: {e}")
        return TaskStatusEnum.FAILED

    finally:
        # Remove the scratch directory; cleanup failures are ignored.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
|
|||
|
|
|
|||
|
|
|
|||
|
|
async def save_file_and_get_path(file: UploadFile, task_id: str) -> str:
    """
    Save an uploaded file into the task's own directory and return its path.

    The stored name is the last path component of the client-supplied
    filename with its extension lower-cased. The upload is copied to disk in
    fixed-size chunks so that a large document is never held in memory whole.

    Args:
        file: The uploaded file; ``file.filename`` must be a non-empty string.
        task_id: Task ID, passed to ``get_storage_abspath`` to obtain the
            task directory.

    Returns:
        The task directory joined with the stored file name.
    """
    chunk_size = 1024 * 1024  # copy the upload 1 MiB at a time

    # Resolve and create the task directory.
    file_dir = get_storage_abspath(task_id)
    os.makedirs(file_dir, exist_ok=True)

    # stem/suffix only look at the final path component, so any directory
    # part of the client-supplied name is dropped here.
    upload_name = Path(file.filename)
    filename = f"{upload_name.stem}{upload_name.suffix.lower()}"
    file_path = os.path.join(file_dir, filename)

    # Stream to disk; an empty read marks the end of the upload.
    with open(file_path, "wb") as f:
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            f.write(chunk)
    return file_path
|
|||
|
|
|
|||
|
|
# Module-level TaskManager used by the endpoint functions below; it is
# constructed with `translator` as its translate_fn.
manager = TaskManager(translate_fn=translator)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Validate a translation request, store the uploaded document and register a
# translation task with the module-level manager.
#
# Validation order (each failure is a 400): language codes, identical
# source/target, target "auto", empty upload, unsupported extension.
# Returns a TranslateResponse carrying the new task ID and PROCESSING status.
#
# Kept as a comment rather than a docstring: if this function is registered as
# a FastAPI route, a docstring would be published as the operation description.
async def translate_file(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="要翻译的文档文件,当前支持.DOC/.DOCX"),
    to_language: str = Query("en", description="目标语言代码"),
    src_language: str = Query("auto", description="源语言代码"),
    is_dual_language: bool = Query(True, description="是否输出双语对照的译文,默认为是"),
) -> TranslateResponse:
    if (
        to_language not in LANG_CODE_NAME
        or src_language not in LANG_CODE_NAME
    ):
        raise HTTPException(status_code=400, detail="不支持的语言代码")
    if to_language == src_language:
        raise HTTPException(status_code=400, detail="源语言和目标语言不能相同")
    if to_language == "auto":
        raise HTTPException(status_code=400, detail="目标语言不能为自动")
    if not file.filename or not file.size:
        raise HTTPException(status_code=400, detail="文件不能为空")

    # Derive the extension only after the filename is known to be present;
    # doing it earlier raised TypeError on a missing filename instead of
    # returning the 400 above.
    file_extension = os.path.splitext(file.filename)[1][1:].lower()
    if file_extension not in SUPPORTED_FILE_EXTENSIONS:
        raise HTTPException(status_code=400, detail="不支持的文件类型")

    # The task ID is generated first because the storage path depends on it.
    task_id = manager.generate_task_id()
    file_path = await save_file_and_get_path(file, task_id)

    # Register the task under the ID generated above.
    manager.add_task(
        filename=file.filename,
        file_path=file_path,
        src_lang=src_language,
        dst_lang=to_language,
        is_dual=is_dual_language,
        background_tasks=background_tasks,
        task_id=task_id,
    )
    return TranslateResponse(task_id=task_id, status=TaskStatusEnum.PROCESSING)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Look up a task by ID and return it converted with task_to_dict; responds
# with 404 when the manager has no such task.
#
# Kept as a comment rather than a docstring: if this function is registered as
# a FastAPI route, a docstring would be published as the operation description.
async def get_progress(task_id: str = Query(..., description="文件翻译接口获取到的任务ID task_id")):
    record = manager.get_task(task_id)
    if record:
        return task_to_dict(record)
    raise HTTPException(404, "任务不存在")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Return the translated document of a completed task as a file download,
# offered under the task's stored filename. Responds with 404 when the task is
# unknown, not COMPLETED, or its output file is no longer on disk.
#
# Kept as a comment rather than a docstring: if this function is registered as
# a FastAPI route, a docstring would be published as the operation description.
async def download_result(task_id: str = Query(..., description="文件翻译接口获取到的任务ID task_id")):
    task = manager.get_task(task_id)
    if not task or task.status != TaskStatusEnum.COMPLETED:
        raise HTTPException(404, "文件不存在或未完成")

    # A completed task whose result is missing should be a 404, not a failure
    # raised later while FileResponse tries to send a non-existent file.
    if not task.output_path or not os.path.isfile(task.output_path):
        raise HTTPException(404, "文件不存在或未完成")

    return FileResponse(task.output_path, filename=task.filename)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Ask the manager to cancel a task; returns the CANCELLED status when the
# manager reports success and responds with 404 otherwise.
#
# Kept as a comment rather than a docstring: if this function is registered as
# a FastAPI route, a docstring would be published as the operation description.
async def cancel_task(task_id: str = Query(..., description="文件翻译接口获取到的任务ID task_id")):
    was_cancelled = manager.cancel_task(task_id)
    if not was_cancelled:
        raise HTTPException(404, "无法取消任务")
    return {"status": TaskStatusEnum.CANCELLED}
|