diff --git a/.gitignore b/.gitignore index 389643a58..43658d24f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# AI +.planning +.claude +.superpowers/* + # Experimental kernel files *.csv pdf2zh/kernel/PDFMathTranslate-next.git/*.csv @@ -178,3 +183,6 @@ cython_debug/ uv.lock *.pdf *.docx + +# Vue GUI build artifact (generated from submodule dist) +pdf2zh/static/ diff --git a/.gitmodules b/.gitmodules index a30f6578c..c5da8f09a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "vendor/PDFMathTranslate-next"] path = pdf2zh/kernel/PDFMathTranslate-next.git url = https://github.com/PDFMathTranslate/PDFMathTranslate-next.git +[submodule "pdf2zh/web"] + path = pdf2zh/web + url = https://github.com/PDFMathTranslate/PDFMathTranslate-Web.git diff --git a/pdf2zh/api.py b/pdf2zh/api.py new file mode 100644 index 000000000..15f5aefeb --- /dev/null +++ b/pdf2zh/api.py @@ -0,0 +1,829 @@ +"""Lightweight REST API server for pdf2zh. + +No Celery/Redis required — uses Python threading for background jobs. 
class _StatusFilter(logging.Filter):
    """Suppress noisy werkzeug access-log records unless debug is enabled.

    The fragments in ``_NOISY_MARKERS`` are pieces of an access-log *message*
    (for example ``"GET /v1/status HTTP/1.1" 200 -``).  They are therefore
    matched against the formatted message of werkzeug records; matching them
    against ``record.name`` (the logger name) can never succeed.

    Records at WARNING or above, and records from any other logger, always
    pass through.
    """

    # Substrings identifying high-frequency access-log lines: status polling,
    # job polling, and any successful (HTTP 200) request.
    _NOISY_MARKERS = ("status", "v1/translate", " 200 -")

    def __init__(self, debug: bool):
        super().__init__()
        self.debug = debug

    def filter(self, record: logging.LogRecord) -> bool:
        """Return ``False`` to drop *record*, ``True`` to keep it."""
        if self.debug or record.levelno >= logging.WARNING:
            return True
        if not record.name.startswith("werkzeug"):
            return True
        try:
            message = record.getMessage()
        except (TypeError, ValueError):
            # Malformed format args: fall back to the raw template.
            message = str(record.msg)
        return not any(marker in message for marker in self._NOISY_MARKERS)


def _configure_request_logging(debug: bool) -> None:
    """Configure request/access logging visibility for the Flask dev server.

    Args:
        debug: When true, werkzeug INFO records are shown; otherwise the
            werkzeug loggers are raised to WARNING and a ``_StatusFilter``
            is attached to the root logger's handlers.
    """
    # Direct logger-level guard for werkzeug loggers.
    werkzeug_level = logging.INFO if debug else logging.WARNING
    for name in ("werkzeug", "werkzeug.serving"):
        logging.getLogger(name).setLevel(werkzeug_level)

    # Defensive handler filter: even if werkzeug resets its level internally,
    # low-level access logs remain muted unless debug is explicitly enabled.
    for handler in logging.getLogger().handlers:
        # The marker attribute keeps repeated calls from stacking filters.
        if getattr(handler, "_pdf2zh_status_filter_attached", False):
            continue
        handler.addFilter(_StatusFilter(debug=debug))
        handler._pdf2zh_status_filter_attached = True
# ---------------------------------------------------------------------------
# Auth
# ---------------------------------------------------------------------------


def require_auth(token: str):
    """Build a decorator that rejects requests lacking the expected Bearer token.

    Args:
        token: The secret that must follow ``"Bearer "`` in the
            ``Authorization`` header.

    Returns:
        A decorator; the wrapped view returns ``({"error": "unauthorized"}, 401)``
        when the header is missing, malformed, or carries a different token.
    """
    import hmac

    expected = token.encode("utf-8")

    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            header = request.headers.get("Authorization", "")
            supplied = (
                header[7:].encode("utf-8") if header.startswith("Bearer ") else None
            )
            # compare_digest runs in time independent of where the inputs
            # differ, so response timing does not leak a token prefix.
            if supplied is None or not hmac.compare_digest(supplied, expected):
                return jsonify({"error": "unauthorized"}), 401
            return f(*args, **kwargs)

        return wrapper

    return decorator


# ---------------------------------------------------------------------------
# Job model
# ---------------------------------------------------------------------------


class JobStatus(str, Enum):
    """Lifecycle states of a translation job (values are the API strings)."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


@dataclass
class Job:
    """In-memory record of one translation job and its results."""

    id: str
    status: JobStatus = JobStatus.PENDING
    filename: str = ""
    progress_current: int = 0
    progress_total: int = 0
    stage_name: str = ""
    stage_progress: float = 0.0
    stage_current: int = 0
    stage_total: int = 0
    stage_event: str = ""
    error: Optional[str] = None
    # Result PDFs are kept as raw bytes and excluded from repr().
    result_mono: Optional[bytes] = field(default=None, repr=False)
    result_dual: Optional[bytes] = field(default=None, repr=False)
    created_at: float = field(default_factory=time.time)
    params: Dict = field(default_factory=dict)
    thread: Optional[threading.Thread] = field(default=None, repr=False)
    cancel_event: Optional[threading.Event] = field(default=None, repr=False)

    def to_dict(self) -> dict:
        """Return the JSON-serialisable view of the job (no result bytes)."""
        has_mono_result = self.result_mono is not None
        has_dual_result = self.result_dual is not None
        return {
            "id": self.id,
            "status": self.status.value,
            "filename": self.filename,
            "progress": {
                "current": self.progress_current,
                "total": self.progress_total,
            },
            "stage": {
                "name": self.stage_name,
                "progress": self.stage_progress,
                "current": self.stage_current,
                "total": self.stage_total,
                "event": self.stage_event,
            },
            "error": self.error,
            "created_at": self.created_at,
            "has_result": has_mono_result or has_dual_result,
            "has_mono_result": has_mono_result,
            "has_dual_result": has_dual_result,
        }


# ---------------------------------------------------------------------------
# Job manager
# ---------------------------------------------------------------------------


class JobManager:
    """Registry of jobs keyed by id; every access to the map holds a lock."""

    # A job is PENDING from create() until its worker marks it RUNNING; a stop
    # request arriving in that window must still be honoured.
    _CANCELLABLE = (JobStatus.PENDING, JobStatus.RUNNING)

    def __init__(self):
        self._jobs: Dict[str, Job] = {}
        self._lock = threading.Lock()

    def create(self, filename: str, params: dict) -> Job:
        """Register and return a new PENDING job with a fresh cancel event."""
        with self._lock:
            job_id = uuid.uuid4().hex[:12]
            # The id is a truncated UUID, so make sure it is not already taken.
            while job_id in self._jobs:
                job_id = uuid.uuid4().hex[:12]
            job = Job(
                id=job_id,
                filename=filename,
                params=params,
                cancel_event=threading.Event(),
            )
            self._jobs[job_id] = job
        return job

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job with *job_id*, or ``None`` if unknown."""
        with self._lock:
            return self._jobs.get(job_id)

    def list_all(self) -> list:
        """Return ``to_dict()`` snapshots of every registered job."""
        with self._lock:
            return [j.to_dict() for j in self._jobs.values()]

    def cancel(self, job_id: str) -> bool:
        """Signal cancellation; return ``True`` if the job could be signalled.

        Returns ``False`` for unknown jobs, jobs that already finished, and
        jobs created without a cancel event.
        """
        with self._lock:
            job = self._jobs.get(job_id)
            if job is None or job.status not in self._CANCELLABLE:
                return False
            if job.cancel_event is None:
                return False
            job.cancel_event.set()
            return True

    def reset(self):
        """Signal every unfinished job to stop, then forget all jobs."""
        with self._lock:
            for job in self._jobs.values():
                if job.status in self._CANCELLABLE and job.cancel_event:
                    job.cancel_event.set()
            self._jobs.clear()

    def active_count(self) -> int:
        """Return the number of jobs currently RUNNING."""
        with self._lock:
            return sum(1 for j in self._jobs.values() if j.status == JobStatus.RUNNING)

    def total_count(self) -> int:
        """Return the number of registered jobs in any state."""
        with self._lock:
            return len(self._jobs)
def _finite_number(value) -> float | None:
    """Return *value* as a float if it is a finite real number, else ``None``.

    Booleans are rejected even though ``bool`` subclasses ``int``, and NaN or
    infinity are rejected because ``int()`` cannot convert them.
    """
    import math

    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    try:
        number = float(value)
    except OverflowError:
        return None
    return number if math.isfinite(number) else None


def _clamp_percent(value: float) -> float:
    """Clamp *value* into the closed range 0..100."""
    return max(0.0, min(100.0, value))


def _as_percent(value: float) -> float:
    """Read *value* as a 0..1 fraction or a 0..100 percentage, clamped to 0..100."""
    return _clamp_percent(value if value > 1.0 else value * 100.0)


def _overall_percent(event: dict) -> int | None:
    """Derive an overall 0..100 percentage from a precise-kernel progress event.

    Prefers ``overall_progress``, then ``stage_current / stage_total``, then
    ``stage_progress``.  Returns ``None`` when none of them is usable.
    """
    overall = _finite_number(event.get("overall_progress"))
    if overall is not None:
        return int(_as_percent(overall))

    current = _finite_number(event.get("stage_current"))
    total = _finite_number(event.get("stage_total"))
    if current is not None and total is not None and total > 0:
        return int(_clamp_percent(current / total * 100.0))

    stage = _finite_number(event.get("stage_progress"))
    if stage is not None:
        return int(_as_percent(stage))
    return None


def _apply_progress_event(job: Job, event: dict) -> None:
    """Copy the fields of one precise-kernel progress event onto *job*."""
    stage = event.get("stage")
    if isinstance(stage, str):
        job.stage_name = stage
    event_type = event.get("event")
    if isinstance(event_type, str):
        job.stage_event = event_type

    stage_progress = _finite_number(event.get("stage_progress"))
    if stage_progress is not None:
        # Normalize to a stable 0..100 API value.
        job.stage_progress = _as_percent(stage_progress)

    stage_current = _finite_number(event.get("stage_current"))
    if stage_current is not None:
        job.stage_current = int(stage_current)

    stage_total = _finite_number(event.get("stage_total"))
    if stage_total is not None:
        job.stage_total = int(stage_total)

    # Best-effort bridging of stage events onto the overall progress pair.
    pct = _overall_percent(event)
    if pct is not None:
        job.progress_current = pct
        job.progress_total = 100


def _translate_precise(job: Job, prompt):
    """Run the job through the "precise" kernel; return ``(mono, dual)`` bytes.

    The uploaded bytes are written to a temporary directory because the kernel
    is given file paths; results are read back before the directory is removed.

    Raises:
        RuntimeError: If the kernel returns no results.
    """
    import tempfile
    from pathlib import Path

    from pdf2zh.kernel.protocol import TranslateRequest
    from pdf2zh.kernel.registry import KernelRegistry

    params = job.params
    kernel = KernelRegistry.get("precise")

    # The request takes the prompt as plain text, not as a Template.
    if isinstance(prompt, Template):
        prompt_text = prompt.template
    elif isinstance(prompt, str):
        prompt_text = prompt
    else:
        prompt_text = None

    with tempfile.TemporaryDirectory(prefix="pdf2zh-api-") as td:
        workdir = Path(td)
        input_path = workdir / "input.pdf"
        input_path.write_bytes(params["stream"])

        out_dir = workdir / "out"
        out_dir.mkdir(parents=True, exist_ok=True)

        req = TranslateRequest(
            files=[str(input_path)],
            output=str(out_dir),
            pages=params.get("pages"),
            lang_in=params.get("lang_in", ""),
            lang_out=params.get("lang_out", ""),
            service=params.get("service", ""),
            thread=params.get("thread", 4),
            vfont=params.get("vfont", ""),
            vchar=params.get("vchar", ""),
            envs=params.get("envs") or {},
            prompt=prompt_text,
            skip_subset_fonts=params.get("skip_subset_fonts", False),
            ignore_cache=params.get("ignore_cache", False),
        )

        results = kernel.translate(
            req,
            callback=lambda event: _apply_progress_event(job, event),
            cancellation_event=job.cancel_event,
        )
        if not results:
            raise RuntimeError("Precise kernel returned no results")

        mono_path = results[0].mono_pdf
        dual_path = results[0].dual_pdf
        doc_mono = Path(mono_path).read_bytes() if mono_path else None
        doc_dual = Path(dual_path).read_bytes() if dual_path else None
    return doc_mono, doc_dual


def _run_translation(job: Job, model):
    """Worker-thread entry point: translate ``job.params["stream"]``.

    Updates *job* in place.  The status becomes RUNNING, then COMPLETED,
    CANCELLED or FAILED; ``job.error`` holds the message on failure.  The
    uploaded bytes are always dropped from ``job.params`` at the end.

    Args:
        job: The job to run; ``job.params`` carries the request parameters.
        model: Passed through to the fast pipeline as ``model``.
    """

    def progress_callback(t):
        # *t* is the progress bar handed over by the fast pipeline; its total
        # may be None, which is reported as 0.
        job.progress_current = t.n
        job.progress_total = t.total or 0

    try:
        job.status = JobStatus.RUNNING
        params = job.params
        backend = (params.get("backend") or "fast").strip()

        logger.debug(
            "Job %s starting with params: lang_in=%r, lang_out=%r, service=%r, pages=%r, thread=%r",
            job.id,
            params.get("lang_in"),
            params.get("lang_out"),
            params.get("service"),
            params.get("pages"),
            params.get("thread"),
        )

        prompt = params.get("prompt")
        if prompt:
            prompt = Template(prompt)

        if backend == "precise":
            # Use the v2/pdf2zh_next pipeline via the precise kernel.
            doc_mono, doc_dual = _translate_precise(job, prompt)
        else:
            # Default: fast in-process pipeline.  Imported inside the try so
            # an import failure marks the job FAILED instead of killing the
            # thread with the job still PENDING.
            from pdf2zh.high_level import translate_stream

            doc_mono, doc_dual = translate_stream(
                stream=params["stream"],
                pages=params.get("pages"),
                lang_in=params.get("lang_in", ""),
                lang_out=params.get("lang_out", ""),
                service=params.get("service", ""),
                thread=params.get("thread", 4),
                vfont=params.get("vfont", ""),
                vchar=params.get("vchar", ""),
                callback=progress_callback,
                cancellation_event=job.cancel_event,
                model=model,
                envs=params.get("envs"),
                prompt=prompt,
                skip_subset_fonts=params.get("skip_subset_fonts", False),
                ignore_cache=params.get("ignore_cache", False),
            )

        if job.cancel_event is not None and job.cancel_event.is_set():
            job.status = JobStatus.CANCELLED
        else:
            # Some pipelines don't emit a final "100%" progress event.
            # Ensure the API reports completion deterministically.
            if job.progress_total and job.progress_total > 0:
                job.progress_current = job.progress_total
            else:
                job.progress_current = 100
                job.progress_total = 100
            logger.debug(
                "Job %s finished: mono=%d bytes, dual=%d bytes, input=%d bytes",
                job.id,
                len(doc_mono) if doc_mono else 0,
                len(doc_dual) if doc_dual else 0,
                len(params.get("stream", b"")),
            )
            job.result_mono = doc_mono
            job.result_dual = doc_dual
            job.status = JobStatus.COMPLETED
    except Exception as e:
        # An exception raised after a stop request is a cancellation.
        if job.cancel_event and job.cancel_event.is_set():
            job.status = JobStatus.CANCELLED
        else:
            job.status = JobStatus.FAILED
            job.error = str(e)
            logger.exception("Translation job %s failed", job.id)
    finally:
        # Release the uploaded bytes whether or not the job succeeded.
        job.params.pop("stream", None)
def _get_cpu_info() -> dict:
    """Return CPU architecture, core count and, with psutil, usage and memory.

    ``usage_percent`` and ``memory`` are ``None`` when psutil is not installed.
    """
    import platform

    info = {
        "arch": platform.machine(),
        "cores": os.cpu_count(),
    }
    try:
        import psutil
    except ImportError:
        info["usage_percent"] = None
        info["memory"] = None
        return info

    info["usage_percent"] = psutil.cpu_percent(interval=0.1)
    mem = psutil.virtual_memory()
    info["memory"] = {
        "total_mb": round(mem.total / 1024 / 1024),
        "used_mb": round(mem.used / 1024 / 1024),
        "percent": mem.percent,
    }
    return info


def _get_gpu_info() -> list:
    """Return per-GPU utilisation via pynvml, or ``[]`` when unavailable.

    Best-effort: a missing pynvml module or a failing NVML call yields the
    entries collected so far (possibly none) instead of raising.  Once
    ``nvmlInit()`` has succeeded, ``nvmlShutdown()`` is always attempted.
    """
    try:
        import pynvml
    except ImportError:
        return []

    log = logging.getLogger(__name__)
    try:
        pynvml.nvmlInit()
    except Exception:
        # Initialisation failed; there is nothing to query or shut down.
        log.debug("NVML initialisation failed", exc_info=True)
        return []

    gpus = []
    try:
        for index in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(name, bytes):
                name = name.decode()
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpus.append(
                {
                    "index": index,
                    "name": name,
                    "gpu_util_percent": util.gpu,
                    "memory_total_mb": round(mem.total / 1024 / 1024),
                    "memory_used_mb": round(mem.used / 1024 / 1024),
                    "memory_percent": (
                        round(mem.used / mem.total * 100, 1) if mem.total else 0
                    ),
                }
            )
    except Exception:
        # Keep whatever was collected; GPU stats are optional.
        log.debug("GPU query failed", exc_info=True)
    finally:
        try:
            pynvml.nvmlShutdown()
        except Exception:
            log.debug("NVML shutdown failed", exc_info=True)
    return gpus
+_ENV_KEY_MAP: dict[str, str] = { + # OpenAI + "openai_api_key": "OPENAI_API_KEY", + "openai_model": "OPENAI_MODEL", + "openai_base_url": "OPENAI_BASE_URL", + # Azure OpenAI + "azure_openai_api_key": "AZURE_OPENAI_API_KEY", + "azure_openai_base_url": "AZURE_OPENAI_BASE_URL", + "azure_openai_model": "AZURE_OPENAI_MODEL", + "azure_openai_api_version": "AZURE_OPENAI_API_VERSION", + # DeepSeek + "deepseek_api_key": "DEEPSEEK_API_KEY", + "deepseek_model": "DEEPSEEK_MODEL", + # Ollama + "ollama_host": "OLLAMA_HOST", + "ollama_model": "OLLAMA_MODEL", + # Xinference + "xinference_host": "XINFERENCE_HOST", + "xinference_model": "XINFERENCE_MODEL", + # ModelScope + "modelscope_api_key": "MODELSCOPE_API_KEY", + "modelscope_model": "MODELSCOPE_MODEL", + # Zhipu + "zhipu_api_key": "ZHIPU_API_KEY", + "zhipu_model": "ZHIPU_MODEL", + # SiliconFlow + "siliconflow_api_key": "SILICON_API_KEY", + "siliconflow_model": "SILICON_MODEL", + # Gemini + "gemini_api_key": "GEMINI_API_KEY", + "gemini_model": "GEMINI_MODEL", + # Azure Translator + "azure_api_key": "AZURE_API_KEY", + "azure_endpoint": "AZURE_ENDPOINT", + # Tencent + "tencentcloud_secret_id": "TENCENTCLOUD_SECRET_ID", + "tencentcloud_secret_key": "TENCENTCLOUD_SECRET_KEY", + # AnythingLLM + "anythingllm_apikey": "AnythingLLM_APIKEY", + "anythingllm_url": "AnythingLLM_URL", + # Dify + "dify_apikey": "DIFY_API_KEY", + "dify_url": "DIFY_API_URL", + # Grok + "grok_api_key": "GROK_API_KEY", + "grok_model": "GROK_MODEL", + # Groq + "groq_api_key": "GROQ_API_KEY", + "groq_model": "GROQ_MODEL", + # QwenMt + "qwenmt_api_key": "ALI_API_KEY", + "qwenmt_model": "ALI_MODEL", + "qwenmt_base_url": "ALI_BASE_URL", + # OpenAI-compatible + "openai_compatible_api_key": "OPENAILIKED_API_KEY", + "openai_compatible_base_url": "OPENAILIKED_BASE_URL", + "openai_compatible_model": "OPENAILIKED_MODEL", + # Aliyun DashScope + "aliyun_dashscope_api_key": "ALI_API_KEY", + "aliyun_dashscope_model": "ALI_MODEL", + "aliyun_dashscope_base_url": "ALI_BASE_URL", + 
# DeepL + "deepl_auth_key": "DEEPL_AUTH_KEY", + # ClaudeCode + "claude_code_path": "CLAUDE_CODE_PATH", + "claude_code_model": "CLAUDE_CODE_MODEL", +} + +_ENV_KEY_MAP_PRECISE: dict[str, str] = { + # Most env keys match, but some providers use different naming in pdf2zh_next. + **_ENV_KEY_MAP, + # SiliconFlow (v2 expects SILICONFLOW_* keys) + "siliconflow_api_key": "SILICONFLOW_API_KEY", + "siliconflow_model": "SILICONFLOW_MODEL", + # Tencent (v2 expects TENCENT_* keys) + "tencentcloud_secret_id": "TENCENT_SECRET_ID", + "tencentcloud_secret_key": "TENCENT_SECRET_KEY", + # AnythingLLM (v2 expects ANYTHINGLLM_* keys) + "anythingllm_apikey": "ANYTHINGLLM_API_KEY", + "anythingllm_url": "ANYTHINGLLM_API_URL", +} + + +def _remap_envs(envs: dict | None, *, backend: str) -> dict | None: + """Remap frontend env key names to backend translator env key names.""" + if not envs: + return envs + remapped = {} + key_map = _ENV_KEY_MAP_PRECISE if backend == "precise" else _ENV_KEY_MAP + for k, v in envs.items(): + backend_key = key_map.get(k, k) + remapped[backend_key] = v + return remapped + + +# --------------------------------------------------------------------------- +# Flask app factory & routes +# --------------------------------------------------------------------------- + + +def create_api_app(token: Optional[str], model) -> tuple: + """Create Flask app and JobManager. 
# Display name -> language code.  The frontend stores and sends display
# names, while the translation engine is given the codes.  Kept in one place
# so request parsing and /v1/config cannot drift apart.
_API_LANGUAGES: dict[str, str] = {
    "Simplified Chinese": "zh",
    "Traditional Chinese": "zh-TW",
    "English": "en",
    "French": "fr",
    "German": "de",
    "Japanese": "ja",
    "Korean": "ko",
    "Russian": "ru",
    "Spanish": "es",
    "Italian": "it",
}


def _parse_pages(raw):
    """Normalise the ``pages`` request value.

    A string is parsed as JSON first and, failing that, as a comma-separated
    list of integers (``"1,2,5"``).  A lone integer becomes a one-element
    list.  Unparseable strings yield ``None``.  Non-string values, which come
    from the JSON ``data`` blob, are passed through unchanged.
    """
    if not isinstance(raw, str):
        return raw
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        try:
            pages = [int(part) for part in raw.split(",") if part.strip()]
        except ValueError:
            return None
        return pages or None
    if isinstance(parsed, int) and not isinstance(parsed, bool):
        return [parsed]
    return parsed


def _parse_thread(raw, default: int = 4) -> int:
    """Return *raw* as a positive int, or *default* when it is not one."""
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    return value if value > 0 else default


def _parse_flag(raw) -> bool:
    """Interpret a form or JSON value as a boolean.

    Strings are true only for ``1``, ``true``, ``yes`` or ``on`` in any case,
    so a form field carrying ``"false"`` is not treated as truthy.
    """
    if isinstance(raw, str):
        return raw.strip().lower() in ("1", "true", "yes", "on")
    return bool(raw)


def create_api_app(token: Optional[str], model) -> tuple:
    """Create the Flask app and its JobManager. Returns ``(app, jobs)``.

    Args:
        token: Bearer token required by the protected routes.  A falsy value
            disables authentication.
        model: Fallback model handed to translation workers when the kernel
            registry does not provide one.
    """
    app = Flask("pdf2zh-api")
    app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024  # 100 MB
    jobs = JobManager()
    auth = require_auth(token) if token else (lambda f: f)

    def _backend_summary() -> dict:
        """Describe both kernels, probing precise availability only once."""
        from pdf2zh.kernel.legacy import LegacyKernel
        from pdf2zh.kernel.precise import PreciseKernel

        fast = LegacyKernel()
        precise = PreciseKernel()
        precise_ok = precise.is_available()
        return {
            "fast": {"version": fast.version, "available": fast.is_available()},
            "precise": {
                "version": precise.version if precise_ok else None,
                "available": precise_ok,
            },
        }

    @app.errorhandler(404)
    def not_found(e):
        return jsonify({"error": "not found"}), 404

    @app.errorhandler(500)
    def internal_error(e):
        return jsonify({"error": "internal server error"}), 500

    @app.route("/v1/translate", methods=["POST"])
    @auth
    def create_translation():
        if "file" not in request.files:
            return jsonify({"error": "no file uploaded"}), 400
        file = request.files["file"]
        stream = file.stream.read()
        if not stream:
            return jsonify({"error": "empty file"}), 400

        # The frontend may send params as top-level form fields OR inside
        # a nested JSON "data" field.  Merge both sources, preferring
        # explicit top-level form values over the JSON blob.
        data = {}
        if request.form.get("data"):
            try:
                data = json.loads(request.form["data"])
            except json.JSONDecodeError:
                return jsonify({"error": "invalid JSON in 'data' field"}), 400
            if not isinstance(data, dict):
                # A JSON list or scalar would otherwise fail on data.get().
                return jsonify({"error": "'data' field must be a JSON object"}), 400

        def _val(key, default=""):
            """Return a form field if present, else fall back to data JSON."""
            v = request.form.get(key)
            if v is not None and v != "":
                return v
            return data.get(key, default)

        def _resolve_lang(raw: str) -> str:
            # Pass through anything that is not a known display name.
            return _API_LANGUAGES.get(raw, raw)

        backend = _val("backend", "fast")

        # Default service per backend when none specified.
        service = _val("service", "")
        if not service:
            service = "google" if backend == "fast" else "siliconflowfree"

        # Validate the service against the registry.  A "name:model" spec is
        # accepted when its name part is registered.
        from pdf2zh.services import SERVICE_BY_NAME

        service_name = service.split(":", 1)[0] if isinstance(service, str) else service
        if service not in SERVICE_BY_NAME and service_name not in SERVICE_BY_NAME:
            return jsonify({"error": f"unknown service: {service!r}"}), 400

        params = {
            "stream": stream,
            "pages": _parse_pages(_val("pages", None)),
            "lang_in": _resolve_lang(_val("lang_in", "")),
            "lang_out": _resolve_lang(_val("lang_out", "")),
            "service": service,
            "thread": _parse_thread(_val("thread", 4)),
            "vfont": _val("vfont", ""),
            "vchar": _val("vchar", ""),
            "envs": _remap_envs(data.get("envs"), backend=backend),
            "prompt": _val("prompt", None),
            "skip_subset_fonts": _parse_flag(_val("skip_subset_fonts", False)),
            "ignore_cache": _parse_flag(_val("ignore_cache", False)),
            "backend": backend,
        }

        job = jobs.create(filename=file.filename or "upload.pdf", params=params)

        # Resolve model from KernelRegistry when available, fall back to
        # the model passed at server startup.
        try:
            from pdf2zh.kernel.registry import KernelRegistry

            kernel = KernelRegistry.get(backend)
            run_model = getattr(kernel, "model", model)
        except Exception:
            run_model = model

        worker = threading.Thread(
            target=_run_translation,
            args=(job, run_model),
            daemon=True,
        )
        job.thread = worker
        worker.start()
        return jsonify({"id": job.id}), 202

    # The view takes job_id, so the rule must declare it as a URL variable.
    @app.route("/v1/translate/<job_id>", methods=["GET"])
    @auth
    def get_translation(job_id: str):
        job = jobs.get(job_id)
        if not job:
            return jsonify({"error": "job not found"}), 404
        return jsonify(job.to_dict())

    @app.route("/v1/translate/<job_id>/download/<fmt>", methods=["GET"])
    @auth
    def download_result(job_id: str, fmt: str):
        if fmt not in ("mono", "dual"):
            return jsonify({"error": "format must be 'mono' or 'dual'"}), 400
        job = jobs.get(job_id)
        if not job:
            return jsonify({"error": "job not found"}), 404
        if job.status != JobStatus.COMPLETED:
            return (
                jsonify({"error": "job not completed", "status": job.status.value}),
                400,
            )
        data = job.result_mono if fmt == "mono" else job.result_dual
        if not data:
            return jsonify({"error": "result not available"}), 404
        basename = (
            job.filename.rsplit(".", 1)[0] if "." in job.filename else job.filename
        )
        return send_file(
            io.BytesIO(data),
            mimetype="application/pdf",
            download_name=f"{basename}-{fmt}.pdf",
        )

    @app.route("/v1/status", methods=["GET"])
    @auth
    def health_status():
        logger.debug("Status requested")
        return jsonify(
            {
                "status": "ok",
                "active_jobs": jobs.active_count(),
                "total_jobs": jobs.total_count(),
                "cpu": _get_cpu_info(),
                "gpu": _get_gpu_info(),
            }
        )

    @app.route("/v1/list", methods=["GET"])
    @auth
    def list_jobs():
        return jsonify({"jobs": jobs.list_all()})

    @app.route("/v1/translate/<job_id>/stop", methods=["POST"])
    @auth
    def stop_translation(job_id: str):
        job = jobs.get(job_id)
        if not job:
            return jsonify({"error": "job not found"}), 404
        if jobs.cancel(job_id):
            return jsonify({"message": "cancellation requested", "id": job_id})
        return jsonify({"error": "job is not running", "status": job.status.value}), 400

    @app.route("/v1/reboot", methods=["POST"])
    @auth
    def reboot():
        jobs.reset()
        return jsonify({"message": "server state reset"})

    @app.route("/v1/version", methods=["GET"])
    def get_version():
        backends = _backend_summary()
        return jsonify(
            {
                "version": backends["fast"]["version"],
                "backends": backends,
            }
        )

    @app.route("/v1/config", methods=["GET"])
    def get_config():
        from pdf2zh.services import SERVICES

        return jsonify(
            {
                "services": [
                    {
                        "display": s.display,
                        "value": s.name,
                        "custom_prompt": s.custom_prompt,
                    }
                    for s in SERVICES
                ],
                "languages": dict(_API_LANGUAGES),
                "backends": _backend_summary(),
                "default_backend": "fast",
            }
        )

    return app, jobs
def run_api_server(
    host: str = "127.0.0.1",
    port: int = 8787,
    token: Optional[str] = None,
    model=None,
    debug: bool = False,
):
    """Start the REST API server and block until it exits.

    Args:
        host: Interface to bind.
        port: TCP port to bind.
        token: Bearer token clients must present.  When missing or empty a
            random one is generated and printed once to the console.
        model: Passed through to ``create_api_app``.
        debug: Enables Flask debug mode and verbose request logging.
    """
    import secrets

    _configure_request_logging(debug)

    # An empty string would make create_api_app() skip authentication, so it
    # is treated like "no token supplied".
    if not token:
        token = secrets.token_hex(20)
        # The token is shown on the console only.  Writing it to the log
        # would leave the secret in any file the log is routed to.
        print(f"\n{'=' * 60}")
        print(f" API Token: {token}")
        print(f" Keep this token secret. Use it in requests as:")
        print(f" Authorization: Bearer {token}")
        print(f"{'=' * 60}\n")

    app, _jobs = create_api_app(token=token, model=model)
    logger.info("Starting API server on %s:%s", host, port)
    print(f"API server listening on http://{host}:{port}")
    app.run(host=host, port=port, threaded=True, debug=debug)
"ollama:gemma2:9b" -> ["ollama", "gemma2:9b"] + if not service: + service = "google" param = service.split(":", 1) service_name = param[0] service_model = param[1] if len(param) > 1 else None if not envs: envs = {} - for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, XinferenceTranslator, AzureOpenAITranslator, - OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, AnythingLLMTranslator, ArgosTranslator, GrokTranslator, GroqTranslator, DeepseekTranslator, MiniMaxTranslator, OpenAIlikedTranslator, QwenMtTranslator, X302AITranslator]: - if service_name == translator.name: - self.translator = translator(lang_in, lang_out, service_model, envs=envs, prompt=prompt, ignore_cache=ignore_cache) - if not self.translator: - raise ValueError("Unsupported translation service") + translator_cls = TRANSLATOR_BY_NAME.get(service_name) + if translator_cls is None: + raise ValueError(f"Unsupported translation service: {service_name!r}") + self.translator = translator_cls(lang_in, lang_out, service_model, envs=envs, prompt=prompt, ignore_cache=ignore_cache) def receive_layout(self, ltpage: LTPage): # 段落 diff --git a/pdf2zh/gui.py b/pdf2zh/gui.py index 9fb4764af..2e87b1e9c 100644 --- a/pdf2zh/gui.py +++ b/pdf2zh/gui.py @@ -35,6 +35,7 @@ ModelScopeTranslator, OllamaTranslator, OpenAITranslator, + SiliconFlowFreeTranslator, SiliconTranslator, TencentTranslator, XinferenceTranslator, @@ -83,6 +84,7 @@ def __getattr__(self, name): "Zhipu": ZhipuTranslator, "ModelScope": ModelScopeTranslator, "Silicon": SiliconTranslator, + "SiliconFlow Free": SiliconFlowFreeTranslator, "Gemini": GeminiTranslator, "Azure": AzureTranslator, "Tencent": TencentTranslator, diff --git a/pdf2zh/gui_vue.py b/pdf2zh/gui_vue.py new file mode 100644 index 000000000..f43ca3a9d --- /dev/null +++ b/pdf2zh/gui_vue.py @@ -0,0 +1,49 @@ +"""Vue GUI launcher — serves pre-built 
def launch_vue_gui(host="127.0.0.1", port=8787, model=None):
    """Start a Flask server that serves the pre-built Vue SPA and the API.

    Args:
        host: Interface to bind.
        port: TCP port to bind.
        model: Passed through to ``create_api_app``.

    Raises:
        ImportError: If Flask is not installed.
        FileNotFoundError: If ``index.html`` is missing from ``STATIC_DIR``.
    """
    try:
        from flask import send_from_directory
    except ImportError as err:
        raise ImportError(
            "Flask is required for the Vue GUI. "
            "Install with: pip install pdf2zh[vue]"
        ) from err

    from pdf2zh.api import create_api_app

    if not os.path.isfile(os.path.join(STATIC_DIR, "index.html")):
        raise FileNotFoundError(
            "Vue GUI not found at %s. "
            "Install with: pip install pdf2zh[vue]" % STATIC_DIR
        )

    # token=None: the app is created without Bearer-token authentication.
    app, _jobs = create_api_app(token=None, model=model)

    @app.route("/")
    def index():
        return send_from_directory(STATIC_DIR, "index.html")

    # The "path" converter also matches slashes, so nested asset paths reach
    # this view.  The rule has to declare the variable the view requires.
    @app.route("/<path:path>")
    def static_files(path):
        file_path = os.path.join(STATIC_DIR, path)
        if os.path.isfile(file_path):
            return send_from_directory(STATIC_DIR, path)
        # SPA fallback for client-side routing
        return send_from_directory(STATIC_DIR, "index.html")

    url = f"http://{host}:{port}"
    # A wildcard bind address is not something a browser can open; use the
    # loopback address for the browser in that case.
    browse_host = "127.0.0.1" if host in ("0.0.0.0", "::", "") else host
    browse_url = f"http://{browse_host}:{port}"
    threading.Timer(1.0, webbrowser.open, args=[browse_url]).start()

    print(f"Vue GUI running at {url}")
    app.run(host=host, port=port, threaded=True)
isolated venv.""" from __future__ import annotations @@ -16,9 +16,9 @@ logger = logging.getLogger(__name__) -# Resolve paths relative to the kernel package directory -_SUBMODULE_DIR = Path(__file__).resolve().parent / "PDFMathTranslate-next.git" -_VENV_DIR = _SUBMODULE_DIR / ".venv" +# Use a dedicated user cache venv for precise kernel runtime. +_CACHE_DIR = Path.home() / ".cache" / "pdf2zh" +_VENV_DIR = _CACHE_DIR / "precise-kernel-venv" _WORKER_SCRIPT = Path(__file__).resolve().parent / "v2_worker.py" @@ -50,31 +50,18 @@ def version(self) -> str: capture_output=True, text=True, timeout=30, - cwd=str(_SUBMODULE_DIR), ) return result.stdout.strip() or "unknown" except Exception: return "unknown" def is_available(self) -> bool: - """Check if submodule exists and venv is initialized.""" - return ( - _SUBMODULE_DIR.is_dir() - and (_SUBMODULE_DIR / "pyproject.toml").exists() - and Path(_venv_python()).exists() - ) + """Check if venv exists and pdf2zh_next is importable.""" + return Path(_venv_python()).exists() and self._package_importable() def ensure_venv(self) -> None: - """Create venv and install pdf2zh_next if not already set up.""" - if ( - not _SUBMODULE_DIR.is_dir() - or not (_SUBMODULE_DIR / "pyproject.toml").exists() - ): - raise RuntimeError( - "PDFMathTranslate-next submodule not found. 
" - "Run: git submodule update --init pdf2zh/kernel/PDFMathTranslate-next.git" - ) - + """Create venv and install/upgrade pdf2zh_next from pip.""" + _CACHE_DIR.mkdir(parents=True, exist_ok=True) venv_exists = Path(_venv_python()).exists() if venv_exists and self._package_importable(): @@ -88,12 +75,18 @@ def ensure_venv(self) -> None: timeout=60, ) - logger.info("Installing pdf2zh_next into venv...") + logger.info("Installing/upgrading pdf2zh-next from pip into precise venv...") subprocess.run( - [_venv_python(), "-m", "pip", "install", "-e", str(_SUBMODULE_DIR)], + [ + _venv_python(), + "-m", + "pip", + "install", + "--upgrade", + "pdf2zh-next", + ], check=True, timeout=300, - cwd=str(_SUBMODULE_DIR), ) logger.info("Precise kernel venv ready.") @@ -105,8 +98,6 @@ def _package_importable(self) -> bool: [_venv_python(), "-c", "import pdf2zh_next"], capture_output=True, timeout=30, - cwd=str(_SUBMODULE_DIR), - env={**os.environ, "PYTHONPATH": str(_SUBMODULE_DIR)}, ) return result.returncode == 0 except Exception: @@ -115,7 +106,6 @@ def _package_importable(self) -> bool: def _build_subprocess_env(self, request: TranslateRequest) -> dict[str, str]: """Build environment for the subprocess with PDF2ZH_ prefixed vars.""" env = os.environ.copy() - env["PYTHONPATH"] = str(_SUBMODULE_DIR) env.update(request_to_env(request)) return env @@ -140,7 +130,6 @@ def translate( stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, - cwd=str(_SUBMODULE_DIR), env=env, ) @@ -152,19 +141,20 @@ def translate( proc.stdin.write(input_json) proc.stdin.close() - for line in proc.stderr: - line = line.strip() - if not line: - continue - stderr_lines.append(line) + for raw_line in proc.stderr: + line = raw_line.rstrip("\r\n") + if line: + stderr_lines.append(line) try: - event = json.loads(line) + event = self._parse_progress_event(line) if not isinstance(event, dict): raise ValueError("not a JSON object") if callback: callback(event) except (json.JSONDecodeError, ValueError): - 
print(line, file=sys.stderr, flush=True) + # Preserve original v2 formatting (indentation/progress text). + sys.stderr.write(raw_line) + sys.stderr.flush() stdout = proc.stdout.read() proc.wait() @@ -195,6 +185,22 @@ def translate( ) return results + @staticmethod + def _parse_progress_event(line: str) -> dict[str, Any]: + """Parse a JSON progress event from possibly mixed stderr output. + + Rich progress rendering may inject control text around event lines. + Try full-line JSON first, then extract the JSON object between braces. + """ + try: + return json.loads(line) + except json.JSONDecodeError: + start = line.find("{") + end = line.rfind("}") + if start == -1 or end == -1 or end <= start: + raise + return json.loads(line[start : end + 1]) + async def translate_async( self, request: TranslateRequest, @@ -213,7 +219,6 @@ async def translate_async( stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, - cwd=str(_SUBMODULE_DIR), env=env, ) diff --git a/pdf2zh/kernel/v2_bridge.py b/pdf2zh/kernel/v2_bridge.py index 7c624e496..4b2d22551 100644 --- a/pdf2zh/kernel/v2_bridge.py +++ b/pdf2zh/kernel/v2_bridge.py @@ -11,35 +11,39 @@ import os from typing import Any -# v1 service name → v2 CLI engine flag (lowercase) +# v1 service name → v2 CLI engine flag. +# v2 flags are translate_engine_type.lower() with NO hyphens/underscores. 
SERVICE_NAME_MAP: dict[str, str] = { "google": "google", "bing": "bing", "deepl": "deepl", - "deeplx": "deeplx", "ollama": "ollama", "openai": "openai", "azure": "azure", - "azureopenai": "azure", + "azureopenai": "azureopenai", "zhipu": "zhipu", "silicon": "siliconflow", "siliconflow": "siliconflow", + "siliconflowfree": "siliconflowfree", "gemini": "gemini", - "tencent": "tencent", + "tencent": "tencentmechinetranslation", "dify": "dify", "anythingllm": "anythingllm", - "argos": "argos", "grok": "grok", "groq": "groq", "deepseek": "deepseek", - "doubao": "doubao", - "openai-compatible": "openai_compatible", - "aliyun-dashscope": "aliyun_dashscope", + "openai-compatible": "openaicompatible", + "aliyun-dashscope": "aliyundashscope", "modelscope": "modelscope", + "qwen-mt": "qwenmt", + "claudecode": "claudecode", + "clitranslator": "clitranslator", } # Known engine-related env var names (without PDF2ZH_ prefix). # Used to forward relevant vars from os.environ into the subprocess. +# Names must match the pydantic field names in v2's translate_engine_model.py +# (ConfigManager reads them as PDF2ZH_{FIELD_NAME_UPPER}). 
_ENGINE_ENV_NAMES: set[str] = { "OPENAI_API_KEY", "OPENAI_BASE_URL", @@ -57,29 +61,32 @@ "OLLAMA_HOST", "OLLAMA_MODEL", "DEEPL_AUTH_KEY", - "DEEPLX_ENDPOINT", - "DEEPLX_AUTH_KEY", - "TENCENT_SECRET_ID", - "TENCENT_SECRET_KEY", - "DIFY_API_URL", - "DIFY_API_KEY", - "ANYTHINGLLM_API_URL", - "ANYTHINGLLM_API_KEY", + "TENCENTCLOUD_SECRET_ID", + "TENCENTCLOUD_SECRET_KEY", + "DIFY_URL", + "DIFY_APIKEY", + "ANYTHINGLLM_URL", + "ANYTHINGLLM_APIKEY", "GROK_API_KEY", "GROK_MODEL", "GROQ_API_KEY", "GROQ_MODEL", - "DOUBAO_API_KEY", - "DOUBAO_MODEL", "SILICONFLOW_API_KEY", "SILICONFLOW_MODEL", + "SILICONFLOW_BASE_URL", "OPENAI_COMPATIBLE_API_KEY", "OPENAI_COMPATIBLE_BASE_URL", "OPENAI_COMPATIBLE_MODEL", "ALIYUN_DASHSCOPE_API_KEY", "ALIYUN_DASHSCOPE_MODEL", + "ALIYUN_DASHSCOPE_BASE_URL", "MODELSCOPE_API_KEY", "MODELSCOPE_MODEL", + "QWENMT_API_KEY", + "QWENMT_MODEL", + "QWENMT_BASE_URL", + "CLAUDE_CODE_PATH", + "CLAUDE_CODE_MODEL", } @@ -120,10 +127,10 @@ def request_to_cli_args(request: Any) -> list[str]: if data.get("lang_out"): args.extend(["--lang-out", data["lang_out"]]) - # Engine flag: --google, --openai, etc. + # Engine flag: --google, --openai, --siliconflowfree, etc. engine_type = SERVICE_NAME_MAP.get(service.lower()) if engine_type: - args.append(f"--{engine_type.replace('_', '-')}") + args.append(f"--{engine_type}") if pages_v2: args.extend(["--pages", pages_v2]) diff --git a/pdf2zh/kernel/v2_worker.py b/pdf2zh/kernel/v2_worker.py index 2a1b84307..ff2f5bfd7 100644 --- a/pdf2zh/kernel/v2_worker.py +++ b/pdf2zh/kernel/v2_worker.py @@ -2,22 +2,31 @@ """Subprocess worker — runs pdf2zh_next translation in an isolated venv. Protocol: - - stdin: JSON array of CLI args (e.g. ["file.pdf", "--lang-out", "zh", "--openai"]) + - stdin: JSON array of CLI args (e.g. 
["file.pdf", "--lang-out", "zh", "--siliconflowfree"]) - stdout: JSON result (last line, after all progress events) - stderr: JSON-lines progress events and log output This script is executed by PreciseKernel using the venv's Python interpreter. v2's ConfigManager handles all config parsing from sys.argv + PDF2ZH_* env vars. + +We call babeldoc directly (via create_babeldoc_config) instead of +do_translate_async_stream, because the latter now spawns a multiprocessing +subprocess internally — running that inside *this* subprocess causes +deadlocks and fd-inheritance issues on macOS/spawn. """ from __future__ import annotations import asyncio import json +import logging import os import sys import time +_GOOGLE_TRANSLATE_CONNECT_TIMEOUT = 10 +_GOOGLE_TRANSLATE_READ_TIMEOUT = 30 + def _redirect_stdout_to_stderr(): """Redirect stdout to stderr so library log output doesn't pollute JSON results. @@ -32,14 +41,73 @@ def _redirect_stdout_to_stderr(): # Redirect before any pdf2zh_next imports (they configure logging on import) _real_stdout = _redirect_stdout_to_stderr() +def _disable_translator_health_check(): + """Skip the blocking translate('Hello') health check in get_translator(). + + The submodule's _create_translator_instance calls translator.translate("Hello") + as a health check. For SiliconFlowFree this hits the real translation proxy + endpoint, which can hang for minutes (60s timeout × 3 retries + backoff). + The translator's __init__ already validates the proxy via /check and /config, + so this redundant health check just adds a failure mode with terrible UX. 
+ """ + import pdf2zh_next.translator.utils as _tu + from pdf2zh_next.translator.base_translator import BaseTranslator + + _orig = _tu._create_translator_instance + + def _patched(settings, translator_config, rate_limiter, enforce_glossary_support=True): + _orig_translate = BaseTranslator.translate + BaseTranslator.translate = lambda self, *a, **kw: "ok" + try: + return _orig(settings, translator_config, rate_limiter, enforce_glossary_support) + finally: + BaseTranslator.translate = _orig_translate + + _tu._create_translator_instance = _patched + + +def _patch_google_request_timeout(): + """Bound precise Google translator requests so stalls fail instead of hanging. + + pdf2zh_next's Google translator uses requests.Session.get() without a timeout. + When Google stops responding mid-request, the worker can sit forever inside + Translate Paragraphs and the API/WebUI never see a terminal state. + """ + import requests.sessions + + original_get = requests.sessions.Session.get + if getattr(original_get, "__pdf2zh_google_timeout_patch__", False): + return + + def _patched_get(self, url, *args, **kwargs): + if ( + "timeout" not in kwargs + and isinstance(url, str) + and "translate.google.com/m" in url + ): + kwargs["timeout"] = ( + _GOOGLE_TRANSLATE_CONNECT_TIMEOUT, + _GOOGLE_TRANSLATE_READ_TIMEOUT, + ) + return original_get(self, url, *args, **kwargs) + + _patched_get.__pdf2zh_google_timeout_patch__ = True + requests.sessions.Session.get = _patched_get + async def run_translation(cli_args: list[str]) -> dict: - """Execute translation using v2's own config parsing.""" + """Execute translation using v2's config parsing and babeldoc directly.""" # Patch sys.argv so ConfigManager.initialize_config() picks up our args sys.argv = ["pdf2zh_next"] + cli_args + from pathlib import Path + from pdf2zh_next.config.main import ConfigManager - from pdf2zh_next.high_level import do_translate_async_stream + from pdf2zh_next.high_level import create_babeldoc_config + + import 
babeldoc.assets.assets + from babeldoc.format.pdf.high_level import do_translate, get_translation_stage + from babeldoc.progress_monitor import ProgressMonitor settings = ConfigManager().initialize_config() @@ -48,56 +116,74 @@ async def run_translation(cli_args: list[str]) -> dict: settings.basic.input_files = set() results = [] + errors: list[dict] = [] start_time = time.time() + # Skip the blocking health-check translate("Hello") for SiliconFlowFree. + # The translator __init__ already validates the proxy via /check and /config; + # the redundant full-translation health check can hang for minutes when the + # upstream API is slow or down. + if "--siliconflowfree" in cli_args: + _disable_translator_health_check() + # Automatic term extraction is the first heavy LLM phase in BabelDOC and + # commonly stalls around 30% with SiliconFlowFree due to long proxy timeouts. + settings.translation.no_auto_extract_glossary = True + elif "--google" in cli_args: + _patch_google_request_timeout() + + # Ensure babeldoc assets are available + babeldoc.assets.assets.warmup() + for file_path in input_files: try: - async for event in do_translate_async_stream(settings, file_path): - event_type = event.get("type", "") + config = create_babeldoc_config(settings, Path(file_path)) - if event_type in ("progress_start", "progress_update", "progress_end"): - # Forward babeldoc progress events with their actual fields - progress_event = { - "type": event_type, - "stage": event.get("stage", ""), - "stage_progress": event.get("stage_progress", 0.0), - "stage_current": event.get("stage_current", 0), - "stage_total": event.get("stage_total", 0), - "overall_progress": event.get("overall_progress", 0.0), - "part_index": event.get("part_index", 0), - "total_parts": event.get("total_parts", 0), - } - print(json.dumps(progress_event), file=sys.stderr, flush=True) - - elif event_type == "finish": - tr = event.get("translate_result") - result = { - "mono_pdf": ( - str(tr.mono_pdf_path) if tr and 
tr.mono_pdf_path else None - ), - "dual_pdf": ( - str(tr.dual_pdf_path) if tr and tr.dual_pdf_path else None - ), - "time_cost": tr.total_seconds if tr else 0.0, - } - results.append(result) - - elif event_type == "error": - error_event = { - "type": "error", - "message": event.get("error", "Unknown error"), - } - print(json.dumps(error_event), file=sys.stderr, flush=True) + def progress_change_callback(**event): + event_type = event.get("type", "") + if event_type not in ( + "progress_start", + "progress_update", + "progress_end", + ): + return + progress_event = { + "type": event_type, + "stage": event.get("stage", ""), + "stage_progress": event.get("stage_progress", 0.0), + "stage_current": event.get("stage_current", 0), + "stage_total": event.get("stage_total", 0), + "overall_progress": event.get("overall_progress", 0.0), + "part_index": event.get("part_index", 0), + "total_parts": event.get("total_parts", 0), + } + print(json.dumps(progress_event), file=sys.stderr, flush=True) + + with ProgressMonitor( + get_translation_stage(config), + progress_change_callback=progress_change_callback, + report_interval=config.report_interval, + ) as pm: + tr = do_translate(pm, config) + + result = { + "mono_pdf": str(tr.mono_pdf_path) if tr and tr.mono_pdf_path else None, + "dual_pdf": str(tr.dual_pdf_path) if tr and tr.dual_pdf_path else None, + "time_cost": tr.total_seconds if tr else 0.0, + } + results.append(result) except Exception as e: error_event = {"type": "error", "message": str(e)} + errors.append(error_event) print(json.dumps(error_event), file=sys.stderr, flush=True) elapsed = time.time() - start_time - return {"results": results, "time_cost": elapsed} + return {"results": results, "time_cost": elapsed, "errors": errors} def main(): + logging.basicConfig(level=logging.INFO) + raw = sys.stdin.read() try: cli_args = json.loads(raw) @@ -115,6 +201,11 @@ def main(): _real_stdout.write(json.dumps(result) + "\n") _real_stdout.flush() + # Treat as failure only when 
nothing succeeded. + # babeldoc can emit recoverable error events while still producing outputs. + if result.get("errors") and not result.get("results"): + sys.exit(2) + if __name__ == "__main__": main() diff --git a/pdf2zh/pdf2zh.py b/pdf2zh/pdf2zh.py index c662d4a7f..a217c50d9 100644 --- a/pdf2zh/pdf2zh.py +++ b/pdf2zh/pdf2zh.py @@ -99,16 +99,24 @@ def create_parser() -> argparse.ArgumentParser: default=4, help="The number of threads to execute translation.", ) + parse_params.add_argument( + "--gui", + nargs="?", + const="vue", + default=None, + choices=["vue", "legacy"], + help="Launch GUI. Default: Vue. Use '--gui legacy' for Gradio.", + ) parse_params.add_argument( "--interactive", "-i", action="store_true", - help="Interact with GUI.", + help="Launch Vue GUI (alias for --gui).", ) parse_params.add_argument( "--share", action="store_true", - help="Enable Gradio Share", + help="Enable Gradio Share (only with --gui legacy).", ) parse_params.add_argument( "--flask", @@ -124,7 +132,7 @@ def create_parser() -> argparse.ArgumentParser: "--authorized", type=str, nargs="+", - help="user name and password.", + help="user name and password (only with --gui legacy).", ) parse_params.add_argument( "--prompt", @@ -208,6 +216,30 @@ def create_parser() -> argparse.ArgumentParser: "--sse", action="store_true", help="Launch pdf2zh MCP server in SSE mode" ) + parse_params.add_argument( + "--api", + action="store_true", + help="Start lightweight REST API server (no Redis/Celery required).", + ) + parse_params.add_argument( + "--api-host", + type=str, + default="127.0.0.1", + help="API server bind address (default: 127.0.0.1).", + ) + parse_params.add_argument( + "--api-port", + type=int, + default=8787, + help="API server port (default: 8787).", + ) + parse_params.add_argument( + "--api-token", + type=str, + default=None, + help="API bearer token. 
If not set, a random token is generated at startup.", + ) + return parser @@ -286,7 +318,21 @@ def main(args: Optional[List[str]] = None) -> int: else: ModelInstance.value = OnnxModel.load_available() - if parsed_args.interactive: + # Resolve GUI mode: --gui [vue|legacy] or -i (alias for --gui vue) + gui_mode = parsed_args.gui + if parsed_args.interactive and gui_mode is None: + gui_mode = "vue" + + if gui_mode == "vue": + from pdf2zh.gui_vue import launch_vue_gui + + launch_vue_gui( + host=parsed_args.api_host, + port=parsed_args.api_port, + model=ModelInstance.value, + ) + return 0 + elif gui_mode == "legacy": from pdf2zh.gui import setup_gui if parsed_args.serverport: @@ -331,6 +377,18 @@ def main(args: Optional[List[str]] = None) -> int: mcp.run() return 0 + if parsed_args.api: + from pdf2zh.api import run_api_server + + run_api_server( + host=parsed_args.api_host, + port=parsed_args.api_port, + token=parsed_args.api_token, + model=ModelInstance.value, + debug=parsed_args.debug, + ) + return 0 + print(parsed_args) if parsed_args.babeldoc: diff --git a/pdf2zh/services.py b/pdf2zh/services.py new file mode 100644 index 000000000..ec0f8854c --- /dev/null +++ b/pdf2zh/services.py @@ -0,0 +1,47 @@ +"""Unified service registry — single source of truth for available translators.""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class ServiceInfo: + display: str + name: str + custom_prompt: bool + + +# Ordered list of all supported translation services. 
+# display: human-readable label for UIs +# name: internal identifier (must match translator.name in translator.py) +# custom_prompt: whether the service supports user-supplied prompt templates +SERVICES: list[ServiceInfo] = [ + ServiceInfo("Google", "google", False), + ServiceInfo("Bing", "bing", False), + ServiceInfo("DeepL", "deepl", False), + ServiceInfo("DeepLX", "deeplx", False), + ServiceInfo("Ollama", "ollama", True), + ServiceInfo("Xinference", "xinference", True), + ServiceInfo("OpenAI", "openai", True), + ServiceInfo("AzureOpenAI", "azure-openai", True), + ServiceInfo("Zhipu", "zhipu", True), + ServiceInfo("ModelScope", "modelscope", True), + ServiceInfo("SiliconFlow", "silicon", True), + ServiceInfo("SiliconFlow Free", "siliconflowfree", True), + ServiceInfo("Gemini", "gemini", True), + ServiceInfo("Azure", "azure", False), + ServiceInfo("Tencent", "tencent", False), + ServiceInfo("Dify", "dify", False), + ServiceInfo("AnythingLLM", "anythingllm", True), + ServiceInfo("Argos Translate", "argos", False), + ServiceInfo("Grok", "grok", True), + ServiceInfo("Groq", "groq", True), + ServiceInfo("DeepSeek", "deepseek", True), + ServiceInfo("MiniMax", "minimax", True), + ServiceInfo("OpenAI-compatible", "openailiked", True), + ServiceInfo("Ali Qwen-Translation", "qwen-mt", True), + ServiceInfo("302.AI", "302ai", True), +] + +SERVICE_BY_NAME: dict[str, ServiceInfo] = {s.name: s for s in SERVICES} diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py index 0b09ef273..33f88a12a 100644 --- a/pdf2zh/translator.py +++ b/pdf2zh/translator.py @@ -182,14 +182,17 @@ def do_translate(self, text): params={"tl": self.lang_out, "sl": self.lang_in, "q": text}, headers=self.headers, ) + if response.status_code == 400: + return remove_control_characters("IRREPARABLE TRANSLATION ERROR") + response.raise_for_status() re_result = re.findall( r'(?s)class="(?:t0|result-container)">(.*?)<', response.text ) - if response.status_code == 400: - result = "IRREPARABLE TRANSLATION 
ERROR" - else: - response.raise_for_status() - result = html.unescape(re_result[0]) + if not re_result: + raise ValueError( + f"Google Translate returned unexpected response (status {response.status_code})" + ) + result = html.unescape(re_result[0]) return remove_control_characters(result) @@ -442,9 +445,16 @@ def __init__( "stop": stop_tokens, "max_tokens": max_tokens if max_tokens > 0 else None, } + resolved_api_key = api_key or self.envs.get("OPENAI_API_KEY") + if not resolved_api_key: + raise ValueError( + f"API key is required for {self.name} translator. " + f"Please set the appropriate API key in the configuration or environment variables." + ) self.client = openai.OpenAI( - base_url=base_url or self.envs["OPENAI_BASE_URL"], - api_key=api_key or self.envs["OPENAI_API_KEY"], + base_url=base_url + or self.envs.get("OPENAI_BASE_URL", "https://api.openai.com/v1"), + api_key=resolved_api_key, ) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) @@ -658,6 +668,43 @@ def __init__( self.add_cache_impact_parameters("prompt", self.prompt("", self.prompttext)) +class SiliconFlowFreeTranslator(BaseTranslator): + # Free translation proxy provided by pdf2zh-next project + name = "siliconflowfree" + envs = {} + CustomPrompt = True + + ENDPOINTS = [ + "https://api1.pdf2zh-next.com/chatproxy", + "https://api2.pdf2zh-next.com/chatproxy", + ] + + def __init__( + self, lang_in, lang_out, model, envs=None, prompt=None, ignore_cache=False + ): + super().__init__(lang_in, lang_out, model, ignore_cache=ignore_cache) + self.endpoint = self.ENDPOINTS[0] + self.prompttext = prompt + + def do_translate(self, text) -> str: + prompt_text = ( + f"You are a professional,authentic machine translation engine.\n\n" + f";; Treat next line as plain text input and translate it into " + f"{self.lang_out}, output translation ONLY. If translation is " + f"unnecessary (e.g. proper nouns, codes, {{{{1}}}}, etc.), return " + f"the original text. 
NO explanations. NO notes. Input:\n\n{text}" + ) + response = requests.post( + self.endpoint, + json={"text": prompt_text}, + timeout=100, + ) + if response.status_code == 429: + raise Exception("Rate limited by SiliconFlow Free proxy") + response.raise_for_status() + return response.json()["content"].strip() + + class X302AITranslator(OpenAITranslator): # https://doc.302.ai/ name = "302ai" diff --git a/pdf2zh/web b/pdf2zh/web new file mode 160000 index 000000000..1ec8106ec --- /dev/null +++ b/pdf2zh/web @@ -0,0 +1 @@ +Subproject commit 1ec8106ec2874fbfe1650d0164ee08d539303151 diff --git a/pyproject.toml b/pyproject.toml index 90d9f41b5..d74883daf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,9 @@ backend = [ "celery", "redis" ] +api = [ + "flask", +] argostranslate = [ "argostranslate" ] @@ -56,7 +59,8 @@ cuda = [ dml = [ "onnxruntime-directml", ] -precise = [] # marker — run `pdf2zh-setup-precise` after install to provision the isolated venv +precise = [] # marker — run `pdf2zh-setup-precise` to provision isolated runtime venv +vue = ["flask"] # marker — submodule provides pre-built Vue static files [dependency-groups] dev = [ @@ -75,6 +79,13 @@ Homepage = "https://github.com/Byaidu/PDFMathTranslate" requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.build.hooks.custom] +path = "script/hatch_build.py" + +[tool.hatch.build] +# Include Vue static files (built by the custom hook) in the wheel +artifacts = ["pdf2zh/static/**"] + [project.scripts] pdf2zh = "pdf2zh.pdf2zh:main" pdf2zh-setup-precise = "pdf2zh.kernel.precise:setup_precise_cli" diff --git a/script/hatch_build.py b/script/hatch_build.py new file mode 100644 index 000000000..38a0ee993 --- /dev/null +++ b/script/hatch_build.py @@ -0,0 +1,64 @@ +"""Hatchling build hook — builds the Vue submodule and copies dist to pdf2zh/static/.""" + +import os +import shutil +import subprocess + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + + +class 
VueBuildHook(BuildHookInterface): + PLUGIN_NAME = "vue-build" + + def initialize(self, version, build_data): + root = self.root + web_dir = os.path.join(root, "pdf2zh", "web") + dist_dir = os.path.join(web_dir, "dist") + static_dir = os.path.join(root, "pdf2zh", "static") + + # Only build if the submodule has a package.json + if not os.path.isfile(os.path.join(web_dir, "package.json")): + return + + # Check if rebuild is needed: no static dir, or submodule is newer + needs_build = not os.path.isdir(static_dir) + if not needs_build: + # Compare mtime of submodule src vs static index.html + static_index = os.path.join(static_dir, "index.html") + if os.path.isfile(static_index): + src_dir = os.path.join(web_dir, "src") + if os.path.isdir(src_dir): + static_mtime = os.path.getmtime(static_index) + for dirpath, _, filenames in os.walk(src_dir): + for f in filenames: + if ( + os.path.getmtime(os.path.join(dirpath, f)) + > static_mtime + ): + needs_build = True + break + if needs_build: + break + else: + needs_build = True + + if not needs_build: + return + + print("Building Vue frontend from submodule...") + subprocess.check_call(["bun", "install"], cwd=web_dir) + subprocess.check_call(["bun", "run", "build"], cwd=web_dir) + + if not os.path.isdir(dist_dir): + raise RuntimeError(f"Vue build did not produce {dist_dir}") + + if os.path.exists(static_dir): + shutil.rmtree(static_dir) + shutil.copytree(dist_dir, static_dir) + print(f"Copied Vue dist -> {static_dir}") + + # Include static files in the wheel + build_data["shared_data"] = {} + build_data["force_include"] = { + static_dir: "pdf2zh/static", + } diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 000000000..73b1f5ede --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,363 @@ +"""Tests for the lightweight REST API server.""" + +import io +import sys +import threading +import time +import unittest +from pathlib import Path +from unittest.mock import patch, MagicMock + +PROJECT_ROOT = 
Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +TEST_TOKEN = "test-token-abc123" +FAKE_PDF = b"%PDF-1.4 fake content" +FAKE_MONO = b"%PDF-1.4 mono result" +FAKE_DUAL = b"%PDF-1.4 dual result" + + +def _make_client(token=TEST_TOKEN): + """Create a Flask test client with mocked model.""" + from pdf2zh.api import create_api_app + + model = MagicMock() + app, jobs = create_api_app(token=token, model=model) + app.config["TESTING"] = True + return app.test_client(), jobs, model + + +def _auth_header(token=TEST_TOKEN): + return {"Authorization": f"Bearer {token}"} + + +def _upload(client, data=None, file_content=FAKE_PDF, filename="test.pdf"): + """Helper to POST a file to /v1/translate.""" + payload = {"file": (io.BytesIO(file_content), filename)} + form_data = {} + if data is not None: + import json + + form_data["data"] = json.dumps(data) + return client.post( + "/v1/translate", + data={**payload, **form_data}, + headers=_auth_header(), + content_type="multipart/form-data", + ) + + +class TestTokenAuth(unittest.TestCase): + def setUp(self): + self.client, self.jobs, _ = _make_client() + + def test_missing_auth_header_returns_401(self): + resp = self.client.get("/v1/status") + self.assertEqual(resp.status_code, 401) + self.assertEqual(resp.get_json()["error"], "unauthorized") + + def test_invalid_token_returns_401(self): + resp = self.client.get( + "/v1/status", headers={"Authorization": "Bearer wrong-token"} + ) + self.assertEqual(resp.status_code, 401) + + def test_wrong_scheme_returns_401(self): + resp = self.client.get( + "/v1/status", headers={"Authorization": f"Basic {TEST_TOKEN}"} + ) + self.assertEqual(resp.status_code, 401) + + def test_valid_token_passes(self): + resp = self.client.get("/v1/status", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + + +class TestStatusEndpoint(unittest.TestCase): + def setUp(self): + self.client, self.jobs, _ = _make_client() + + def 
test_health_check_returns_ok(self): + resp = self.client.get("/v1/status", headers=_auth_header()) + data = resp.get_json() + self.assertEqual(data["status"], "ok") + self.assertEqual(data["active_jobs"], 0) + self.assertEqual(data["total_jobs"], 0) + # CPU info should always be present + self.assertIn("cpu", data) + self.assertIn("cores", data["cpu"]) + self.assertIn("arch", data["cpu"]) + self.assertIsInstance(data["cpu"]["cores"], int) + # GPU info should be a list (possibly empty) + self.assertIn("gpu", data) + self.assertIsInstance(data["gpu"], list) + + +class TestTranslateEndpoint(unittest.TestCase): + def setUp(self): + self.client, self.jobs, _ = _make_client() + + @patch("pdf2zh.api._run_translation") + def test_upload_no_file_returns_400(self, mock_run): + resp = self.client.post( + "/v1/translate", + headers=_auth_header(), + content_type="multipart/form-data", + ) + self.assertEqual(resp.status_code, 400) + self.assertIn("no file", resp.get_json()["error"]) + + @patch("pdf2zh.api._run_translation") + def test_upload_empty_file_returns_400(self, mock_run): + resp = _upload(self.client, file_content=b"") + self.assertEqual(resp.status_code, 400) + self.assertIn("empty", resp.get_json()["error"]) + + @patch("pdf2zh.api.threading.Thread") + @patch("pdf2zh.api._run_translation") + def test_upload_valid_file_returns_202(self, mock_run, mock_thread_cls): + mock_thread = MagicMock() + mock_thread_cls.return_value = mock_thread + resp = _upload(self.client) + self.assertEqual(resp.status_code, 202) + data = resp.get_json() + self.assertIn("id", data) + self.assertTrue(len(data["id"]) > 0) + mock_thread.start.assert_called_once() + + @patch("pdf2zh.api.threading.Thread") + @patch("pdf2zh.api._run_translation") + def test_upload_invalid_json_data_returns_400(self, mock_run, mock_thread_cls): + payload = {"file": (io.BytesIO(FAKE_PDF), "test.pdf"), "data": "not-json{"} + resp = self.client.post( + "/v1/translate", + data=payload, + headers=_auth_header(), + 
content_type="multipart/form-data", + ) + self.assertEqual(resp.status_code, 400) + self.assertIn("invalid JSON", resp.get_json()["error"]) + + @patch("pdf2zh.api.threading.Thread") + @patch("pdf2zh.api._run_translation") + def test_upload_with_translation_params(self, mock_run, mock_thread_cls): + mock_thread = MagicMock() + mock_thread_cls.return_value = mock_thread + params = {"lang_in": "en", "lang_out": "zh", "service": "google"} + resp = _upload(self.client, data=params) + self.assertEqual(resp.status_code, 202) + job_id = resp.get_json()["id"] + job = self.jobs.get(job_id) + self.assertEqual(job.params["lang_in"], "en") + self.assertEqual(job.params["lang_out"], "zh") + self.assertEqual(job.params["service"], "google") + + +class TestJobLifecycle(unittest.TestCase): + def setUp(self): + self.client, self.jobs, self.model = _make_client() + + def test_get_nonexistent_job_returns_404(self): + resp = self.client.get("/v1/translate/nonexistent", headers=_auth_header()) + self.assertEqual(resp.status_code, 404) + + @patch("pdf2zh.high_level.translate_stream", return_value=(FAKE_MONO, FAKE_DUAL)) + def test_job_completes_and_download_works(self, mock_translate): + from pdf2zh.api import _run_translation, JobStatus + + # Create job directly via manager + job = self.jobs.create("test.pdf", {"stream": FAKE_PDF}) + _run_translation(job, self.model) + + self.assertEqual(job.status, JobStatus.COMPLETED) + self.assertEqual(job.progress_current, job.progress_total) + self.assertGreater(job.progress_total, 0) + + # Check status + resp = self.client.get(f"/v1/translate/{job.id}", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + data = resp.get_json() + self.assertEqual(data["status"], "completed") + self.assertTrue(data["has_result"]) + self.assertEqual(data["progress"]["current"], data["progress"]["total"]) + self.assertGreater(data["progress"]["total"], 0) + + # Download mono + resp = self.client.get( + f"/v1/translate/{job.id}/download/mono", 
headers=_auth_header() + ) + self.assertEqual(resp.status_code, 200) + self.assertEqual(resp.data, FAKE_MONO) + + # Download dual + resp = self.client.get( + f"/v1/translate/{job.id}/download/dual", headers=_auth_header() + ) + self.assertEqual(resp.status_code, 200) + self.assertEqual(resp.data, FAKE_DUAL) + + def test_download_invalid_format_returns_400(self): + from pdf2zh.api import JobStatus + + job = self.jobs.create("test.pdf", {}) + job.status = JobStatus.COMPLETED + job.result_mono = FAKE_MONO + resp = self.client.get( + f"/v1/translate/{job.id}/download/invalid", headers=_auth_header() + ) + self.assertEqual(resp.status_code, 400) + + def test_download_incomplete_job_returns_400(self): + job = self.jobs.create("test.pdf", {}) + resp = self.client.get( + f"/v1/translate/{job.id}/download/mono", headers=_auth_header() + ) + self.assertEqual(resp.status_code, 400) + self.assertIn("not completed", resp.get_json()["error"]) + + def test_download_nonexistent_job_returns_404(self): + resp = self.client.get( + "/v1/translate/nonexistent/download/mono", headers=_auth_header() + ) + self.assertEqual(resp.status_code, 404) + + @patch("pdf2zh.kernel.registry.KernelRegistry.get") + def test_precise_job_status_exposes_stage_progress(self, mock_kernel_get): + from pdf2zh.api import _run_translation, JobStatus + from pdf2zh.kernel.protocol import TranslateResult + + mock_kernel = MagicMock() + + def _fake_translate(_req, callback=None, cancellation_event=None): + if callback: + callback( + { + "event": "progress_update", + "stage": "layout_analysis", + "stage_progress": 0.25, + "stage_current": 1, + "stage_total": 4, + "overall_progress": 0.5, + } + ) + return [TranslateResult(mono_pdf=None, dual_pdf=None)] + + mock_kernel.translate.side_effect = _fake_translate + mock_kernel_get.return_value = mock_kernel + + job = self.jobs.create( + "test.pdf", + { + "stream": FAKE_PDF, + "backend": "precise", + "service": "siliconflowfree", + "lang_in": "en", + "lang_out": "zh", + 
"thread": 1, + }, + ) + _run_translation(job, self.model) + + self.assertEqual(job.status, JobStatus.COMPLETED) + resp = self.client.get(f"/v1/translate/{job.id}", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + data = resp.get_json() + self.assertIn("stage", data) + self.assertEqual(data["stage"]["name"], "layout_analysis") + self.assertEqual(data["stage"]["event"], "progress_update") + self.assertEqual(data["stage"]["current"], 1) + self.assertEqual(data["stage"]["total"], 4) + self.assertEqual(data["stage"]["progress"], 25.0) + self.assertEqual(data["progress"]["current"], 100) + self.assertEqual(data["progress"]["total"], 100) + + def test_dual_only_result_is_reported_as_available(self): + from pdf2zh.api import JobStatus + + job = self.jobs.create("test.pdf", {}) + job.status = JobStatus.COMPLETED + job.result_dual = FAKE_DUAL + + resp = self.client.get(f"/v1/translate/{job.id}", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + data = resp.get_json() + self.assertTrue(data["has_result"]) + self.assertFalse(data["has_mono_result"]) + self.assertTrue(data["has_dual_result"]) + + resp = self.client.get( + f"/v1/translate/{job.id}/download/dual", headers=_auth_header() + ) + self.assertEqual(resp.status_code, 200) + self.assertEqual(resp.data, FAKE_DUAL) + + @patch("pdf2zh.api.threading.Thread") + @patch("pdf2zh.api._run_translation") + def test_list_jobs_returns_all(self, mock_run, mock_thread_cls): + mock_thread_cls.return_value = MagicMock() + # Create two jobs + _upload(self.client) + _upload(self.client) + resp = self.client.get("/v1/list", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + data = resp.get_json() + self.assertEqual(len(data["jobs"]), 2) + + +class TestJobCancellation(unittest.TestCase): + def setUp(self): + self.client, self.jobs, _ = _make_client() + + def test_stop_nonexistent_job_returns_404(self): + resp = self.client.post( + "/v1/translate/nonexistent/stop", headers=_auth_header() + 
) + self.assertEqual(resp.status_code, 404) + + def test_stop_completed_job_returns_400(self): + from pdf2zh.api import JobStatus + + job = self.jobs.create("test.pdf", {}) + job.status = JobStatus.COMPLETED + resp = self.client.post(f"/v1/translate/{job.id}/stop", headers=_auth_header()) + self.assertEqual(resp.status_code, 400) + self.assertIn("not running", resp.get_json()["error"]) + + def test_stop_running_job(self): + from pdf2zh.api import JobStatus + + job = self.jobs.create("test.pdf", {}) + job.status = JobStatus.RUNNING + resp = self.client.post(f"/v1/translate/{job.id}/stop", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + self.assertIn("cancellation requested", resp.get_json()["message"]) + self.assertTrue(job.cancel_event.is_set()) + + +class TestReboot(unittest.TestCase): + def setUp(self): + self.client, self.jobs, _ = _make_client() + + def test_reboot_clears_all_jobs(self): + from pdf2zh.api import JobStatus + + self.jobs.create("a.pdf", {}) + self.jobs.create("b.pdf", {}) + self.assertEqual(self.jobs.total_count(), 2) + + resp = self.client.post("/v1/reboot", headers=_auth_header()) + self.assertEqual(resp.status_code, 200) + self.assertEqual(self.jobs.total_count(), 0) + + def test_reboot_cancels_running_jobs(self): + from pdf2zh.api import JobStatus + + job = self.jobs.create("test.pdf", {}) + job.status = JobStatus.RUNNING + self.client.post("/v1/reboot", headers=_auth_header()) + self.assertTrue(job.cancel_event.is_set()) + + +if __name__ == "__main__": + unittest.main()