127 lines
4.8 KiB
Python
127 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
pipeline-translator.py
|
|
───────────────────────────────────────────────────────────────
|
|
Translation OCR pipeline (Batch Processing Only)
|
|
|
|
Usage:
|
|
python pipeline-translator.py /path/to/chapter/folder
|
|
"""
|
|
|
|
import argparse
import importlib.util
import os
import re
import sys
from pathlib import Path
|
|
|
|
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
|
|
def load_module(name, filepath):
    """Load a Python source file as a module under the given name.

    This allows importing files whose names are not valid identifiers
    (for example, names containing a hyphen).

    Args:
        name: Module name to assign; also the key used in ``sys.modules``.
        filepath: Path to the source file to execute.

    Returns:
        The fully executed module object.

    Raises:
        FileNotFoundError: If no import spec/loader can be built for
            ``filepath``.
        Exception: Anything raised while executing the module's top-level
            code is propagated unchanged.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")

    module = importlib.util.module_from_spec(spec)

    # Register the module *before* executing it, as the importlib recipe
    # requires: code in the loaded file that looks itself up through
    # sys.modules[__name__] (dataclasses, pickle, ...) otherwise breaks.
    previous = sys.modules.get(name)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind on failure.
        if previous is None:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    return module
|
|
|
|
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
|
|
_DIGIT_RUN = re.compile(r"(\d+)")


def _natural_key(page):
    """Build a sort key that orders digit runs in a file stem numerically."""
    # re.split with a capturing group always alternates text, digits, text...
    # so odd positions are digit runs; equal positions therefore always hold
    # the same type and tuples never compare str against int.
    pieces = _DIGIT_RUN.split(page.stem)
    numeric_aware = tuple(
        int(piece) if index % 2 else piece
        for index, piece in enumerate(pieces)
    )
    # The full name breaks ties (e.g. "01.jpg" vs "1.png") deterministically.
    return numeric_aware, page.name


def sorted_pages(chapter_dir):
    """Return the image files directly inside ``chapter_dir`` in page order.

    Only regular files with a ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``
    suffix (case-insensitive) are returned; sub-directories are ignored.
    Pages are ordered naturally, so ``2.jpg`` comes before ``10.jpg``
    instead of the lexicographic ``10.jpg`` < ``2.jpg``.

    Args:
        chapter_dir: Directory to scan (str or path-like).

    Returns:
        A list of ``Path`` objects; empty when no image is found.

    Raises:
        OSError: If ``chapter_dir`` cannot be listed (e.g. it is missing).
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        entry
        for entry in Path(chapter_dir).iterdir()
        if entry.is_file() and entry.suffix.lower() in exts
    ]
    return sorted(pages, key=_natural_key)
|
|
|
|
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return the output folder for a single page.

    The folder is ``<chapter_dir>/translated/<page_stem>``. Missing parent
    directories are created and an already existing folder is reused.

    Args:
        chapter_dir: Chapter directory (str or path-like).
        page_stem: Name of the per-page sub-folder.

    Returns:
        The ``Path`` of the per-page folder.
    """
    target = Path(chapter_dir).joinpath("translated", page_stem)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
|
|
def process_page(page_path, workdir, translator_module):
    """Run the translator on one page from inside that page's work folder.

    The process temporarily changes its working directory to ``workdir`` so
    that the relative output names ``output.txt`` and ``bubbles.json`` are
    resolved there, and always restores the previous working directory.

    Args:
        page_path: ``Path`` of the page image.
        workdir: Existing directory to run the translator in.
        translator_module: Object exposing ``translate_manga_text`` accepting
            the keyword arguments ``image_path``, ``export_to_file`` and
            ``export_bubbles_to``.

    Returns:
        ``True`` when the translator call completed, ``False`` when any
        exception was raised (the error is printed, not propagated).
    """
    print(f"\n{'─' * 70}")
    print(f"PAGE: {page_path.name}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
        # Resolve BEFORE changing directory: a relative page_path would
        # otherwise be resolved against workdir and point at a wrong file.
        image_path = str(page_path.resolve())

        # Isolate execution to the specific page's folder
        os.chdir(workdir)

        print(" ⏳ Extracting text and translating...")

        # Pass ONLY the path arguments so every other setting falls back to
        # the defaults defined by the translator function itself.
        translator_module.translate_manga_text(
            image_path=image_path,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
        )
        print(" ✅ Translation and OCR data saved successfully")
        return True

    except Exception as e:
        # Deliberately broad: one failing page must not abort the batch.
        print(f" ❌ Failed: {e}")
        return False

    finally:
        os.chdir(orig_dir)
|
|
|
|
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
|
|
def main():
    """Command-line entry point: translate every page of one chapter folder.

    Parses the ``chapter_dir`` argument, collects the page images, loads
    ``manga-translator.py`` from the directory of this script, processes
    each page in its own ``translated/<page stem>`` folder and prints a
    summary.

    Exits with status 1 when the chapter folder is not a directory, holds
    no images, the translator cannot be loaded, or at least one page
    failed.
    """
    parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
    parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
    args = parser.parse_args()

    chapter_dir = Path(args.chapter_dir).resolve()

    # Validate the input first so a typo in the path yields a clear message
    # instead of a traceback from iterdir().
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)

    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    # Load the translator only once there is work to do.
    print("Loading translator module...")
    # resolve() keeps the location valid even if __file__ is relative.
    script_dir = Path(__file__).resolve().parent

    try:
        translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    print(f"\n📖 Chapter : {chapter_dir.name}")
    print(f" Pages : {len(pages)}")
    print(" Note : Using translation settings directly from manga-translator.py\n")

    succeeded, failed = [], []

    # NOTE(review): pages sharing a stem (e.g. 001.jpg and 001.png) use the
    # same work folder, so the later one overwrites the earlier outputs.
    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] Processing...")
        workdir = make_page_workdir(chapter_dir, page_path.stem)

        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    print(f"\n{'═' * 70}")
    print("PIPELINE COMPLETE")
    print(f"✅ {len(succeeded)} page(s) succeeded")
    if failed:
        print(f"❌ {len(failed)} page(s) failed:")
        for name in failed:
            print(f" • {name}")
    print(f"{'═' * 70}\n")

    # Report partial failure through the exit status so calling scripts
    # can detect it.
    if failed:
        sys.exit(1)
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()