Files
manga-translator/pipeline.py
Guillem Hernandez Sola 4fb553e940 Pipeline
2026-04-12 16:35:26 +02:00

223 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
pipeline.py
───────────────────────────────────────────────────────────────
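Translation-only pipeline for one manga chapter (set CHAPTER_DIR below;
the example paths in this docstring use Dandadan_059_2022_Digital).

Flow per page:
  1. Create translated/<page stem>/ and run translate_manga_text() from
     inside it → output.txt + bubbles.json
  2. The original image is not copied into the workdir; it is read in
     place and later packed into the CBZ together with every output.txt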
Folder structure produced:
  Dandadan_059_2022_Digital_1r0n/
  └── translated/
      ├── 00/
      │   ├── output.txt          ← translations to review
      │   ├── bubbles.json        ← bubble boxes
      │   └── debug_clusters.png  ← cluster debug (if DEBUG=True)
      ├── 01/
      │   └── ...
      └── ...
  Dandadan_059_translated.cbz     ← original pages + translations
                                    zipped for reference
"""
import os
import sys
import shutil
import zipfile
import importlib.util
from pathlib import Path
# ─────────────────────────────────────────────
# CONFIG — edit these as needed
# ─────────────────────────────────────────────
# Chapter folder to translate (no trailing slash). OUTPUT_CBZ is derived from
# it so the two paths cannot drift apart when the chapter is changed; assign
# an explicit string instead if the archive should live somewhere else.
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
OUTPUT_CBZ = f"{CHAPTER_DIR}_translated.cbz"
# Language codes passed to translate_manga_text() as source_lang / target_lang.
SOURCE_LANG = "en"
TARGET_LANG = "ca"
# manga-translator.py settings — forwarded unchanged as keyword arguments to
# translate_manga_text(); their exact meaning is defined in that module.
CONFIDENCE_THRESHOLD = 0.10
MIN_TEXT_LENGTH = 2
CLUSTER_EPS = "auto"
PROXIMITY_PX = 80
FILTER_SFX = True
QUALITY_THRESHOLD = 0.5
UPSCALE_FACTOR = 2.5
BBOX_PADDING = 5
DEBUG = True
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Import the Python source file at *filepath* as a module called *name*.

    Needed because "manga-translator.py" contains a hyphen and therefore
    cannot be loaded with a regular ``import`` statement.

    Args:
        name: Module name to assign; also the key used in ``sys.modules``.
        filepath: Path to the ``.py`` file to execute.

    Returns:
        The fully executed module object.

    Raises:
        ImportError: If no loader can be determined for *filepath*
            (for example, an unsupported file suffix).
        FileNotFoundError: If *filepath* does not exist.
        Exception: Anything raised while executing the module body.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    # spec_from_file_location() returns None instead of raising when it
    # cannot pick a loader; fail with a clear message rather than an
    # AttributeError on `None.loader`.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {filepath!r}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks itself up through
    # sys.modules during import behaves as with a normal import.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(name, None)
        raise
    return module
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """Return the page images directly inside *chapter_dir* in reading order.

    Only regular files with a .jpg / .jpeg / .png / .webp suffix
    (case-insensitive) are returned; sub-directories are not searched.
    Names are ordered naturally, so "2" sorts before "10" even when page
    numbers are not zero-padded.

    Args:
        chapter_dir: Chapter folder as a ``str`` or ``Path``.

    Returns:
        A list of ``Path`` objects, possibly empty.

    Raises:
        FileNotFoundError: If *chapter_dir* does not exist.
    """
    import re  # local import: `re` is not among the module-level imports

    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(path):
        # re.split() with a capturing group alternates text / digit-run
        # tokens, so odd positions are always digits. Keys therefore compare
        # str with str and int with int, never across types.
        tokens = re.split(r"(\d+)", path.stem)
        parts = [
            int(tok) if idx % 2 else tok.casefold()
            for idx, tok in enumerate(tokens)
        ]
        # The file name breaks ties ("01.jpg" vs "1.jpg") deterministically
        # instead of leaving them in arbitrary directory-listing order.
        return parts, path.name

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=natural_key)
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return ``<chapter_dir>/translated/<page_stem>``.

    Missing parent folders are created as well, and an already existing
    directory is reused without error.
    """
    target = Path(chapter_dir, "translated", page_stem)
    target.mkdir(parents=True, exist_ok=True)
    return target
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Pack the original pages and their translations into a CBZ archive.

    Archive layout:
      - pages/<image name>               every page returned by
                                         sorted_pages(chapter_dir)
      - translations/<folder>_output.txt every output.txt found under
                                         translated_dir, named after the
                                         folder that contains it

    Entries are stored uncompressed (ZIP_STORED). Nothing is written when
    the chapter contains no page images.

    Args:
        chapter_dir: Folder holding the original page images.
        translated_dir: Folder holding the per-page work directories
            (``str`` or ``Path``).
        output_cbz: Destination path of the archive; its parent folder is
            created if missing.
    """
    # Reuse the shared page discovery so the archive lists exactly the pages
    # the pipeline processed, in the same order.
    pages = sorted_pages(chapter_dir)
    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    # Accept plain strings as well as Path objects.
    txts = sorted(
        Path(translated_dir).rglob("output.txt"),
        key=lambda p: p.parent.name,
    )

    Path(output_cbz).parent.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(output_cbz, "w",
                         compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")
        # Translation text files
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(f"\n✅ CBZ saved → {output_cbz} "
          f"({len(pages)} page(s), {len(txts)} translation(s))")
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir, translator_module):
    """Run the translator on a single page, from inside *workdir*.

    The process temporarily changes its working directory to *workdir* so
    that files the translator writes with relative names ("output.txt",
    "bubbles.json") land there. The previous working directory is always
    restored afterwards.

    Args:
        page_path: ``Path`` of the page image.
        workdir: Existing directory that receives the output files.
        translator_module: Module object exposing ``translate_manga_text``.

    Returns:
        True on success, False if anything raised. The error is printed,
        not propagated, so one bad page does not stop the chapter.
    """
    rule = "─" * 60
    print(f"\n{rule}")
    print(f" PAGE: {page_path.name}")
    print(rule)

    orig_dir = os.getcwd()
    try:
        # Resolve BEFORE chdir: a relative page path must be interpreted
        # against the caller's directory, not against workdir.
        image_path = str(page_path.resolve())
        # chdir into workdir so debug_clusters.png,
        # temp files etc. all land there
        os.chdir(workdir)
        translator_module.translate_manga_text(
            image_path=image_path,
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            min_text_length=MIN_TEXT_LENGTH,
            cluster_eps=CLUSTER_EPS,
            proximity_px=PROXIMITY_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            upscale_factor=UPSCALE_FACTOR,
            bbox_padding=BBOX_PADDING,
            debug=DEBUG,
        )
    except Exception as e:
        # Include the exception type: str(e) alone is often empty or cryptic
        # (e.g. a bare key for KeyError).
        print(f" ❌ Failed: {type(e).__name__}: {e}")
        return False
    else:
        print(f" ✅ Translated → {workdir}")
        return True
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """Translate every page of CHAPTER_DIR and pack the result into OUTPUT_CBZ.

    Steps: load manga-translator.py, discover the page images, run
    process_page() on each one, print a summary, then build the CBZ.
    Exits with status 1 if the translator cannot be loaded, the chapter
    directory is missing, or it contains no images.
    """
    rule = "─" * 60

    # ── Load translator module ────────────────────────────────────
    print("Loading manga-translator.py...")
    # Prefer the copy next to this script so the pipeline works from any
    # working directory; fall back to the old CWD-relative lookup.
    translator_path = Path(__file__).resolve().parent / "manga-translator.py"
    if not translator_path.is_file():
        translator_path = Path("manga-translator.py")
    try:
        translator = load_module("manga_translator", str(translator_path))
    except (OSError, ImportError) as e:
        # OSError covers a missing/unreadable file; ImportError covers
        # missing dependencies of the translator itself.
        print(f"❌ Could not load module: {e}")
        sys.exit(1)

    # ── Discover pages ────────────────────────────────────────────
    if not Path(CHAPTER_DIR).is_dir():
        print(f"❌ Chapter directory not found: {CHAPTER_DIR}")
        sys.exit(1)
    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
        sys.exit(1)
    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")

    # ── Process each page ─────────────────────────────────────────
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
    failed = []
    for i, page_path in enumerate(pages, start=1):
        print(f"\n[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    # ── Summary ───────────────────────────────────────────────────
    print(f"\n{rule}")
    print(" PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) succeeded")
    if failed:
        print(f"{len(failed)} page(s) failed:")
        for name in failed:
            print(name)
    print(f"{rule}\n")

    # ── Pack CBZ ──────────────────────────────────────────────────
    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)


if __name__ == "__main__":
    main()