Files
manga-translator/pipeline.py
Guillem Hernandez Sola 90a6849080 Trying pipeline
2026-04-11 14:57:05 +02:00

251 lines
9.5 KiB
Python

#!/usr/bin/env python3
"""
pipeline.py
───────────────────────────────────────────────────────────────
Full chapter translation pipeline for Dandadan_059_2022_Digital.

Flow per page:
  1. Run manga-translator.py → output.txt + bubbles.json
  2. Run manga-renderer.py   → translated image
  3. Collect all translated images → .cbz

Folder structure produced:
  Dandadan_059_2022_Digital_1r0n/
  ├── 00.jpg                      ← original (untouched)
  ├── ...
  └── translated/
      ├── 00/
      │   ├── output.txt          ← raw translations
      │   ├── bubbles.json        ← bubble boxes
      │   ├── debug_clusters.png  ← cluster debug
      │   └── 00_translated.jpg   ← rendered output
      ├── 01/
      │   └── ...
      └── ...
  Dandadan_059_translated.cbz     ← final output
"""
import importlib.util
import os
import re
import shutil
import sys
import zipfile
from pathlib import Path
# ─────────────────────────────────────────────
# CONFIG — edit these as needed
# ─────────────────────────────────────────────
# Folder scanned for page images; a "translated/" subfolder is created inside it.
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
# Path of the .cbz archive written by pack_cbz().
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_translated.cbz"
# Forwarded to translate_manga_text() as source_lang / target_lang.
# NOTE(review): presumably language codes ("ca" = Catalan?) — confirm against manga-translator.py.
SOURCE_LANG = "en"
TARGET_LANG = "ca"
# manga-translator.py settings
# Each value below is passed straight through as a keyword argument of
# translate_manga_text(); their exact semantics are defined in
# manga-translator.py, which is not visible from this file.
CONFIDENCE_THRESHOLD = 0.10  # -> confidence_threshold
MIN_TEXT_LENGTH = 2  # -> min_text_length
CLUSTER_EPS = "auto"  # -> cluster_eps
PROXIMITY_PX = 80  # -> proximity_px
FILTER_SFX = True  # -> filter_sound_effects
QUALITY_THRESHOLD = 0.5  # -> quality_threshold
UPSCALE_FACTOR = 2.5  # -> upscale_factor
BBOX_PADDING = 5  # -> bbox_padding
DEBUG = True  # -> debug
# manga-renderer.py settings
# Joined onto the current working directory in process_page(), so a relative
# value is resolved against the directory the pipeline is launched from.
FONT_PATH = "fonts/ComicRelief-Regular.ttf"
# Passed as font_color to render_translations().
# NOTE(review): looks like an RGB triple (black) — confirm against manga-renderer.py.
FONT_COLOR = (0, 0, 0)
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# Loads manga-translator.py and manga-renderer.py
# by file path (handles hyphens in filenames)
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """
    Import a Python source file by path and return the module object.

    Used for "manga-translator.py" and "manga-renderer.py", whose hyphenated
    filenames cannot be imported with a regular ``import`` statement.

    Args:
        name: Module name to assign; also the key under which the module is
            registered in ``sys.modules``.
        filepath: Path to the source file to execute.

    Returns:
        The fully executed module.

    Raises:
        ImportError: If no loader can be determined for ``filepath``
            (e.g. an unrecognised file extension).
        FileNotFoundError: If ``filepath`` does not exist.
        Exception: Anything raised while executing the module's top level.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        # spec_from_file_location returns None when it cannot pick a loader;
        # fail with a clear message instead of an AttributeError on None.
        raise ImportError(
            f"Cannot create an import spec for {filepath!r}", name=name
        )
    module = importlib.util.module_from_spec(spec)

    # Register before executing (as in the importlib recipe) so code inside
    # the module that looks itself up via sys.modules[__name__] works.
    previous = sys.modules.get(name)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind on failure.
        if previous is None:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    return module
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """
    Return the page images found directly inside ``chapter_dir``, in page order.

    Only regular files with a .jpg, .jpeg, .png or .webp extension
    (case-insensitive) are returned; sub-folders such as "translated/" are
    ignored.

    Pages are ordered naturally by stem, so numeric runs compare as numbers:
    "2" sorts before "10". Zero-padded names ("00", "01", ...) keep the same
    order they had under a plain string sort.

    Args:
        chapter_dir: Folder containing the original page images.

    Returns:
        A list of ``Path`` objects in reading order.

    Raises:
        FileNotFoundError / NotADirectoryError: If ``chapter_dir`` cannot be
            listed.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(path):
        # Splitting on a capturing group yields text, digits, text, ... so odd
        # positions are always digit runs; this keeps types aligned between
        # keys and avoids str/int comparison errors.
        chunks = re.split(r"(\d+)", path.stem)
        parts = [int(c) if i % 2 else c for i, c in enumerate(chunks)]
        # File name as tie-breaker keeps "1.jpg" vs "01.jpg" deterministic.
        return parts, path.name

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=natural_key)
def make_page_workdir(chapter_dir, page_stem):
    """
    Ensure the per-page working folder exists and return it.

    The folder is ``<chapter_dir>/translated/<page_stem>/``; missing parent
    folders are created and an already existing folder is reused as-is.
    """
    target = Path(chapter_dir).joinpath("translated", page_stem)
    target.mkdir(parents=True, exist_ok=True)
    return target
def pack_cbz(translated_dir, output_cbz):
    """
    Pack every rendered page found under ``translated_dir`` into a .cbz file.

    Files matching ``*_translated.*`` are collected recursively and ordered
    naturally by the name of their containing page folder, so folder "2" is
    packed before folder "10". Each image is stored in the archive under its
    bare file name.

    Args:
        translated_dir: The "translated/" folder (``str`` or ``Path``) holding
            one sub-folder per page.
        output_cbz: Destination path of the archive.

    Returns:
        None. When no translated image is found, a warning is printed and no
        archive is created.
    """
    def page_order(path):
        # Odd positions of the split are digit runs; comparing them as ints
        # gives 1, 2, 10 instead of the lexicographic 1, 10, 2.
        chunks = re.split(r"(\d+)", path.parent.name)
        parts = [int(c) if i % 2 else c for i, c in enumerate(chunks)]
        return parts, path.name

    images = sorted(
        (p for p in Path(translated_dir).rglob("*_translated.*")
         if p.is_file()),
        key=page_order,
    )
    if not images:
        print("⚠️ No translated images found — CBZ not created.")
        return

    # ZIP_STORED: entries are written without compression.
    with zipfile.ZipFile(output_cbz, "w",
                         compression=zipfile.ZIP_STORED) as zf:
        for img in images:
            # Archive name keeps the page stem for ordering
            arcname = img.name
            zf.write(img, arcname)
            print(f" 📄 Added: {arcname}")
    print(f"\n✅ CBZ saved → {output_cbz} "
          f"({len(images)} page(s))")
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir,
                 translator_module, renderer_module):
    """
    Run translator + renderer for a single page.

    Step 1 calls ``translator_module.translate_manga_text`` with the working
    directory temporarily switched to ``workdir``, because the translator is
    given the relative export names "output.txt" and "bubbles.json".
    Step 2 calls ``renderer_module.render_translations`` on those two files
    and writes ``<page_stem>_translated<suffix>`` into ``workdir``.

    Args:
        page_path: ``Path`` of the original page image.
        workdir: ``Path`` of the per-page folder receiving all outputs.
        translator_module: Loaded manga-translator.py module.
        renderer_module: Loaded manga-renderer.py module.

    Returns:
        Path (``str``) of the translated image, or ``None`` if either step
        raised. Failures are reported on stdout and never propagate.
    """
    page_stem = page_path.stem
    suffix = page_path.suffix

    # Resolve the source image BEFORE changing directory, otherwise a
    # relative page_path would be resolved against workdir.
    source_image = str(page_path.resolve())

    # Paths inside workdir
    output_txt = str(workdir / "output.txt")
    bubbles_json = str(workdir / "bubbles.json")
    translated = str(workdir / f"{page_stem}_translated{suffix}")

    rule = "─" * 60
    print(f"\n{rule}")
    print(f" PAGE: {page_path.name}")
    print(rule)

    # Captured outside the try so the cleanup below and the font lookup in
    # step 2 can never hit an unbound name.
    orig_dir = os.getcwd()

    # ── Step 1: Translate ─────────────────────────────────────────
    print("\n[1/2] Translating...")
    try:
        # Temporarily redirect file outputs to workdir
        # NOTE(review): presumably debug_clusters.png is also written to the
        # current directory by the translator — confirm in manga-translator.py.
        os.chdir(workdir)
        translator_module.translate_manga_text(
            image_path=source_image,
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            min_text_length=MIN_TEXT_LENGTH,
            cluster_eps=CLUSTER_EPS,
            proximity_px=PROXIMITY_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            upscale_factor=UPSCALE_FACTOR,
            bbox_padding=BBOX_PADDING,
            debug=DEBUG,
        )
    except Exception as e:
        # Per-page boundary: report and let the caller continue with the
        # next page.
        print(f" ❌ Translation failed: {e}")
        return None
    finally:
        # Single restore point, runs on success and on failure.
        os.chdir(orig_dir)

    # ── Step 2: Render ────────────────────────────────────────────
    print("\n[2/2] Rendering...")
    try:
        renderer_module.render_translations(
            input_image=source_image,
            output_image=translated,
            translations_file=output_txt,
            bubbles_file=bubbles_json,
            # FONT_PATH is taken relative to the launch directory.
            font_path=str(Path(orig_dir) / FONT_PATH),
            font_color=FONT_COLOR,
        )
    except Exception as e:
        print(f" ❌ Rendering failed: {e}")
        return None

    print(f" ✅ Done → {translated}")
    return translated
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """
    Translate every page of CHAPTER_DIR and pack the results into OUTPUT_CBZ.

    Steps: load manga-translator.py and manga-renderer.py (paths relative to
    the current working directory), discover the page images, run
    process_page() on each one, print a summary, then call pack_cbz() on the
    chapter's "translated/" folder.

    Exits with status 1 when a module cannot be loaded or when no page image
    is found (including when CHAPTER_DIR does not exist). Individual page
    failures are listed in the summary and do not stop the run.
    """
    rule = "─" * 60

    # ── Load modules ──────────────────────────────────────────────
    print("Loading modules...")
    try:
        translator = load_module(
            "manga_translator", "manga-translator.py")
        renderer = load_module(
            "manga_renderer", "manga-renderer.py")
    except (FileNotFoundError, ImportError) as e:
        # ImportError also covers a dependency missing inside either script.
        print(f"❌ Could not load module: {e}")
        sys.exit(1)

    # ── Discover pages ────────────────────────────────────────────
    try:
        pages = sorted_pages(CHAPTER_DIR)
    except (FileNotFoundError, NotADirectoryError):
        # A missing chapter folder is reported the same way as an empty one
        # instead of surfacing as a traceback.
        pages = []
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
        sys.exit(1)
    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
    print(f" Output : {OUTPUT_CBZ}\n")

    # ── Process each page ─────────────────────────────────────────
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
    failed = []
    for page_path in pages:
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
        result = process_page(page_path, workdir,
                              translator, renderer)
        if result:
            succeeded.append(result)
        else:
            failed.append(page_path.name)

    # ── Summary ───────────────────────────────────────────────────
    print(f"\n{rule}")
    print(" PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) succeeded")
    if failed:
        print(f"{len(failed)} page(s) failed: {failed}")
    print(f"{rule}\n")

    # ── Pack CBZ ──────────────────────────────────────────────────
    # pack_cbz scans translated_dir on disk rather than using `succeeded`.
    print("Packing CBZ...")
    pack_cbz(translated_dir, OUTPUT_CBZ)
# Run the pipeline only when this file is executed as a script, so that
# importing it does not start any processing.
if __name__ == "__main__":
    main()