This commit is contained in:
Guillem Hernandez Sola
2026-04-12 16:35:26 +02:00
parent 90a6849080
commit 4fb553e940

View File

@@ -2,27 +2,25 @@
"""
pipeline.py
───────────────────────────────────────────────────────────────
Full chapter translation pipeline for Dandadan_059_2022_Digital
Translation-only pipeline for Dandadan_059_2022_Digital
Flow per page:
1. Run manga-translator.py → output.txt + bubbles.json
2. Run manga-renderer.py → translated image
3. Collect all translated images → .cbz
1. Run translate_manga_text() → output.txt + bubbles.json
2. Copy original image to workdir for reference
Folder structure produced:
Dandadan_059_2022_Digital_1r0n/
├── 00.jpg ← original (untouched)
├── ...
└── translated/
├── 00/
│ ├── output.txt ← raw translations
│ ├── output.txt ← translations to review
│ ├── bubbles.json ← bubble boxes
│ ├── debug_clusters.png ← cluster debug
│ └── 00_translated.jpg ← rendered output
│ └── debug_clusters.png ← cluster debug (if DEBUG=True)
├── 01/
│ └── ...
└── ...
Dandadan_059_translated.cbz ← final output
Dandadan_059_translated.cbz ← original pages + translations
zipped for reference
"""
import os
@@ -36,8 +34,8 @@ from pathlib import Path
# ─────────────────────────────────────────────
# CONFIG — edit these as needed
# ─────────────────────────────────────────────
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_translated.cbz"
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
SOURCE_LANG = "en"
TARGET_LANG = "ca"
@@ -52,15 +50,9 @@ UPSCALE_FACTOR = 2.5
BBOX_PADDING = 5
DEBUG = True
# manga-renderer.py settings
FONT_PATH = "fonts/ComicRelief-Regular.ttf"
FONT_COLOR = (0, 0, 0)
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# Loads manga-translator.py and manga-renderer.py
# by file path (handles hyphens in filenames)
# ─────────────────────────────────────────────
def load_module(name, filepath):
spec = importlib.util.spec_from_file_location(name, filepath)
@@ -73,10 +65,6 @@ def load_module(name, filepath):
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
"""
Returns sorted list of image paths in the chapter folder.
Supports .jpg, .jpeg, .png, .webp
"""
exts = {".jpg", ".jpeg", ".png", ".webp"}
pages = [
p for p in Path(chapter_dir).iterdir()
@@ -86,69 +74,68 @@ def sorted_pages(chapter_dir):
def make_page_workdir(chapter_dir, page_stem):
    """
    Create (if needed) and return the per-page working directory:
        <chapter_dir>/translated/<page_stem>/

    Args:
        chapter_dir: chapter folder (str or Path).
        page_stem: page file name without its extension, e.g. "00".

    Returns:
        pathlib.Path pointing at the working directory, which exists
        when this function returns.
    """
    workdir = Path(chapter_dir) / "translated" / page_stem
    # parents=True also creates "translated/" on first use;
    # exist_ok=True makes repeated runs over the same chapter harmless.
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """
    Pack the original pages and their translation text files into a CBZ.

    Archive layout:
        pages/<page file name>               one per image in chapter_dir
        translations/<page stem>_output.txt  one per output.txt found

    Pages are sorted by file stem and translations by the name of the
    folder that contains them, so both follow reading order.

    Args:
        chapter_dir: folder holding the original page images; only its
            top level is scanned (.jpg, .jpeg, .png, .webp).
        translated_dir: folder searched recursively for output.txt files
            (str or Path).
        output_cbz: path of the archive to write.

    Returns:
        None. If chapter_dir holds no page images, a warning is printed
        and no archive is written.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = sorted(
        # is_file() keeps out directories whose name happens to end in
        # an image extension.
        (p for p in Path(chapter_dir).iterdir()
         if p.is_file() and p.suffix.lower() in exts),
        key=lambda p: p.stem,
    )
    txts = sorted(
        Path(translated_dir).rglob("output.txt"),
        key=lambda p: p.parent.name,
    )

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    # ZIP_STORED writes entries uncompressed (presumably because the
    # page images are already compressed — confirm if size matters).
    with zipfile.ZipFile(output_cbz, "w",
                         compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")

        # Translation text files; the parent folder name (the page stem)
        # is put in the archive name so entries do not collide.
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(f"\n✅ CBZ saved → {output_cbz} "
          f"({len(pages)} page(s), {len(txts)} translation(s))")
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir,
translator_module, renderer_module):
def process_page(page_path, workdir, translator_module):
"""
Runs translator + renderer for a single page.
All intermediate files land in workdir.
Returns path to the translated image, or None on failure.
Runs translator for a single page.
All output files land in workdir.
Returns True on success, False on failure.
"""
page_stem = page_path.stem
suffix = page_path.suffix
# Paths inside workdir
output_txt = str(workdir / "output.txt")
bubbles_json= str(workdir / "bubbles.json")
debug_png = str(workdir / "debug_clusters.png")
translated = str(workdir / f"{page_stem}_translated{suffix}")
print(f"\n{''*60}")
print(f" PAGE: {page_path.name}")
print(f"{''*60}")
# ── Step 1: Translate ─────────────────────────────────────────
print(f"\n[1/2] Translating...")
try:
# Temporarily redirect file outputs to workdir
orig_dir = os.getcwd()
try:
# chdir into workdir so debug_clusters.png,
# temp files etc. all land there
os.chdir(workdir)
translator_module.translate_manga_text(
@@ -167,43 +154,27 @@ def process_page(page_path, workdir,
bbox_padding = BBOX_PADDING,
debug = DEBUG,
)
print(f" ✅ Translated → {workdir}")
return True
except Exception as e:
print(f"Translation failed: {e}")
os.chdir(orig_dir)
return None
print(f"Failed: {e}")
return False
finally:
os.chdir(orig_dir)
# ── Step 2: Render ────────────────────────────────────────────
print(f"\n[2/2] Rendering...")
try:
renderer_module.render_translations(
input_image = str(page_path.resolve()),
output_image = translated,
translations_file = output_txt,
bubbles_file = bubbles_json,
font_path = str(Path(orig_dir) / FONT_PATH),
font_color = FONT_COLOR,
)
except Exception as e:
print(f" ❌ Rendering failed: {e}")
return None
print(f" ✅ Done → {translated}")
return translated
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
# ── Load modules ──────────────────────────────────────────────
print("Loading modules...")
# ── Load translator module ────────────────────────────────────
print("Loading manga-translator.py...")
try:
translator = load_module(
"manga_translator", "manga-translator.py")
renderer = load_module(
"manga_renderer", "manga-renderer.py")
except FileNotFoundError as e:
print(f"❌ Could not load module: {e}")
sys.exit(1)
@@ -216,20 +187,19 @@ def main():
print(f"\n📖 Chapter : {CHAPTER_DIR}")
print(f" Pages : {len(pages)}")
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
print(f" Output : {OUTPUT_CBZ}\n")
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
# ── Process each page ─────────────────────────────────────────
translated_dir = Path(CHAPTER_DIR) / "translated"
succeeded = []
failed = []
for page_path in pages:
for i, page_path in enumerate(pages, start=1):
print(f"\n[{i}/{len(pages)}] {page_path.name}")
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
result = process_page(page_path, workdir,
translator, renderer)
if result:
succeeded.append(result)
ok = process_page(page_path, workdir, translator)
if ok:
succeeded.append(page_path.name)
else:
failed.append(page_path.name)
@@ -238,12 +208,14 @@ def main():
print(f" PIPELINE COMPLETE")
print(f"{len(succeeded)} page(s) succeeded")
if failed:
print(f"{len(failed)} page(s) failed: {failed}")
print(f"{len(failed)} page(s) failed:")
for f in failed:
print(f"{f}")
print(f"{''*60}\n")
# ── Pack CBZ ──────────────────────────────────────────────────
print("Packing CBZ...")
pack_cbz(translated_dir, OUTPUT_CBZ)
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
if __name__ == "__main__":