This commit is contained in:
Guillem Hernandez Sola
2026-04-12 16:35:26 +02:00
parent 90a6849080
commit 4fb553e940

View File

@@ -2,27 +2,25 @@
""" """
pipeline.py pipeline.py
─────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────
Full chapter translation pipeline for Dandadan_059_2022_Digital Translation-only pipeline for Dandadan_059_2022_Digital
Flow per page: Flow per page:
1. Run manga-translator.py → output.txt + bubbles.json 1. Run translate_manga_text() → output.txt + bubbles.json
2. Run manga-renderer.py → translated image 2. Copy original image to workdir for reference
3. Collect all translated images → .cbz
Folder structure produced: Folder structure produced:
Dandadan_059_2022_Digital_1r0n/ Dandadan_059_2022_Digital_1r0n/
├── 00.jpg ← original (untouched)
├── ...
└── translated/ └── translated/
├── 00/ ├── 00/
│ ├── output.txt ← raw translations │ ├── output.txt ← translations to review
│ ├── bubbles.json ← bubble boxes │ ├── bubbles.json ← bubble boxes
│ ├── debug_clusters.png ← cluster debug │ └── debug_clusters.png ← cluster debug (if DEBUG=True)
│ └── 00_translated.jpg ← rendered output
├── 01/ ├── 01/
│ └── ... │ └── ...
└── ... └── ...
Dandadan_059_translated.cbz ← final output
Dandadan_059_translated.cbz ← original pages + translations
zipped for reference
""" """
import os import os
@@ -36,10 +34,10 @@ from pathlib import Path
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# CONFIG — edit these as needed # CONFIG — edit these as needed
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n" CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_translated.cbz" OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
SOURCE_LANG = "en" SOURCE_LANG = "en"
TARGET_LANG = "ca" TARGET_LANG = "ca"
# manga-translator.py settings # manga-translator.py settings
CONFIDENCE_THRESHOLD = 0.10 CONFIDENCE_THRESHOLD = 0.10
@@ -52,15 +50,9 @@ UPSCALE_FACTOR = 2.5
BBOX_PADDING = 5 BBOX_PADDING = 5
DEBUG = True DEBUG = True
# manga-renderer.py settings
FONT_PATH = "fonts/ComicRelief-Regular.ttf"
FONT_COLOR = (0, 0, 0)
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER # DYNAMIC MODULE LOADER
# Loads manga-translator.py and manga-renderer.py
# by file path (handles hyphens in filenames)
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
def load_module(name, filepath): def load_module(name, filepath):
spec = importlib.util.spec_from_file_location(name, filepath) spec = importlib.util.spec_from_file_location(name, filepath)
@@ -73,10 +65,6 @@ def load_module(name, filepath):
# HELPERS # HELPERS
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
def sorted_pages(chapter_dir): def sorted_pages(chapter_dir):
"""
Returns sorted list of image paths in the chapter folder.
Supports .jpg, .jpeg, .png, .webp
"""
exts = {".jpg", ".jpeg", ".png", ".webp"} exts = {".jpg", ".jpeg", ".png", ".webp"}
pages = [ pages = [
p for p in Path(chapter_dir).iterdir() p for p in Path(chapter_dir).iterdir()
@@ -86,69 +74,68 @@ def sorted_pages(chapter_dir):
def make_page_workdir(chapter_dir, page_stem): def make_page_workdir(chapter_dir, page_stem):
"""
Creates and returns:
<chapter_dir>/translated/<page_stem>/
"""
workdir = Path(chapter_dir) / "translated" / page_stem workdir = Path(chapter_dir) / "translated" / page_stem
workdir.mkdir(parents=True, exist_ok=True) workdir.mkdir(parents=True, exist_ok=True)
return workdir return workdir
def pack_cbz(translated_dir, output_cbz): def pack_cbz(chapter_dir, translated_dir, output_cbz):
""" """
Collects all *_translated.* images from translated/*/ Packs into CBZ:
sorted by page stem, packs into a .cbz file. - All original pages (from chapter_dir root)
- All output.txt (one per page subfolder)
Sorted by page stem for correct reading order.
""" """
images = sorted( exts = {".jpg", ".jpeg", ".png", ".webp"}
translated_dir.rglob("*_translated.*"), pages = sorted(
[p for p in Path(chapter_dir).iterdir()
if p.suffix.lower() in exts],
key=lambda p: p.stem
)
txts = sorted(
translated_dir.rglob("output.txt"),
key=lambda p: p.parent.name key=lambda p: p.parent.name
) )
if not images: if not pages:
print("⚠️ No translated images found — CBZ not created.") print("⚠️ No original pages found — CBZ not created.")
return return
with zipfile.ZipFile(output_cbz, "w", with zipfile.ZipFile(output_cbz, "w",
compression=zipfile.ZIP_STORED) as zf: compression=zipfile.ZIP_STORED) as zf:
for img in images: # Original pages
# Archive name keeps the page stem for ordering for img in pages:
arcname = img.name arcname = f"pages/{img.name}"
zf.write(img, arcname) zf.write(img, arcname)
print(f" 📄 Added: {arcname}") print(f" 🖼 {arcname}")
# Translation text files
for txt in txts:
arcname = f"translations/{txt.parent.name}_output.txt"
zf.write(txt, arcname)
print(f" 📄 {arcname}")
print(f"\n✅ CBZ saved → {output_cbz} " print(f"\n✅ CBZ saved → {output_cbz} "
f"({len(images)} page(s))") f"({len(pages)} page(s), {len(txts)} translation(s))")
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# PER-PAGE PIPELINE # PER-PAGE PIPELINE
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
def process_page(page_path, workdir, def process_page(page_path, workdir, translator_module):
translator_module, renderer_module):
""" """
Runs translator + renderer for a single page. Runs translator for a single page.
All intermediate files land in workdir. All output files land in workdir.
Returns path to the translated image, or None on failure. Returns True on success, False on failure.
""" """
page_stem = page_path.stem
suffix = page_path.suffix
# Paths inside workdir
output_txt = str(workdir / "output.txt")
bubbles_json= str(workdir / "bubbles.json")
debug_png = str(workdir / "debug_clusters.png")
translated = str(workdir / f"{page_stem}_translated{suffix}")
print(f"\n{'─'*60}") print(f"\n{'─'*60}")
print(f" PAGE: {page_path.name}") print(f" PAGE: {page_path.name}")
print(f"{'─'*60}") print(f"{'─'*60}")
# ── Step 1: Translate ───────────────────────────────────────── orig_dir = os.getcwd()
print(f"\n[1/2] Translating...")
try: try:
# Temporarily redirect file outputs to workdir # chdir into workdir so debug_clusters.png,
orig_dir = os.getcwd() # temp files etc. all land there
os.chdir(workdir) os.chdir(workdir)
translator_module.translate_manga_text( translator_module.translate_manga_text(
@@ -167,43 +154,27 @@ def process_page(page_path, workdir,
bbox_padding = BBOX_PADDING, bbox_padding = BBOX_PADDING,
debug = DEBUG, debug = DEBUG,
) )
print(f" ✅ Translated → {workdir}")
return True
except Exception as e: except Exception as e:
print(f"Translation failed: {e}") print(f"Failed: {e}")
os.chdir(orig_dir) return False
return None
finally: finally:
os.chdir(orig_dir) os.chdir(orig_dir)
# ── Step 2: Render ────────────────────────────────────────────
print(f"\n[2/2] Rendering...")
try:
renderer_module.render_translations(
input_image = str(page_path.resolve()),
output_image = translated,
translations_file = output_txt,
bubbles_file = bubbles_json,
font_path = str(Path(orig_dir) / FONT_PATH),
font_color = FONT_COLOR,
)
except Exception as e:
print(f" ❌ Rendering failed: {e}")
return None
print(f" ✅ Done → {translated}")
return translated
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# MAIN # MAIN
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
def main(): def main():
# ── Load modules ────────────────────────────────────────────── # ── Load translator module ────────────────────────────────────
print("Loading modules...") print("Loading manga-translator.py...")
try: try:
translator = load_module( translator = load_module(
"manga_translator", "manga-translator.py") "manga_translator", "manga-translator.py")
renderer = load_module(
"manga_renderer", "manga-renderer.py")
except FileNotFoundError as e: except FileNotFoundError as e:
print(f"❌ Could not load module: {e}") print(f"❌ Could not load module: {e}")
sys.exit(1) sys.exit(1)
@@ -216,20 +187,19 @@ def main():
print(f"\n📖 Chapter : {CHAPTER_DIR}") print(f"\n📖 Chapter : {CHAPTER_DIR}")
print(f" Pages : {len(pages)}") print(f" Pages : {len(pages)}")
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}") print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
print(f" Output : {OUTPUT_CBZ}\n")
# ── Process each page ───────────────────────────────────────── # ── Process each page ─────────────────────────────────────────
translated_dir = Path(CHAPTER_DIR) / "translated" translated_dir = Path(CHAPTER_DIR) / "translated"
succeeded = [] succeeded = []
failed = [] failed = []
for page_path in pages: for i, page_path in enumerate(pages, start=1):
print(f"\n[{i}/{len(pages)}] {page_path.name}")
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem) workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
result = process_page(page_path, workdir, ok = process_page(page_path, workdir, translator)
translator, renderer) if ok:
if result: succeeded.append(page_path.name)
succeeded.append(result)
else: else:
failed.append(page_path.name) failed.append(page_path.name)
@@ -238,12 +208,14 @@ def main():
print(f" PIPELINE COMPLETE") print(f" PIPELINE COMPLETE")
print(f"{len(succeeded)} page(s) succeeded") print(f"{len(succeeded)} page(s) succeeded")
if failed: if failed:
print(f"{len(failed)} page(s) failed: {failed}") print(f"{len(failed)} page(s) failed:")
for f in failed:
print(f"{f}")
print(f"{'─'*60}\n") print(f"{'─'*60}\n")
# ── Pack CBZ ────────────────────────────────────────────────── # ── Pack CBZ ──────────────────────────────────────────────────
print("Packing CBZ...") print("Packing CBZ...")
pack_cbz(translated_dir, OUTPUT_CBZ) pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
if __name__ == "__main__": if __name__ == "__main__":