Added pipeline

This commit is contained in:
Guillem Hernandez Sola
2026-04-16 19:58:05 +02:00
parent 5aa79d986a
commit 39765a6cf1

View File

@@ -4,67 +4,40 @@ pipeline.py
─────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────
Translation + render pipeline Translation + render pipeline
Flow per page: Usage:
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG) python pipeline.py /path/to/chapter/folder
2) render_translations() -> page_translated.png
3) Pack CBZ with originals + rendered pages + text outputs
Folder structure:
<CHAPTER_DIR>/
├── 000.png
├── 001.png
└── translated/
├── 000/
│ ├── output.txt
│ ├── bubbles.json
│ ├── page_translated.png
│ └── debug_clusters.png (optional)
├── 001/
│ └── ...
└── ...
CBZ:
- pages/<original pages>
- rendered/<page_stem>_translated.png
- translations/<page_stem>_output.txt
""" """
import os import os
import sys import sys
import argparse
import zipfile import zipfile
import importlib.util import importlib.util
from pathlib import Path from pathlib import Path
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# CONFIG # CONFIG
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# Languages handed to the translator as source_lang / target_lang.
SOURCE_LANG = "en"
TARGET_LANG = "ca"

# Translator Settings
CONFIDENCE_THRESHOLD = 0.10
MIN_TEXT_LENGTH = 1
GAP_PX = "auto"
FILTER_SFX = True
QUALITY_THRESHOLD = 0.50
READING_MODE = "ltr"
DEBUG = True

# Renderer Settings
RENDER_ENABLED = True  # when False, process_page skips the render step
RENDER_OUTPUT_NAME = "page_translated.png"  # file name pack_cbz searches for
# NOTE(review): these are relative paths, and process_page changes the working
# directory to the per-page folder before calling the renderer — confirm the
# renderer resolves them against its own location rather than the cwd.
FONT_CANDIDATES = [
    "fonts/ComicNeue-Regular.ttf",
    "fonts/ComicRelief-Regular.ttf"
]
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER # DYNAMIC MODULE LOADER
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
@@ -76,7 +49,6 @@ def load_module(name, filepath):
spec.loader.exec_module(module) spec.loader.exec_module(module)
return module return module
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# HELPERS # HELPERS
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
@@ -88,80 +60,61 @@ def sorted_pages(chapter_dir):
] ]
return sorted(pages, key=lambda p: p.stem) return sorted(pages, key=lambda p: p.stem)
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return the per-page output directory.

    The directory is ``<chapter_dir>/translated/<page_stem>``. Missing parent
    directories are created and an already existing directory is reused.

    Args:
        chapter_dir: Chapter folder, as a ``str`` or ``Path``.
        page_stem: Name of the sub-folder; callers pass the page file's stem.

    Returns:
        ``Path`` of the work directory.
    """
    workdir = Path(chapter_dir) / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir
def pack_cbz(chapter_dir, translated_dir, output_cbz, render_name=None):
    """Bundle a chapter's originals, rendered pages and text outputs into a CBZ.

    Archive layout:
        pages/<original file name>
        rendered/<page folder name>_translated.png
        translations/<page folder name>_output.txt

    Args:
        chapter_dir: Folder holding the original page images (``str`` or
            ``Path``). Only files directly inside it with a .jpg/.jpeg/.png/
            .webp suffix (case-insensitive) are packed as pages.
        translated_dir: Folder searched recursively for ``output.txt`` and the
            rendered page files (``str`` or ``Path``).
        output_cbz: Destination path of the archive.
        render_name: File name of the rendered page to look for. Defaults to
            the module-level ``RENDER_OUTPUT_NAME``.

    Returns:
        None. When no original pages are found a warning is printed and no
        archive is created.
    """
    if render_name is None:
        render_name = RENDER_OUTPUT_NAME

    # Accept plain strings as well as Path objects for both directories.
    chapter_dir = Path(chapter_dir)
    translated_dir = Path(translated_dir)

    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = sorted(
        (p for p in chapter_dir.iterdir() if p.is_file() and p.suffix.lower() in exts),
        key=lambda p: p.stem,
    )
    # Generated files are ordered by the name of the folder that contains them.
    txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: p.parent.name)
    rendered = sorted(translated_dir.rglob(render_name), key=lambda p: p.parent.name)

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    # Entries are stored uncompressed (ZIP_STORED).
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            zf.write(img, f"pages/{img.name}")

        # Rendered pages
        for rp in rendered:
            zf.write(rp, f"rendered/{rp.parent.name}_translated.png")

        # Text outputs
        for txt in txts:
            zf.write(txt, f"translations/{txt.parent.name}_output.txt")

    print(f"\n✅ CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(pages)} original, {len(rendered)} rendered, {len(txts)} text files.")
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# PER-PAGE PIPELINE # PER-PAGE PIPELINE
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
def process_page(page_path, workdir, translator_module, renderer_module): def process_page(page_path, workdir, translator_module, renderer_module):
"""
Runs translator + renderer for one page.
All generated files are written inside workdir.
"""
print(f"\n{'' * 70}") print(f"\n{'' * 70}")
print(f"PAGE: {page_path.name}") print(f"PAGE: {page_path.name}")
print(f"{'' * 70}") print(f"{'' * 70}")
orig_dir = os.getcwd() orig_dir = os.getcwd()
try: try:
# Isolate execution to the specific page's folder
os.chdir(workdir) os.chdir(workdir)
# 1) translate # 1) Translate
translator_module.translate_manga_text( translator_module.translate_manga_text(
image_path= str(page_path.resolve()), image_path=str(page_path.resolve()),
source_lang=SOURCE_LANG, source_lang=SOURCE_LANG,
target_lang=TARGET_LANG, target_lang=TARGET_LANG,
confidence_threshold=CONFIDENCE_THRESHOLD, confidence_threshold=CONFIDENCE_THRESHOLD,
@@ -174,9 +127,9 @@ def process_page(page_path, workdir, translator_module, renderer_module):
reading_mode=READING_MODE, reading_mode=READING_MODE,
debug=DEBUG debug=DEBUG
) )
print("translator done") print("Translator done")
# 2) render # 2) Render
if RENDER_ENABLED: if RENDER_ENABLED:
renderer_module.render_translations( renderer_module.render_translations(
input_image=str(page_path.resolve()), input_image=str(page_path.resolve()),
@@ -185,7 +138,7 @@ def process_page(page_path, workdir, translator_module, renderer_module):
bubbles_file="bubbles.json", bubbles_file="bubbles.json",
font_candidates=FONT_CANDIDATES font_candidates=FONT_CANDIDATES
) )
print("renderer done") print("Renderer done")
return True return True
@@ -196,44 +149,52 @@ def process_page(page_path, workdir, translator_module, renderer_module):
finally: finally:
os.chdir(orig_dir) os.chdir(orig_dir)
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
# MAIN # MAIN
# ───────────────────────────────────────────── # ─────────────────────────────────────────────
def main(): def main():
parser = argparse.ArgumentParser(description="Manga Translation Pipeline")
parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
args = parser.parse_args()
chapter_dir = Path(args.chapter_dir).resolve()
output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz"
print("Loading modules...") print("Loading modules...")
# Ensure we are loading from the directory where pipeline.py is located
script_dir = Path(__file__).parent
try: try:
translator = load_module("manga_translator", "manga-translator.py") translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
except Exception as e: except Exception as e:
print(f"❌ Could not load manga-translator.py: {e}") print(f"❌ Could not load manga-translator.py: {e}")
sys.exit(1) sys.exit(1)
try: try:
renderer = load_module("manga_renderer", "manga-renderer.py") renderer = load_module("manga_renderer", str(script_dir / "manga-renderer.py"))
except Exception as e: except Exception as e:
print(f"❌ Could not load manga-renderer.py: {e}") print(f"❌ Could not load manga-renderer.py: {e}")
sys.exit(1) sys.exit(1)
pages = sorted_pages(CHAPTER_DIR) pages = sorted_pages(chapter_dir)
if not pages: if not pages:
print(f"❌ No images found in: {CHAPTER_DIR}") print(f"❌ No images found in: {chapter_dir}")
sys.exit(1) sys.exit(1)
print(f"\n📖 Chapter : {CHAPTER_DIR}") print(f"\n📖 Chapter : {chapter_dir}")
print(f" Pages : {len(pages)}") print(f" Pages : {len(pages)}")
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}") print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n") print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
translated_dir = Path(CHAPTER_DIR) / "translated" translated_dir = chapter_dir / "translated"
succeeded = [] succeeded, failed = [], []
failed = []
for i, page_path in enumerate(pages, start=1): for i, page_path in enumerate(pages, start=1):
print(f"[{i}/{len(pages)}] {page_path.name}") print(f"[{i}/{len(pages)}] Processing...")
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem) workdir = make_page_workdir(chapter_dir, page_path.stem)
ok = process_page(page_path, workdir, translator, renderer)
if ok: if process_page(page_path, workdir, translator, renderer):
succeeded.append(page_path.name) succeeded.append(page_path.name)
else: else:
failed.append(page_path.name) failed.append(page_path.name)
@@ -248,8 +209,7 @@ def main():
print(f"{'' * 70}\n") print(f"{'' * 70}\n")
print("Packing CBZ...") print("Packing CBZ...")
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ) pack_cbz(chapter_dir, translated_dir, output_cbz)
if __name__ == "__main__": if __name__ == "__main__":
main() main()