Pipeline
This commit is contained in:
152
pipeline.py
152
pipeline.py
@@ -2,27 +2,25 @@
|
||||
"""
|
||||
pipeline.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Full chapter translation pipeline for Dandadan_059_2022_Digital
|
||||
Translation-only pipeline for a single manga chapter (the folder set in CHAPTER_DIR; the Dandadan_059 paths below are an example)
|
||||
|
||||
Flow per page:
|
||||
1. Run manga-translator.py → output.txt + bubbles.json
|
||||
2. Run manga-renderer.py → translated image
|
||||
3. Collect all translated images → .cbz
|
||||
1. Run translate_manga_text() → output.txt + bubbles.json
|
||||
2. Copy original image to workdir for reference
|
||||
|
||||
Folder structure produced:
|
||||
Dandadan_059_2022_Digital_1r0n/
|
||||
├── 00.jpg ← original (untouched)
|
||||
├── ...
|
||||
└── translated/
|
||||
├── 00/
|
||||
│ ├── output.txt ← raw translations
|
||||
│ ├── output.txt ← translations to review
|
||||
│ ├── bubbles.json ← bubble boxes
|
||||
│ ├── debug_clusters.png ← cluster debug
|
||||
│ └── 00_translated.jpg ← rendered output
|
||||
│ └── debug_clusters.png ← cluster debug (if DEBUG=True)
|
||||
├── 01/
|
||||
│ └── ...
|
||||
└── ...
|
||||
Dandadan_059_translated.cbz ← final output
|
||||
|
||||
Dandadan_059_translated.cbz ← original pages + translations
|
||||
zipped for reference
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -36,8 +34,8 @@ from pathlib import Path
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG — edit these as needed
|
||||
# ─────────────────────────────────────────────
|
||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
|
||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_translated.cbz"
|
||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
|
||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
|
||||
SOURCE_LANG = "en"
|
||||
TARGET_LANG = "ca"
|
||||
|
||||
@@ -52,15 +50,9 @@ UPSCALE_FACTOR = 2.5
|
||||
BBOX_PADDING = 5
|
||||
DEBUG = True
|
||||
|
||||
# manga-renderer.py settings
|
||||
FONT_PATH = "fonts/ComicRelief-Regular.ttf"
|
||||
FONT_COLOR = (0, 0, 0)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# Loads manga-translator.py and manga-renderer.py
|
||||
# by file path (handles hyphens in filenames)
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
|
||||
spec = importlib.util.spec_from_file_location(name, filepath)
|
||||
@@ -73,10 +65,6 @@ def load_module(name, filepath):
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
|
||||
"""
|
||||
Returns sorted list of image paths in the chapter folder.
|
||||
Supports .jpg, .jpeg, .png, .webp
|
||||
"""
|
||||
exts = {".jpg", ".jpeg", ".png", ".webp"}
|
||||
pages = [
|
||||
p for p in Path(chapter_dir).iterdir()
|
||||
@@ -86,69 +74,68 @@ def sorted_pages(chapter_dir):
|
||||
|
||||
|
||||
def make_page_workdir(chapter_dir, page_stem):
    """
    Create (if needed) and return the working directory for one page.

    The directory is:
        <chapter_dir>/translated/<page_stem>/

    Missing parent directories are created, and an already existing
    directory is reused without error.

    Args:
        chapter_dir: Chapter folder, as a str or Path.
        page_stem: Page file name without its extension (e.g. "00").

    Returns:
        pathlib.Path pointing at the page's working directory.
    """
    workdir = Path(chapter_dir) / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir
|
||||
|
||||
|
||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """
    Pack the original pages and their translation text files into a CBZ.

    Archive layout:
      - pages/<file name>               every image file found directly in
                                        chapter_dir (.jpg, .jpeg, .png, .webp)
      - translations/<page>_output.txt  every output.txt found under
                                        translated_dir, named after the
                                        folder that contains it

    Pages are sorted by file stem and translations by their parent folder
    name, so both lists follow the reading order.

    Args:
        chapter_dir: Folder holding the original page images (str or Path).
        translated_dir: Folder holding the per-page subfolders (str or Path).
        output_cbz: Destination path of the .cbz archive.

    Returns:
        None. If no page image is found, a warning is printed and no
        archive is written.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = sorted(
        # is_file() keeps directories that happen to carry an image-like
        # suffix out of the archive.
        (p for p in Path(chapter_dir).iterdir()
         if p.is_file() and p.suffix.lower() in exts),
        key=lambda p: p.stem,
    )
    txts = sorted(
        # Path(...) lets callers pass translated_dir as a plain string too.
        Path(translated_dir).rglob("output.txt"),
        key=lambda p: p.parent.name,
    )

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    with zipfile.ZipFile(output_cbz, "w",
                         compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")

        # Translation text files
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(f"\n✅ CBZ saved → {output_cbz} "
          f"({len(pages)} page(s), {len(txts)} translation(s))")
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_page(page_path, workdir,
|
||||
translator_module, renderer_module):
|
||||
def process_page(page_path, workdir, translator_module):
|
||||
"""
|
||||
Runs translator + renderer for a single page.
|
||||
All intermediate files land in workdir.
|
||||
Returns path to the translated image, or None on failure.
|
||||
Runs translator for a single page.
|
||||
All output files land in workdir.
|
||||
Returns True on success, False on failure.
|
||||
"""
|
||||
page_stem = page_path.stem
|
||||
suffix = page_path.suffix
|
||||
|
||||
# Paths inside workdir
|
||||
output_txt = str(workdir / "output.txt")
|
||||
bubbles_json= str(workdir / "bubbles.json")
|
||||
debug_png = str(workdir / "debug_clusters.png")
|
||||
translated = str(workdir / f"{page_stem}_translated{suffix}")
|
||||
|
||||
print(f"\n{'─'*60}")
|
||||
print(f" PAGE: {page_path.name}")
|
||||
print(f"{'─'*60}")
|
||||
|
||||
# ── Step 1: Translate ─────────────────────────────────────────
|
||||
print(f"\n[1/2] Translating...")
|
||||
try:
|
||||
# Temporarily redirect file outputs to workdir
|
||||
orig_dir = os.getcwd()
|
||||
try:
|
||||
# chdir into workdir so debug_clusters.png,
|
||||
# temp files etc. all land there
|
||||
os.chdir(workdir)
|
||||
|
||||
translator_module.translate_manga_text(
|
||||
@@ -167,43 +154,27 @@ def process_page(page_path, workdir,
|
||||
bbox_padding = BBOX_PADDING,
|
||||
debug = DEBUG,
|
||||
)
|
||||
|
||||
print(f" ✅ Translated → {workdir}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Translation failed: {e}")
|
||||
os.chdir(orig_dir)
|
||||
return None
|
||||
print(f" ❌ Failed: {e}")
|
||||
return False
|
||||
|
||||
finally:
|
||||
os.chdir(orig_dir)
|
||||
|
||||
# ── Step 2: Render ────────────────────────────────────────────
|
||||
print(f"\n[2/2] Rendering...")
|
||||
try:
|
||||
renderer_module.render_translations(
|
||||
input_image = str(page_path.resolve()),
|
||||
output_image = translated,
|
||||
translations_file = output_txt,
|
||||
bubbles_file = bubbles_json,
|
||||
font_path = str(Path(orig_dir) / FONT_PATH),
|
||||
font_color = FONT_COLOR,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f" ❌ Rendering failed: {e}")
|
||||
return None
|
||||
|
||||
print(f" ✅ Done → {translated}")
|
||||
return translated
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def main():
|
||||
# ── Load modules ──────────────────────────────────────────────
|
||||
print("Loading modules...")
|
||||
# ── Load translator module ────────────────────────────────────
|
||||
print("Loading manga-translator.py...")
|
||||
try:
|
||||
translator = load_module(
|
||||
"manga_translator", "manga-translator.py")
|
||||
renderer = load_module(
|
||||
"manga_renderer", "manga-renderer.py")
|
||||
except FileNotFoundError as e:
|
||||
print(f"❌ Could not load module: {e}")
|
||||
sys.exit(1)
|
||||
@@ -216,20 +187,19 @@ def main():
|
||||
|
||||
print(f"\n📖 Chapter : {CHAPTER_DIR}")
|
||||
print(f" Pages : {len(pages)}")
|
||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
|
||||
print(f" Output : {OUTPUT_CBZ}\n")
|
||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
|
||||
|
||||
# ── Process each page ─────────────────────────────────────────
|
||||
translated_dir = Path(CHAPTER_DIR) / "translated"
|
||||
succeeded = []
|
||||
failed = []
|
||||
|
||||
for page_path in pages:
|
||||
for i, page_path in enumerate(pages, start=1):
|
||||
print(f"\n[{i}/{len(pages)}] {page_path.name}")
|
||||
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
|
||||
result = process_page(page_path, workdir,
|
||||
translator, renderer)
|
||||
if result:
|
||||
succeeded.append(result)
|
||||
ok = process_page(page_path, workdir, translator)
|
||||
if ok:
|
||||
succeeded.append(page_path.name)
|
||||
else:
|
||||
failed.append(page_path.name)
|
||||
|
||||
@@ -238,12 +208,14 @@ def main():
|
||||
print(f" PIPELINE COMPLETE")
|
||||
print(f" ✅ {len(succeeded)} page(s) succeeded")
|
||||
if failed:
|
||||
print(f" ❌ {len(failed)} page(s) failed: {failed}")
|
||||
print(f" ❌ {len(failed)} page(s) failed:")
|
||||
for f in failed:
|
||||
print(f" • {f}")
|
||||
print(f"{'═'*60}\n")
|
||||
|
||||
# ── Pack CBZ ──────────────────────────────────────────────────
|
||||
print("Packing CBZ...")
|
||||
pack_cbz(translated_dir, OUTPUT_CBZ)
|
||||
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user