223 lines
8.3 KiB
Python
223 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
pipeline.py
|
|
───────────────────────────────────────────────────────────────
|
|
Translation-only pipeline for one manga chapter folder (set CHAPTER_DIR below; the Dandadan_059 names in this docstring are only an example)
|
|
|
|
Flow per page:
|
|
1. Run translate_manga_text() → output.txt + bubbles.json
|
|
2. Leave the original image where it is; pack_cbz() adds it to the CBZ afterwards
|
|
|
|
Folder structure produced:
|
|
Dandadan_059_2022_Digital_1r0n/
|
|
└── translated/
|
|
├── 00/
|
|
│ ├── output.txt ← translations to review
|
|
│ ├── bubbles.json ← bubble boxes
|
|
│ └── debug_clusters.png ← cluster debug (if DEBUG=True)
|
|
├── 01/
|
|
│ └── ...
|
|
└── ...
|
|
|
|
Dandadan_059_translated.cbz ← original pages + translations
|
|
zipped for reference
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import shutil
|
|
import zipfile
|
|
import importlib.util
|
|
from pathlib import Path
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# CONFIG — edit these as needed
|
|
# ─────────────────────────────────────────────
|
|
# Folder holding the chapter's page images; main() scans it for
# .jpg/.jpeg/.png/.webp files and writes per-page output under
# <CHAPTER_DIR>/translated/.
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"

# Archive written by pack_cbz(). Derived from CHAPTER_DIR so that pointing the
# pipeline at a different chapter cannot leave a stale output path behind
# (rstrip guards against a trailing slash producing ".../_translated.cbz").
OUTPUT_CBZ = CHAPTER_DIR.rstrip("/") + "_translated.cbz"

# Forwarded to translate_manga_text() as source_lang / target_lang.
SOURCE_LANG = "en"
TARGET_LANG = "ca"

# manga-translator.py settings
# process_page() forwards each value below, unchanged, to
# translate_manga_text() under the lower-case keyword of the same name
# (FILTER_SFX is passed as filter_sound_effects). Their exact meaning is
# defined by manga-translator.py, not by this file.
CONFIDENCE_THRESHOLD = 0.10
MIN_TEXT_LENGTH = 2
CLUSTER_EPS = "auto"
PROXIMITY_PX = 80
FILTER_SFX = True
QUALITY_THRESHOLD = 0.5
UPSCALE_FACTOR = 2.5
BBOX_PADDING = 5
DEBUG = True
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# DYNAMIC MODULE LOADER
|
|
# ─────────────────────────────────────────────
|
|
def load_module(name, filepath):
    """
    Import a Python source file as a module by path.

    This allows loading files whose names are not valid identifiers
    (e.g. "manga-translator.py") and therefore cannot be imported with a
    normal `import` statement.

    Args:
        name: Module name to assign; also the key used in sys.modules.
        filepath: Path to the source file. A relative path is interpreted
            against the current working directory.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no loader can be determined for `filepath`
            (for example, an unrecognised file extension).
        FileNotFoundError: If `filepath` does not exist.
        Exception: Whatever the module's own top-level code raises.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    # spec_from_file_location() returns None (it does not raise) when it
    # cannot pick a loader; fail with a clear error instead of an
    # AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"Cannot create an import spec for {filepath!r}",
            name=name,
            path=str(filepath),
        )

    module = importlib.util.module_from_spec(spec)

    # Register the module before executing it, as the importlib recipe does:
    # code that looks itself up through sys.modules[__name__] (dataclasses,
    # pickle, typing helpers) otherwise fails inside the loaded file.
    missing = object()
    previous = sys.modules.get(name, missing)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        if previous is missing:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    return module
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# HELPERS
|
|
# ─────────────────────────────────────────────
|
|
def sorted_pages(chapter_dir):
    """
    Return the page images directly inside `chapter_dir`, in reading order.

    Only regular files with a .jpg/.jpeg/.png/.webp suffix (case-insensitive)
    are returned; sub-directories such as "translated/" are ignored.

    Pages are ordered "naturally" by stem: digit runs compare as numbers, so
    "2" sorts before "10". A plain string sort only gets this right when every
    page number is zero-padded to the same width.

    Args:
        chapter_dir: Path (str or Path) of the chapter folder.

    Returns:
        A list of Path objects; empty if the folder has no matching images.

    Raises:
        FileNotFoundError: If `chapter_dir` does not exist.
    """
    import re  # local import: `re` is not among this file's top-level imports

    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def reading_order(page):
        # re.split() with a capturing group yields text, digits, text, ... so
        # odd positions are always digit runs. Converting by position keeps
        # the types aligned between any two keys (no str/int comparison).
        tokens = re.split(r"(\d+)", page.stem)
        key = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        # The file name breaks ties such as "1.jpg" vs "01.jpg"
        # deterministically.
        return key, page.name

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=reading_order)
|
|
|
|
|
|
def make_page_workdir(chapter_dir, page_stem):
    """
    Ensure the per-page output folder exists and return it.

    The folder is <chapter_dir>/translated/<page_stem>. Missing parents are
    created, and an already existing folder is reused without error.

    Args:
        chapter_dir: Chapter folder (str or Path).
        page_stem: Page file name without its extension.

    Returns:
        Path of the page's working directory.
    """
    page_folder = Path(chapter_dir).joinpath("translated", page_stem)
    page_folder.mkdir(parents=True, exist_ok=True)
    return page_folder
|
|
|
|
|
|
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """
    Pack the original pages and their translations into a CBZ archive.

    Archive layout:
      - pages/<image name>               every image file in chapter_dir root
      - translations/<page>_output.txt   every output.txt under translated_dir,
                                         named after the folder it lives in

    Both groups are written in natural order (digit runs compare as numbers,
    so page "2" precedes page "10"), which keeps the reading order correct
    even when page numbers are not zero-padded.

    Args:
        chapter_dir: Chapter folder containing the original images
            (str or Path).
        translated_dir: Folder holding the per-page subfolders (str or Path).
        output_cbz: Destination archive path (str or Path). Its parent
            folder is created if needed.

    Returns:
        None. If chapter_dir holds no images, a warning is printed and no
        archive is created.
    """
    import re  # local import: `re` is not among this file's top-level imports

    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(text):
        # re.split() with a capturing group yields text, digits, text, ... so
        # odd positions are always digit runs; converting by position keeps
        # element types aligned between keys. The raw text breaks ties.
        tokens = re.split(r"(\d+)", text)
        return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)], text

    pages = sorted(
        (
            p for p in Path(chapter_dir).iterdir()
            if p.is_file() and p.suffix.lower() in exts
        ),
        key=lambda p: (natural_key(p.stem), p.name),
    )
    # Path() lets callers pass translated_dir as a plain string too.
    txts = sorted(
        Path(translated_dir).rglob("output.txt"),
        key=lambda p: natural_key(p.parent.name),
    )

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    # Create the destination folder up front so a missing directory does not
    # discard the whole run at the very last step.
    Path(output_cbz).parent.mkdir(parents=True, exist_ok=True)

    # ZIP_STORED: entries are written without compression.
    with zipfile.ZipFile(output_cbz, "w",
                         compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")

        # Translation text files
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(f"\n✅ CBZ saved → {output_cbz} "
          f"({len(pages)} page(s), {len(txts)} translation(s))")
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# PER-PAGE PIPELINE
|
|
# ─────────────────────────────────────────────
|
|
def process_page(page_path, workdir, translator_module):
    """
    Run the translator on a single page.

    The process temporarily changes its working directory to `workdir` so
    that files the translator writes with relative names (output.txt,
    bubbles.json, debug images, temp files) all land there. The previous
    working directory is always restored, even on failure.

    Args:
        page_path: Path object of the page image.
        workdir: Folder that receives this page's output files.
        translator_module: Loaded module exposing translate_manga_text().

    Returns:
        True on success; False if anything raised an Exception (the error
        message is printed, not re-raised, so one bad page does not stop
        the whole chapter).
    """
    print(f"\n{'─'*60}")
    print(f" PAGE: {page_path.name}")
    print(f"{'─'*60}")

    orig_dir = os.getcwd()
    try:
        # Resolve BEFORE changing directory: a relative page_path would
        # otherwise be resolved against workdir and point at a file that
        # does not exist.
        image_path = str(page_path.resolve())

        # chdir into workdir so debug_clusters.png,
        # temp files etc. all land there
        os.chdir(workdir)

        translator_module.translate_manga_text(
            image_path=image_path,
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            min_text_length=MIN_TEXT_LENGTH,
            cluster_eps=CLUSTER_EPS,
            proximity_px=PROXIMITY_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            upscale_factor=UPSCALE_FACTOR,
            bbox_padding=BBOX_PADDING,
            debug=DEBUG,
        )

        print(f" ✅ Translated → {workdir}")
        return True

    except Exception as e:
        # Deliberate per-page boundary: report and let the caller continue
        # with the next page.
        print(f" ❌ Failed: {e}")
        return False

    finally:
        os.chdir(orig_dir)
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# MAIN
|
|
# ─────────────────────────────────────────────
|
|
def main():
    """
    Run the whole pipeline for the chapter configured in CHAPTER_DIR.

    Steps:
      1. Load manga-translator.py (looked up in the current directory first,
         then next to this script).
      2. Discover the chapter's page images.
      3. Translate every page into its own folder under
         <CHAPTER_DIR>/translated/.
      4. Print a summary and pack originals + translations into OUTPUT_CBZ.

    Exits with status 1 if the translator file cannot be found, if
    CHAPTER_DIR is not an existing directory, or if it contains no images.
    Individual page failures are reported in the summary but do not abort
    the run.
    """
    # ── Load translator module ────────────────────────────────────
    print("Loading manga-translator.py...")
    translator_path = Path("manga-translator.py")
    if not translator_path.is_file():
        # Not in the current directory: fall back to the folder containing
        # this script so the pipeline can be started from anywhere.
        translator_path = Path(__file__).resolve().parent / "manga-translator.py"
    try:
        translator = load_module(
            "manga_translator", str(translator_path))
    except FileNotFoundError as e:
        print(f"❌ Could not load module: {e}")
        sys.exit(1)

    # ── Discover pages ────────────────────────────────────────────
    # Check the folder explicitly: listing a missing directory would
    # otherwise end the run with a raw traceback.
    if not Path(CHAPTER_DIR).is_dir():
        print(f"❌ Chapter directory not found: {CHAPTER_DIR}")
        sys.exit(1)

    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
        sys.exit(1)

    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")

    # ── Process each page ─────────────────────────────────────────
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
    failed = []

    for i, page_path in enumerate(pages, start=1):
        print(f"\n[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
        ok = process_page(page_path, workdir, translator)
        if ok:
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    # ── Summary ───────────────────────────────────────────────────
    print(f"\n{'═'*60}")
    print(" PIPELINE COMPLETE")
    print(f" ✅ {len(succeeded)} page(s) succeeded")
    if failed:
        print(f" ❌ {len(failed)} page(s) failed:")
        for f in failed:
            print(f" • {f}")
    print(f"{'═'*60}\n")

    # ── Pack CBZ ──────────────────────────────────────────────────
    # Runs even if some pages failed: the archive still receives every
    # original page plus whatever translations exist on disk.
    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
|
|
|
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|