Added pipeline
This commit is contained in:
124
pipeline.py
124
pipeline.py
@@ -4,67 +4,40 @@ pipeline.py
|
|||||||
───────────────────────────────────────────────────────────────
|
───────────────────────────────────────────────────────────────
|
||||||
Translation + render pipeline
|
Translation + render pipeline
|
||||||
|
|
||||||
Flow per page:
|
Usage:
|
||||||
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
|
python pipeline.py /path/to/chapter/folder
|
||||||
2) render_translations() -> page_translated.png
|
|
||||||
3) Pack CBZ with originals + rendered pages + text outputs
|
|
||||||
|
|
||||||
Folder structure:
|
|
||||||
<CHAPTER_DIR>/
|
|
||||||
├── 000.png
|
|
||||||
├── 001.png
|
|
||||||
└── translated/
|
|
||||||
├── 000/
|
|
||||||
│ ├── output.txt
|
|
||||||
│ ├── bubbles.json
|
|
||||||
│ ├── page_translated.png
|
|
||||||
│ └── debug_clusters.png (optional)
|
|
||||||
├── 001/
|
|
||||||
│ └── ...
|
|
||||||
└── ...
|
|
||||||
|
|
||||||
CBZ:
|
|
||||||
- pages/<original pages>
|
|
||||||
- rendered/<page_stem>_translated.png
|
|
||||||
- translations/<page_stem>_output.txt
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import argparse
|
||||||
import zipfile
|
import zipfile
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# CONFIG
|
# CONFIG
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
|
|
||||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
|
|
||||||
|
|
||||||
# Language pair handed to the translator as source_lang / target_lang.
SOURCE_LANG = "en"
TARGET_LANG = "ca"

# Translator settings
# CONFIDENCE_THRESHOLD, READING_MODE and DEBUG are passed to
# translate_manga_text() as confidence_threshold, reading_mode and debug.
# NOTE(review): MIN_TEXT_LENGTH, GAP_PX, FILTER_SFX and QUALITY_THRESHOLD are
# presumably forwarded by the same call; that part of the call is not visible
# here, so confirm against process_page().
CONFIDENCE_THRESHOLD = 0.10
MIN_TEXT_LENGTH = 1
GAP_PX = "auto"  # took over from the older cluster/proximity setting
FILTER_SFX = True
QUALITY_THRESHOLD = 0.50
READING_MODE = "ltr"
DEBUG = True

# Renderer settings
RENDER_ENABLED = True  # when False, process_page() skips the render step
# File name pack_cbz() searches for inside each page folder.
RENDER_OUTPUT_NAME = "page_translated.png"

# Optional custom font list, passed to the renderer as font_candidates.
# NOTE(review): these paths are relative and process_page() changes the
# working directory to the page folder before rendering; confirm that the
# renderer resolves them against the script location rather than the CWD.
FONT_CANDIDATES = [
    "fonts/ComicNeue-Regular.ttf",
    "fonts/ComicRelief-Regular.ttf",
]
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# DYNAMIC MODULE LOADER
|
# DYNAMIC MODULE LOADER
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
@@ -76,7 +49,6 @@ def load_module(name, filepath):
|
|||||||
spec.loader.exec_module(module)
|
spec.loader.exec_module(module)
|
||||||
return module
|
return module
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# HELPERS
|
# HELPERS
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
@@ -88,80 +60,61 @@ def sorted_pages(chapter_dir):
|
|||||||
]
|
]
|
||||||
return sorted(pages, key=lambda p: p.stem)
|
return sorted(pages, key=lambda p: p.stem)
|
||||||
|
|
||||||
|
|
||||||
def make_page_workdir(chapter_dir, page_stem) -> Path:
    """Create (if needed) and return the output folder for one page.

    The folder is ``<chapter_dir>/translated/<page_stem>``. Missing parent
    folders are created, and an already existing folder is reused as-is.

    Args:
        chapter_dir: Chapter folder, as a ``str`` or ``Path``.
        page_stem: Name of the page's sub-folder (the page file name
            without its extension, as passed by the caller).

    Returns:
        The ``Path`` of the page folder.
    """
    workdir = Path(chapter_dir) / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir
|
||||||
|
|
||||||
|
|
||||||
def pack_cbz(chapter_dir, translated_dir, output_cbz, render_output_name=None):
    """Bundle original pages, rendered pages and text outputs into a CBZ.

    Archive layout:
        pages/<original file name>
        rendered/<page folder name>_translated.png
        translations/<page folder name>_output.txt

    Args:
        chapter_dir: Folder with the original page images (``str`` or
            ``Path``). Only regular files directly inside it whose suffix is
            .jpg, .jpeg, .png or .webp (case-insensitive) are packed.
        translated_dir: Folder searched recursively for ``output.txt`` and
            rendered pages (``str`` or ``Path``). It may be missing, in which
            case only the original pages are packed.
        output_cbz: Destination path of the archive.
        render_output_name: File name of a rendered page inside its page
            folder. Defaults to the module-level ``RENDER_OUTPUT_NAME``.

    Returns:
        None. When no original pages are found, a warning is printed and no
        archive is written.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = sorted(
        (
            p for p in Path(chapter_dir).iterdir()
            if p.is_file() and p.suffix.lower() in exts
        ),
        key=lambda p: p.stem,
    )

    # Bail out before touching translated_dir: nothing to pack without pages.
    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    if render_output_name is None:
        render_output_name = RENDER_OUTPUT_NAME

    # Accept str as well as Path, matching how chapter_dir is handled.
    translated_dir = Path(translated_dir)

    def _find(file_name):
        """Return matching files under translated_dir, ordered by page folder."""
        if not translated_dir.is_dir():
            return []
        return sorted(translated_dir.rglob(file_name), key=lambda p: p.parent.name)

    txts = _find("output.txt")
    rendered = _find(render_output_name)

    # ZIP_STORED: entries are written without compression.
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            zf.write(img, f"pages/{img.name}")

        # Rendered pages, renamed after the folder they were found in
        for rp in rendered:
            zf.write(rp, f"rendered/{rp.parent.name}_translated.png")

        # Text outputs, renamed after the folder they were found in
        for txt in txts:
            zf.write(txt, f"translations/{txt.parent.name}_output.txt")

    print(f"\n✅ CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(pages)} original, {len(rendered)} rendered, {len(txts)} text files.")
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# PER-PAGE PIPELINE
|
# PER-PAGE PIPELINE
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def process_page(page_path, workdir, translator_module, renderer_module):
|
def process_page(page_path, workdir, translator_module, renderer_module):
|
||||||
"""
|
|
||||||
Runs translator + renderer for one page.
|
|
||||||
All generated files are written inside workdir.
|
|
||||||
"""
|
|
||||||
print(f"\n{'─' * 70}")
|
print(f"\n{'─' * 70}")
|
||||||
print(f"PAGE: {page_path.name}")
|
print(f"PAGE: {page_path.name}")
|
||||||
print(f"{'─' * 70}")
|
print(f"{'─' * 70}")
|
||||||
|
|
||||||
orig_dir = os.getcwd()
|
orig_dir = os.getcwd()
|
||||||
try:
|
try:
|
||||||
|
# Isolate execution to the specific page's folder
|
||||||
os.chdir(workdir)
|
os.chdir(workdir)
|
||||||
|
|
||||||
# 1) translate
|
# 1) Translate
|
||||||
translator_module.translate_manga_text(
|
translator_module.translate_manga_text(
|
||||||
image_path= str(page_path.resolve()),
|
image_path=str(page_path.resolve()),
|
||||||
source_lang=SOURCE_LANG,
|
source_lang=SOURCE_LANG,
|
||||||
target_lang=TARGET_LANG,
|
target_lang=TARGET_LANG,
|
||||||
confidence_threshold=CONFIDENCE_THRESHOLD,
|
confidence_threshold=CONFIDENCE_THRESHOLD,
|
||||||
@@ -174,9 +127,9 @@ def process_page(page_path, workdir, translator_module, renderer_module):
|
|||||||
reading_mode=READING_MODE,
|
reading_mode=READING_MODE,
|
||||||
debug=DEBUG
|
debug=DEBUG
|
||||||
)
|
)
|
||||||
print(" ✅ translator done")
|
print(" ✅ Translator done")
|
||||||
|
|
||||||
# 2) render
|
# 2) Render
|
||||||
if RENDER_ENABLED:
|
if RENDER_ENABLED:
|
||||||
renderer_module.render_translations(
|
renderer_module.render_translations(
|
||||||
input_image=str(page_path.resolve()),
|
input_image=str(page_path.resolve()),
|
||||||
@@ -185,7 +138,7 @@ def process_page(page_path, workdir, translator_module, renderer_module):
|
|||||||
bubbles_file="bubbles.json",
|
bubbles_file="bubbles.json",
|
||||||
font_candidates=FONT_CANDIDATES
|
font_candidates=FONT_CANDIDATES
|
||||||
)
|
)
|
||||||
print(" ✅ renderer done")
|
print(" ✅ Renderer done")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -196,44 +149,52 @@ def process_page(page_path, workdir, translator_module, renderer_module):
|
|||||||
finally:
|
finally:
|
||||||
os.chdir(orig_dir)
|
os.chdir(orig_dir)
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# MAIN
|
# MAIN
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def main():
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Manga Translation Pipeline")
|
||||||
|
parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
chapter_dir = Path(args.chapter_dir).resolve()
|
||||||
|
output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz"
|
||||||
|
|
||||||
print("Loading modules...")
|
print("Loading modules...")
|
||||||
|
|
||||||
|
# Ensure we are loading from the directory where pipeline.py is located
|
||||||
|
script_dir = Path(__file__).parent
|
||||||
|
|
||||||
try:
|
try:
|
||||||
translator = load_module("manga_translator", "manga-translator.py")
|
translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Could not load manga-translator.py: {e}")
|
print(f"❌ Could not load manga-translator.py: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
renderer = load_module("manga_renderer", "manga-renderer.py")
|
renderer = load_module("manga_renderer", str(script_dir / "manga-renderer.py"))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Could not load manga-renderer.py: {e}")
|
print(f"❌ Could not load manga-renderer.py: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
pages = sorted_pages(CHAPTER_DIR)
|
pages = sorted_pages(chapter_dir)
|
||||||
if not pages:
|
if not pages:
|
||||||
print(f"❌ No images found in: {CHAPTER_DIR}")
|
print(f"❌ No images found in: {chapter_dir}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"\n📖 Chapter : {CHAPTER_DIR}")
|
print(f"\n📖 Chapter : {chapter_dir}")
|
||||||
print(f" Pages : {len(pages)}")
|
print(f" Pages : {len(pages)}")
|
||||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
|
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
|
||||||
print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
|
print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
|
||||||
|
|
||||||
translated_dir = Path(CHAPTER_DIR) / "translated"
|
translated_dir = chapter_dir / "translated"
|
||||||
succeeded = []
|
succeeded, failed = [], []
|
||||||
failed = []
|
|
||||||
|
|
||||||
for i, page_path in enumerate(pages, start=1):
|
for i, page_path in enumerate(pages, start=1):
|
||||||
print(f"[{i}/{len(pages)}] {page_path.name}")
|
print(f"[{i}/{len(pages)}] Processing...")
|
||||||
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
|
workdir = make_page_workdir(chapter_dir, page_path.stem)
|
||||||
ok = process_page(page_path, workdir, translator, renderer)
|
|
||||||
if ok:
|
if process_page(page_path, workdir, translator, renderer):
|
||||||
succeeded.append(page_path.name)
|
succeeded.append(page_path.name)
|
||||||
else:
|
else:
|
||||||
failed.append(page_path.name)
|
failed.append(page_path.name)
|
||||||
@@ -248,8 +209,7 @@ def main():
|
|||||||
print(f"{'═' * 70}\n")
|
print(f"{'═' * 70}\n")
|
||||||
|
|
||||||
print("Packing CBZ...")
|
print("Packing CBZ...")
|
||||||
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
|
pack_cbz(chapter_dir, translated_dir, output_cbz)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user