Started pipelines, render not working
This commit is contained in:
189
pipeline-translator.py
Normal file
189
pipeline-translator.py
Normal file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
pipeline.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Translation OCR pipeline (No Rendering)
|
||||
|
||||
Usage:
|
||||
python pipeline.py /path/to/chapter/folder
|
||||
"""
|
||||
|
||||
import argparse
import importlib.util
import os
import re
import sys
import zipfile
from pathlib import Path
|
||||
|
||||
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Language pair forwarded to translate_manga_text() for every page.
SOURCE_LANG = "en"
TARGET_LANG = "ca"

# Translator Settings — each value is passed through unchanged as the
# corresponding keyword argument of translate_manga_text() (see process_page).
CONFIDENCE_THRESHOLD = 0.10   # forwarded as confidence_threshold
MIN_TEXT_LENGTH = 1           # forwarded as min_text_length
GAP_PX = "auto"               # forwarded as gap_px; "auto" semantics defined by the translator
FILTER_SFX = True             # forwarded as filter_sound_effects
QUALITY_THRESHOLD = 0.50      # forwarded as quality_threshold
READING_MODE = "ltr"          # forwarded as reading_mode
DEBUG = True                  # forwarded as debug
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
    """Import and return a module loaded from an arbitrary file path.

    Raises FileNotFoundError when no import spec (or loader) can be
    built for *filepath*.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    loader = getattr(spec, "loader", None)
    if loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
    """Return the chapter's image files in natural (human) page order.

    Fix: the previous lexicographic sort on the file stem ordered
    unpadded page numbers incorrectly ("10" before "2").  Digit runs in
    the stem are now compared numerically, so 1 < 2 < 10.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(p):
        # Split the stem into alternating text/digit tokens; digits
        # compare as ints, text case-insensitively.
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r"(\d+)", p.stem)]

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=natural_key)
|
||||
|
||||
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return ``<chapter_dir>/translated/<page_stem>``."""
    target = Path(chapter_dir, "translated", page_stem)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Bundle original pages plus per-page outputs into a CBZ archive.

    Archive layout (entries stored uncompressed):
        pages/<original image>
        translations/<page>_output.txt
        data/<page>_bubbles.json

    Prints a warning and writes nothing when no pages are found.

    Fix: entries were previously sorted lexicographically, which orders
    unpadded page numbers incorrectly ("10" before "2"); a natural-sort
    key is used now, matching sorted_pages().
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(text):
        # Digit runs compare numerically so "page2" < "page10".
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r"(\d+)", text)]

    pages = sorted(
        [p for p in Path(chapter_dir).iterdir() if p.is_file() and p.suffix.lower() in exts],
        key=lambda p: natural_key(p.stem)
    )

    # Per-page outputs live in <translated_dir>/<page_stem>/, so the
    # parent directory name identifies the page.
    txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: natural_key(p.parent.name))
    jsons = sorted(translated_dir.rglob("bubbles.json"), key=lambda p: natural_key(p.parent.name))

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            zf.write(img, f"pages/{img.name}")

        # Text outputs
        for txt in txts:
            zf.write(txt, f"translations/{txt.parent.name}_output.txt")

        # JSON outputs
        for j in jsons:
            zf.write(j, f"data/{j.parent.name}_bubbles.json")

    print(f"\n✅ CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(pages)} original pages, {len(txts)} text files, {len(jsons)} JSON files.")
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_page(page_path, workdir, translator_module):
    """Translate one page inside its own working directory.

    The process chdirs into *workdir* so the translator's relative
    output files ("output.txt", "bubbles.json") land there, and always
    restores the previous CWD afterwards.

    Returns True on success, False when the translator raised (the page
    is skipped, never fatal for the whole chapter run).
    """
    rule = "─" * 70
    print(f"\n{rule}")
    print(f"PAGE: {page_path.name}")
    print(f"{rule}")

    previous_cwd = os.getcwd()
    try:
        # Isolate execution to the specific page's folder
        os.chdir(workdir)

        # 1) Translate
        translator_module.translate_manga_text(
            image_path=str(page_path.resolve()),
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            min_text_length=MIN_TEXT_LENGTH,
            gap_px=GAP_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            reading_mode=READING_MODE,
            debug=DEBUG,
        )
        print(" ✅ Translator done")
        return True
    except Exception as e:
        print(f" ❌ Failed: {e}")
        return False
    finally:
        os.chdir(previous_cwd)
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def main():
    """CLI entry point: translate every page of a chapter, then pack a CBZ."""
    cli = argparse.ArgumentParser(description="Manga Translation OCR Pipeline")
    cli.add_argument("chapter_dir", help="Path to the folder containing manga pages")
    opts = cli.parse_args()

    chapter_dir = Path(opts.chapter_dir).resolve()
    output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz"

    print("Loading modules...")

    # The translator module lives next to this script, not in the chapter dir.
    script_dir = Path(__file__).parent
    try:
        translator_mod = load_module("manga_translator", str(script_dir / "manga-translator.py"))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    print(f"\n📖 Chapter : {chapter_dir}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")

    translated_dir = chapter_dir / "translated"
    ok_pages, bad_pages = [], []

    for page_no, page in enumerate(pages, start=1):
        print(f"[{page_no}/{len(pages)}] Processing...")
        workdir = make_page_workdir(chapter_dir, page.stem)
        # Route each page name into the matching result bucket.
        bucket = ok_pages if process_page(page, workdir, translator_mod) else bad_pages
        bucket.append(page.name)

    print(f"\n{'═' * 70}")
    print("PIPELINE COMPLETE")
    print(f"✅ {len(ok_pages)} page(s) succeeded")
    if bad_pages:
        print(f"❌ {len(bad_pages)} page(s) failed:")
        for name in bad_pages:
            print(f" • {name}")
    print(f"{'═' * 70}\n")

    print("Packing CBZ...")
    pack_cbz(chapter_dir, translated_dir, output_cbz)
|
||||
|
||||
# Script entry point — argument parsing happens inside main().
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user