Added pipeline-translator.py
This commit is contained in:
@@ -1,35 +1,19 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
pipeline.py
|
pipeline-translator.py
|
||||||
───────────────────────────────────────────────────────────────
|
───────────────────────────────────────────────────────────────
|
||||||
Translation OCR pipeline (No Rendering)
|
Translation OCR pipeline (Batch Processing Only)
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python pipeline.py /path/to/chapter/folder
|
python pipeline-translator.py /path/to/chapter/folder
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import zipfile
|
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
|
||||||
# CONFIG
|
|
||||||
# ─────────────────────────────────────────────
|
|
||||||
SOURCE_LANG = "en"
|
|
||||||
TARGET_LANG = "ca"
|
|
||||||
|
|
||||||
# Translator Settings
|
|
||||||
CONFIDENCE_THRESHOLD = 0.10
|
|
||||||
MIN_TEXT_LENGTH = 1
|
|
||||||
GAP_PX = "auto"
|
|
||||||
FILTER_SFX = True
|
|
||||||
QUALITY_THRESHOLD = 0.50
|
|
||||||
READING_MODE = "ltr"
|
|
||||||
DEBUG = True
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# DYNAMIC MODULE LOADER
|
# DYNAMIC MODULE LOADER
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
@@ -57,40 +41,6 @@ def make_page_workdir(chapter_dir, page_stem):
|
|||||||
workdir.mkdir(parents=True, exist_ok=True)
|
workdir.mkdir(parents=True, exist_ok=True)
|
||||||
return workdir
|
return workdir
|
||||||
|
|
||||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
|
|
||||||
exts = {".jpg", ".jpeg", ".png", ".webp"}
|
|
||||||
|
|
||||||
pages = sorted(
|
|
||||||
[p for p in Path(chapter_dir).iterdir() if p.is_file() and p.suffix.lower() in exts],
|
|
||||||
key=lambda p: p.stem
|
|
||||||
)
|
|
||||||
|
|
||||||
txts = sorted(translated_dir.rglob("output.txt"), key=lambda p: p.parent.name)
|
|
||||||
jsons = sorted(translated_dir.rglob("bubbles.json"), key=lambda p: p.parent.name)
|
|
||||||
|
|
||||||
if not pages:
|
|
||||||
print("⚠️ No original pages found — CBZ not created.")
|
|
||||||
return
|
|
||||||
|
|
||||||
with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
|
|
||||||
# Original pages
|
|
||||||
for img in pages:
|
|
||||||
arcname = f"pages/{img.name}"
|
|
||||||
zf.write(img, arcname)
|
|
||||||
|
|
||||||
# Text outputs
|
|
||||||
for txt in txts:
|
|
||||||
arcname = f"translations/{txt.parent.name}_output.txt"
|
|
||||||
zf.write(txt, arcname)
|
|
||||||
|
|
||||||
# JSON outputs
|
|
||||||
for j in jsons:
|
|
||||||
arcname = f"data/{j.parent.name}_bubbles.json"
|
|
||||||
zf.write(j, arcname)
|
|
||||||
|
|
||||||
print(f"\n✅ CBZ saved → {output_cbz}")
|
|
||||||
print(f"📦 Contains: {len(pages)} original pages, {len(txts)} text files, {len(jsons)} JSON files.")
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# PER-PAGE PIPELINE
|
# PER-PAGE PIPELINE
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
@@ -104,22 +54,17 @@ def process_page(page_path, workdir, translator_module):
|
|||||||
# Isolate execution to the specific page's folder
|
# Isolate execution to the specific page's folder
|
||||||
os.chdir(workdir)
|
os.chdir(workdir)
|
||||||
|
|
||||||
# 1) Translate
|
print(" ⏳ Extracting text and translating...")
|
||||||
|
|
||||||
|
# 1) Translate using ONLY the required path arguments.
|
||||||
|
# This forces the function to use its own internal default variables
|
||||||
|
# (like source_lang, target_lang, confidence_threshold) directly from manga-translator.py
|
||||||
translator_module.translate_manga_text(
|
translator_module.translate_manga_text(
|
||||||
image_path=str(page_path.resolve()),
|
image_path=str(page_path.resolve()),
|
||||||
source_lang=SOURCE_LANG,
|
|
||||||
target_lang=TARGET_LANG,
|
|
||||||
confidence_threshold=CONFIDENCE_THRESHOLD,
|
|
||||||
min_text_length=MIN_TEXT_LENGTH,
|
|
||||||
gap_px=GAP_PX,
|
|
||||||
filter_sound_effects=FILTER_SFX,
|
|
||||||
quality_threshold=QUALITY_THRESHOLD,
|
|
||||||
export_to_file="output.txt",
|
export_to_file="output.txt",
|
||||||
export_bubbles_to="bubbles.json",
|
export_bubbles_to="bubbles.json"
|
||||||
reading_mode=READING_MODE,
|
|
||||||
debug=DEBUG
|
|
||||||
)
|
)
|
||||||
print(" ✅ Translator done")
|
print(" ✅ Translation and OCR data saved successfully")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -134,16 +79,13 @@ def process_page(page_path, workdir, translator_module):
|
|||||||
# MAIN
|
# MAIN
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Manga Translation OCR Pipeline")
|
parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
|
||||||
parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
|
parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
chapter_dir = Path(args.chapter_dir).resolve()
|
chapter_dir = Path(args.chapter_dir).resolve()
|
||||||
output_cbz = chapter_dir.parent / f"{chapter_dir.name}_translated.cbz"
|
|
||||||
|
|
||||||
print("Loading modules...")
|
print("Loading translator module...")
|
||||||
|
|
||||||
# Ensure we are loading from the directory where pipeline.py is located
|
|
||||||
script_dir = Path(__file__).parent
|
script_dir = Path(__file__).parent
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -157,11 +99,10 @@ def main():
|
|||||||
print(f"❌ No images found in: {chapter_dir}")
|
print(f"❌ No images found in: {chapter_dir}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"\n📖 Chapter : {chapter_dir}")
|
print(f"\n📖 Chapter : {chapter_dir.name}")
|
||||||
print(f" Pages : {len(pages)}")
|
print(f" Pages : {len(pages)}")
|
||||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
|
print(" Note : Using translation settings directly from manga-translator.py\n")
|
||||||
|
|
||||||
translated_dir = chapter_dir / "translated"
|
|
||||||
succeeded, failed = [], []
|
succeeded, failed = [], []
|
||||||
|
|
||||||
for i, page_path in enumerate(pages, start=1):
|
for i, page_path in enumerate(pages, start=1):
|
||||||
@@ -182,8 +123,5 @@ def main():
|
|||||||
print(f" • {f}")
|
print(f" • {f}")
|
||||||
print(f"{'═' * 70}\n")
|
print(f"{'═' * 70}\n")
|
||||||
|
|
||||||
print("Packing CBZ...")
|
|
||||||
pack_cbz(chapter_dir, translated_dir, output_cbz)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
Reference in New Issue
Block a user