Added fixes
This commit is contained in:
2785
manga-translator.py
2785
manga-translator.py
File diff suppressed because it is too large
Load Diff
@@ -16,26 +16,13 @@ from pathlib import Path
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# PIPELINE CONFIGURATION
|
||||
# Single source of truth — mirrors the __main__ block in
|
||||
# manga-translator.py so both entry points stay in sync.
|
||||
# Maps to the process_manga_page() signature in manga-translator.py
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
PIPELINE_CONFIG = dict(
|
||||
source_lang = "english",
|
||||
target_lang = "ca",
|
||||
confidence_threshold = 0.03,
|
||||
min_text_length = 1,
|
||||
gap_px = "auto",
|
||||
quality_threshold = 0.62,
|
||||
reading_mode = "rtl",
|
||||
debug = True,
|
||||
use_enhanced_ocr = True,
|
||||
strict_grouping = True,
|
||||
max_box_width_ratio = 0.6,
|
||||
max_box_height_ratio = 0.5,
|
||||
auto_fix_bubbles = True,
|
||||
source_lang = "en",
|
||||
target_lang = "ca",
|
||||
)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
@@ -68,16 +55,16 @@ def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
|
||||
|
||||
def verify_translator_api(module) -> bool:
|
||||
"""
|
||||
Checks that the loaded module exposes translate_manga_text()
|
||||
Checks that the loaded module exposes process_manga_page()
|
||||
and that it accepts all keys defined in PIPELINE_CONFIG.
|
||||
Prints a warning for any missing parameter so mismatches are
|
||||
caught immediately rather than silently falling back to defaults.
|
||||
"""
|
||||
import inspect
|
||||
|
||||
fn = getattr(module, "translate_manga_text", None)
|
||||
fn = getattr(module, "process_manga_page", None)
|
||||
if fn is None:
|
||||
print("❌ manga-translator.py does not expose translate_manga_text()")
|
||||
print("❌ manga-translator.py does not expose process_manga_page()")
|
||||
return False
|
||||
|
||||
sig = inspect.signature(fn)
|
||||
@@ -86,8 +73,10 @@ def verify_translator_api(module) -> bool:
|
||||
|
||||
for key in PIPELINE_CONFIG:
|
||||
if key not in params:
|
||||
print(f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
|
||||
f"translate_manga_text() — update pipeline or translator.")
|
||||
print(
|
||||
f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
|
||||
f"process_manga_page() — update pipeline or translator."
|
||||
)
|
||||
ok = False
|
||||
|
||||
return ok
|
||||
@@ -107,22 +96,65 @@ def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
|
||||
# output files land there automatically.
|
||||
os.chdir(workdir)
|
||||
|
||||
output_json = str(workdir / "bubbles.json")
|
||||
output_txt = str(workdir / "output.txt")
|
||||
debug_path = str(workdir / "debug_clusters.png")
|
||||
|
||||
print(" ⏳ Extracting text and translating...")
|
||||
|
||||
translator_module.translate_manga_text(
|
||||
image_path = str(page_path.resolve()),
|
||||
export_to_file = "output.txt",
|
||||
export_bubbles_to= "bubbles.json",
|
||||
**PIPELINE_CONFIG, # ← all settings from the single config dict
|
||||
results = translator_module.process_manga_page(
|
||||
image_path = str(page_path.resolve()),
|
||||
output_json = output_json,
|
||||
output_txt = output_txt,
|
||||
**PIPELINE_CONFIG,
|
||||
)
|
||||
|
||||
# Sanity-check that the expected outputs were actually written
|
||||
# ── Optional debug visualisation ─────────────────────
|
||||
if results:
|
||||
try:
|
||||
import cv2
|
||||
|
||||
image_bgr = cv2.imread(str(page_path.resolve()))
|
||||
if image_bgr is not None:
|
||||
# Reconstruct vis_boxes / vis_lines from results dict
|
||||
vis_boxes = {}
|
||||
vis_lines = {}
|
||||
vis_indices = {}
|
||||
|
||||
for bid_str, data in results.items():
|
||||
bid = int(bid_str)
|
||||
xywh = data["box"]
|
||||
vis_boxes[bid] = (
|
||||
xywh["x"],
|
||||
xywh["y"],
|
||||
xywh["x"] + xywh["w"],
|
||||
xywh["y"] + xywh["h"],
|
||||
)
|
||||
vis_lines[bid] = data.get("lines", [])
|
||||
vis_indices[bid] = []
|
||||
|
||||
translator_module.draw_debug_clusters(
|
||||
image_bgr = image_bgr,
|
||||
out_boxes = vis_boxes,
|
||||
out_lines = vis_lines,
|
||||
out_indices = vis_indices,
|
||||
ocr = [],
|
||||
save_path = debug_path,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f" ⚠️ Debug visualisation failed (non-fatal): {e}")
|
||||
|
||||
# ── Sanity-check outputs ──────────────────────────────
|
||||
for fname in ("output.txt", "bubbles.json"):
|
||||
fpath = workdir / fname
|
||||
if not fpath.exists() or fpath.stat().st_size == 0:
|
||||
print(f" ⚠️ {fname} is missing or empty after processing.")
|
||||
|
||||
print(" ✅ Translation and OCR data saved successfully")
|
||||
if not results:
|
||||
print(" ⚠️ process_manga_page() returned no results.")
|
||||
return False
|
||||
|
||||
print(f" ✅ Done — {len(results)} box(es) processed.")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -154,8 +186,26 @@ def main():
|
||||
"--end", type=int, default=None,
|
||||
help="Stop after this page number inclusive (default: all)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--source", "-s", default=None,
|
||||
help=f"Override source language (default: {PIPELINE_CONFIG['source_lang']})"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--target", "-t", default=None,
|
||||
help=f"Override target language (default: {PIPELINE_CONFIG['target_lang']})"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Build a copy of PIPELINE_CONFIG that carries any CLI source/target overrides
|
||||
config = dict(PIPELINE_CONFIG)
|
||||
if args.source:
|
||||
config["source_lang"] = args.source
|
||||
if args.target:
|
||||
config["target_lang"] = args.target
|
||||
|
||||
# Patch PIPELINE_CONFIG in-place so process_page() picks up overrides
|
||||
PIPELINE_CONFIG.update(config)
|
||||
|
||||
chapter_dir = Path(args.chapter_dir).resolve()
|
||||
if not chapter_dir.is_dir():
|
||||
print(f"❌ Not a directory: {chapter_dir}")
|
||||
@@ -198,14 +248,12 @@ def main():
|
||||
|
||||
# ── Summary header ────────────────────────────────────────
|
||||
print(f"\n{'═' * 70}")
|
||||
print(f" 📖 Chapter : {chapter_dir.name}")
|
||||
print(f" 📄 Pages : {len(pages)} "
|
||||
print(f" 📖 Chapter : {chapter_dir.name}")
|
||||
print(f" 📄 Pages : {len(pages)} "
|
||||
f"(of {len(all_pages)} total, "
|
||||
f"range {args.start}–{end_idx})")
|
||||
print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']} → "
|
||||
print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']} → "
|
||||
f"{PIPELINE_CONFIG['target_lang']}")
|
||||
print(f" 📖 Read order : {PIPELINE_CONFIG['reading_mode'].upper()}")
|
||||
print(f" 🔍 Enhanced : {PIPELINE_CONFIG['use_enhanced_ocr']}")
|
||||
print(f"{'═' * 70}\n")
|
||||
|
||||
succeeded, failed = [], []
|
||||
|
||||
Reference in New Issue
Block a user