Added fixes
This commit is contained in:
2785
manga-translator.py
2785
manga-translator.py
File diff suppressed because it is too large
Load Diff
@@ -16,26 +16,13 @@ from pathlib import Path
|
|||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
# PIPELINE CONFIGURATION
|
# PIPELINE CONFIGURATION
|
||||||
# Single source of truth — mirrors the __main__ block in
|
# Maps to the process_manga_page() signature in manga-translator.py
|
||||||
# manga-translator.py so both entry points stay in sync.
|
|
||||||
# ─────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
PIPELINE_CONFIG = dict(
|
PIPELINE_CONFIG = dict(
|
||||||
source_lang = "english",
|
source_lang = "en",
|
||||||
target_lang = "ca",
|
target_lang = "ca",
|
||||||
confidence_threshold = 0.03,
|
|
||||||
min_text_length = 1,
|
|
||||||
gap_px = "auto",
|
|
||||||
quality_threshold = 0.62,
|
|
||||||
reading_mode = "rtl",
|
|
||||||
debug = True,
|
|
||||||
use_enhanced_ocr = True,
|
|
||||||
strict_grouping = True,
|
|
||||||
max_box_width_ratio = 0.6,
|
|
||||||
max_box_height_ratio = 0.5,
|
|
||||||
auto_fix_bubbles = True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
# DYNAMIC MODULE LOADER
|
# DYNAMIC MODULE LOADER
|
||||||
# ─────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
@@ -68,16 +55,16 @@ def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
|
|||||||
|
|
||||||
def verify_translator_api(module) -> bool:
|
def verify_translator_api(module) -> bool:
|
||||||
"""
|
"""
|
||||||
Checks that the loaded module exposes translate_manga_text()
|
Checks that the loaded module exposes process_manga_page()
|
||||||
and that it accepts all keys defined in PIPELINE_CONFIG.
|
and that it accepts all keys defined in PIPELINE_CONFIG.
|
||||||
Prints a warning for any missing parameter so mismatches are
|
Prints a warning for any missing parameter so mismatches are
|
||||||
caught immediately rather than silently falling back to defaults.
|
caught immediately rather than silently falling back to defaults.
|
||||||
"""
|
"""
|
||||||
import inspect
|
import inspect
|
||||||
|
|
||||||
fn = getattr(module, "translate_manga_text", None)
|
fn = getattr(module, "process_manga_page", None)
|
||||||
if fn is None:
|
if fn is None:
|
||||||
print("❌ manga-translator.py does not expose translate_manga_text()")
|
print("❌ manga-translator.py does not expose process_manga_page()")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
sig = inspect.signature(fn)
|
sig = inspect.signature(fn)
|
||||||
@@ -86,8 +73,10 @@ def verify_translator_api(module) -> bool:
|
|||||||
|
|
||||||
for key in PIPELINE_CONFIG:
|
for key in PIPELINE_CONFIG:
|
||||||
if key not in params:
|
if key not in params:
|
||||||
print(f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
|
print(
|
||||||
f"translate_manga_text() — update pipeline or translator.")
|
f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
|
||||||
|
f"process_manga_page() — update pipeline or translator."
|
||||||
|
)
|
||||||
ok = False
|
ok = False
|
||||||
|
|
||||||
return ok
|
return ok
|
||||||
@@ -107,22 +96,65 @@ def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
|
|||||||
# output files land there automatically.
|
# output files land there automatically.
|
||||||
os.chdir(workdir)
|
os.chdir(workdir)
|
||||||
|
|
||||||
|
output_json = str(workdir / "bubbles.json")
|
||||||
|
output_txt = str(workdir / "output.txt")
|
||||||
|
debug_path = str(workdir / "debug_clusters.png")
|
||||||
|
|
||||||
print(" ⏳ Extracting text and translating...")
|
print(" ⏳ Extracting text and translating...")
|
||||||
|
|
||||||
translator_module.translate_manga_text(
|
results = translator_module.process_manga_page(
|
||||||
image_path = str(page_path.resolve()),
|
image_path = str(page_path.resolve()),
|
||||||
export_to_file = "output.txt",
|
output_json = output_json,
|
||||||
export_bubbles_to= "bubbles.json",
|
output_txt = output_txt,
|
||||||
**PIPELINE_CONFIG, # ← all settings from the single config dict
|
**PIPELINE_CONFIG,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Sanity-check that the expected outputs were actually written
|
# ── Best-effort debug visualisation (failures are non-fatal) ──
|
||||||
|
if results:
|
||||||
|
try:
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
image_bgr = cv2.imread(str(page_path.resolve()))
|
||||||
|
if image_bgr is not None:
|
||||||
|
# Reconstruct vis_boxes / vis_lines from results dict
|
||||||
|
vis_boxes = {}
|
||||||
|
vis_lines = {}
|
||||||
|
vis_indices = {}
|
||||||
|
|
||||||
|
for bid_str, data in results.items():
|
||||||
|
bid = int(bid_str)
|
||||||
|
xywh = data["box"]
|
||||||
|
vis_boxes[bid] = (
|
||||||
|
xywh["x"],
|
||||||
|
xywh["y"],
|
||||||
|
xywh["x"] + xywh["w"],
|
||||||
|
xywh["y"] + xywh["h"],
|
||||||
|
)
|
||||||
|
vis_lines[bid] = data.get("lines", [])
|
||||||
|
vis_indices[bid] = []
|
||||||
|
|
||||||
|
translator_module.draw_debug_clusters(
|
||||||
|
image_bgr = image_bgr,
|
||||||
|
out_boxes = vis_boxes,
|
||||||
|
out_lines = vis_lines,
|
||||||
|
out_indices = vis_indices,
|
||||||
|
ocr = [],
|
||||||
|
save_path = debug_path,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ⚠️ Debug visualisation failed (non-fatal): {e}")
|
||||||
|
|
||||||
|
# ── Sanity-check outputs ──────────────────────────────
|
||||||
for fname in ("output.txt", "bubbles.json"):
|
for fname in ("output.txt", "bubbles.json"):
|
||||||
fpath = workdir / fname
|
fpath = workdir / fname
|
||||||
if not fpath.exists() or fpath.stat().st_size == 0:
|
if not fpath.exists() or fpath.stat().st_size == 0:
|
||||||
print(f" ⚠️ {fname} is missing or empty after processing.")
|
print(f" ⚠️ {fname} is missing or empty after processing.")
|
||||||
|
|
||||||
print(" ✅ Translation and OCR data saved successfully")
|
if not results:
|
||||||
|
print(" ⚠️ process_manga_page() returned no results.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
print(f" ✅ Done — {len(results)} box(es) processed.")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -154,8 +186,26 @@ def main():
|
|||||||
"--end", type=int, default=None,
|
"--end", type=int, default=None,
|
||||||
help="Stop after this page number inclusive (default: all)"
|
help="Stop after this page number inclusive (default: all)"
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--source", "-s", default=None,
|
||||||
|
help=f"Override source language (default: {PIPELINE_CONFIG['source_lang']})"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--target", "-t", default=None,
|
||||||
|
help=f"Override target language (default: {PIPELINE_CONFIG['target_lang']})"
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Build the effective config: PIPELINE_CONFIG defaults plus any CLI source/target overrides
|
||||||
|
config = dict(PIPELINE_CONFIG)
|
||||||
|
if args.source:
|
||||||
|
config["source_lang"] = args.source
|
||||||
|
if args.target:
|
||||||
|
config["target_lang"] = args.target
|
||||||
|
|
||||||
|
# Patch PIPELINE_CONFIG in-place so process_page() picks up overrides
|
||||||
|
PIPELINE_CONFIG.update(config)
|
||||||
|
|
||||||
chapter_dir = Path(args.chapter_dir).resolve()
|
chapter_dir = Path(args.chapter_dir).resolve()
|
||||||
if not chapter_dir.is_dir():
|
if not chapter_dir.is_dir():
|
||||||
print(f"❌ Not a directory: {chapter_dir}")
|
print(f"❌ Not a directory: {chapter_dir}")
|
||||||
@@ -198,14 +248,12 @@ def main():
|
|||||||
|
|
||||||
# ── Summary header ────────────────────────────────────────
|
# ── Summary header ────────────────────────────────────────
|
||||||
print(f"\n{'═' * 70}")
|
print(f"\n{'═' * 70}")
|
||||||
print(f" 📖 Chapter : {chapter_dir.name}")
|
print(f" 📖 Chapter : {chapter_dir.name}")
|
||||||
print(f" 📄 Pages : {len(pages)} "
|
print(f" 📄 Pages : {len(pages)} "
|
||||||
f"(of {len(all_pages)} total, "
|
f"(of {len(all_pages)} total, "
|
||||||
f"range {args.start}–{end_idx})")
|
f"range {args.start}–{end_idx})")
|
||||||
print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']} → "
|
print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']} → "
|
||||||
f"{PIPELINE_CONFIG['target_lang']}")
|
f"{PIPELINE_CONFIG['target_lang']}")
|
||||||
print(f" 📖 Read order : {PIPELINE_CONFIG['reading_mode'].upper()}")
|
|
||||||
print(f" 🔍 Enhanced : {PIPELINE_CONFIG['use_enhanced_ocr']}")
|
|
||||||
print(f"{'═' * 70}\n")
|
print(f"{'═' * 70}\n")
|
||||||
|
|
||||||
succeeded, failed = [], []
|
succeeded, failed = [], []
|
||||||
@@ -231,4 +279,4 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
Reference in New Issue
Block a user