Compare commits
11 Commits
f00647e668
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3ca01dae8c | ||
|
|
037dadd920 | ||
|
|
285e9ca393 | ||
|
|
d77db83cfe | ||
|
|
b730037a06 | ||
|
|
7837aeaa9b | ||
|
|
455b4ad82c | ||
|
|
b6b0df4774 | ||
|
|
512bb32f66 | ||
|
|
494631c967 | ||
|
|
27a3e6f98a |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -9,6 +9,11 @@
|
|||||||
|
|
||||||
.venv311/
|
.venv311/
|
||||||
|
|
||||||
|
#Folders to test
|
||||||
|
Spy_x_Family_076/
|
||||||
|
Dandadan_059/
|
||||||
|
Lv999/
|
||||||
|
|
||||||
# Icon must end with two \r
|
# Icon must end with two \r
|
||||||
Icon
|
Icon
|
||||||
|
|
||||||
|
|||||||
3727
manga-translator.py
3727
manga-translator.py
File diff suppressed because it is too large
Load Diff
119
older-code/patch_manga_translator.py
Normal file
119
older-code/patch_manga_translator.py
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
TARGET = Path("manga-translator.py")
|
||||||
|
|
||||||
|
def cut_after_first_entrypoint(text: str) -> str:
    """
    Keep only the first full ``__main__`` block, dropping any duplicated
    tail that follows it (e.g. from a botched merge/paste).

    Returns *text* unchanged when no entrypoint line is present.
    """
    match = re.search(r'(?m)^if __name__ == "__main__":\s*$', text)
    if match is None:
        return text

    head = text[:match.start()]
    block_lines = text[match.start():].splitlines(True)

    # Keep the "if __name__..." line plus every blank or indented line
    # after it; the first dedented non-blank line marks the block's end.
    kept = [block_lines[0]]
    for line in block_lines[1:]:
        if line.strip() and not line.startswith((" ", "\t")):
            break
        kept.append(line)

    return head + "".join(kept)
|
||||||
|
|
||||||
|
def replace_bad_vars(text: str) -> str:
    """Rewrite stale call-site argument names (ocr/image_bgr) to the
    current variable names (filtered/image) in the translator source."""
    fixes = (
        (
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr, image_bgr)",
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)",
        ),
        (
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr)",
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered)",
        ),
    )
    for stale, current in fixes:
        text = text.replace(stale, current)
    return text
|
||||||
|
|
||||||
|
def ensure_autofix_chain(text: str) -> str:
    """
    Upgrade the single-step auto-fix block to the two-step chain
    (auto_fix_bubble_detection followed by merge_micro_boxes_relaxed).

    No-op when the old block is not present verbatim.
    """
    # NOTE(review): the leading whitespace inside these literals was
    # reconstructed from a diff view — confirm against the real file.
    header = " # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
    guard = " if auto_fix_bubbles:\n"
    old = header + guard + (
        " bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
    )
    new = header + guard + (
        " bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection(\n"
        " bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image)\n"
        " bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(\n"
        " bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
    )
    return text.replace(old, new)
|
||||||
|
|
||||||
|
def ensure_split_commit(text: str) -> str:
    """
    Inject the ``new_*`` commit assignments immediately before the
    dedup marker, exactly once (idempotent: skips if the assignment
    already exists or the marker is absent).
    """
    marker = " # ── Remove nested / duplicate boxes ──────────────────────────────────\n"
    if marker not in text or "bubbles = new_bubbles" in text:
        return text

    assignments = (
        " bubbles = new_bubbles\n"
        " bubble_boxes = new_bubble_boxes\n"
        " bubble_quads = new_bubble_quads\n"
        " bubble_indices = new_bubble_indices\n\n"
    )
    return text.replace(marker, assignments + marker)
|
||||||
|
|
||||||
|
def ensure_rescue_pipeline(text: str) -> str:
    """
    Append the confidence-floor + rescue/merge steps right after the
    "Kept/Skipped" log line, once (idempotent: skips if the rescue call
    already exists or the anchor line is absent).
    """
    anchor = ' print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
    if anchor not in text or "rescue_name_and_short_tokens(raw" in text:
        return text

    insertion = anchor + (
        ' # Protect short dialogue tokens confidence\n'
        ' tmp = []\n'
        ' for bbox, t, conf in filtered:\n'
        ' tmp.append((bbox, t, maybe_conf_floor_for_protected(t, conf, floor=0.40)))\n'
        ' filtered = tmp\n'
        ' # Rescue names/short tokens dropped by strict filters\n'
        ' rescued = rescue_name_and_short_tokens(raw, min_conf=0.20)\n'
        ' filtered = merge_rescued_items(filtered, rescued, iou_threshold=0.55)\n'
    )
    return text.replace(anchor, insertion)
|
||||||
|
|
||||||
|
def main():
    """Apply every patch pass to TARGET in place, then report success."""
    if not TARGET.exists():
        raise FileNotFoundError(f"Not found: {TARGET}")

    out = TARGET.read_text(encoding="utf-8")

    # Passes run in order: trim duplicated tail first, then the
    # text-level rewrites that assume a single clean copy.
    for transform in (
        cut_after_first_entrypoint,
        replace_bad_vars,
        ensure_autofix_chain,
        ensure_split_commit,
        ensure_rescue_pipeline,
    ):
        out = transform(out)

    TARGET.write_text(out, encoding="utf-8")
    print("✅ Patched manga-translator.py")


if __name__ == "__main__":
    main()
|
||||||
@@ -6,6 +6,8 @@ Translation OCR pipeline (Batch Processing Only)
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python pipeline-translator.py /path/to/chapter/folder
|
python pipeline-translator.py /path/to/chapter/folder
|
||||||
|
python pipeline-translator.py /path/to/chapter/folder --start 2 --end 5
|
||||||
|
python pipeline-translator.py /path/to/chapter/folder --source en --target es
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -14,114 +16,375 @@ import argparse
|
|||||||
import importlib.util
|
import importlib.util
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
|
# PIPELINE CONFIGURATION
|
||||||
|
# Maps to the process_manga_page() signature in manga-translator.py
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
|
PIPELINE_CONFIG = dict(
|
||||||
|
source_lang = "en",
|
||||||
|
target_lang = "ca",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
# DYNAMIC MODULE LOADER
|
# DYNAMIC MODULE LOADER
|
||||||
# ─────────────────────────────────────────────
|
# FIX: Always evicts stale sys.modules entry and deletes
|
||||||
def load_module(name, filepath):
|
# __pycache__ for manga-translator.py before loading,
|
||||||
|
# so edits are ALWAYS picked up on every run.
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
|
def purge_bytecode_cache(filepath: str) -> None:
    """
    Delete the compiled .pyc file for the given .py path so Python
    cannot silently use a stale cached version of the module.

    Any failure (permissions, race) is non-fatal: a warning is printed
    and execution continues.
    """
    # BUG FIX: removed unused `import py_compile` — the function only
    # maps source → cache path and deletes; it never compiles.
    from importlib.util import cache_from_source

    try:
        pyc_path = cache_from_source(filepath)
        if os.path.exists(pyc_path):
            os.remove(pyc_path)
            print(f"🗑️ Purged bytecode cache: {pyc_path}")
    except Exception as e:
        # Non-fatal — just warn and continue
        print(f"⚠️ Could not purge bytecode cache: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(name: str, filepath: str):
    """
    Dynamically load a .py file as a module.

    FIX 1: Purge the .pyc cache so edits are always reflected.
    FIX 2: Evict any previously loaded version from sys.modules
           to prevent Python reusing a stale module object across
           multiple calls (e.g. when running in a REPL or test loop).
    """
    purge_bytecode_cache(filepath)   # FIX 1: delete stale bytecode
    sys.modules.pop(name, None)      # FIX 2: evict from module registry

    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load module spec for: {filepath}")

    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module  # register before exec (handles self-refs)
    spec.loader.exec_module(module)
    return module
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
# HELPERS
|
# HELPERS
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
def sorted_pages(chapter_dir: Path):
    """Return all image files in chapter_dir sorted by filename stem."""
    allowed = {".jpg", ".jpeg", ".png", ".webp"}
    files = (p for p in chapter_dir.iterdir() if p.is_file())
    return sorted(
        (p for p in files if p.suffix.lower() in allowed),
        key=lambda p: p.stem,
    )
|
||||||
|
|
||||||
def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
    """Create and return translated/<page_stem>/ inside chapter_dir."""
    target = chapter_dir.joinpath("translated", page_stem)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
|
||||||
|
def verify_translator_api(module) -> bool:
    """
    Checks that the loaded module exposes process_manga_page() and
    that it accepts all keys defined in PIPELINE_CONFIG.
    Prints a clear warning for any missing parameter.
    """
    import inspect

    fn = getattr(module, "process_manga_page", None)
    if fn is None:
        print("❌ manga-translator.py does not expose process_manga_page()")
        return False

    accepted = set(inspect.signature(fn).parameters)
    missing = [key for key in PIPELINE_CONFIG if key not in accepted]
    for key in missing:
        print(
            f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
            f"process_manga_page() — update pipeline or translator."
        )
    return not missing
||||||
|
|
||||||
|
|
||||||
|
def sanity_check_fixes(module_path: Path) -> None:
    """
    Grep the translator source for key fix signatures and warn if
    any are missing. Helps catch cases where an edit was not saved.

    Exits the process (code 1) when any signature is absent.
    """
    checks = {
        "Fix A (gap_factor=4.0)": "gap_factor=4.0",
        "Fix B (_majority_contour_id)": "_majority_contour_id",
        "Fix C (median_inter adaptive gap)": "median_inter",
        "Fix D (merge_same_column_dialogue)": "merge_same_column_dialogue_boxes",
        "Fix E (lang_code from self.langs)": "lang_code = self.langs",
    }

    print("\n🔎 Sanity-checking fixes in manga-translator.py:")
    source = module_path.read_text(encoding="utf-8")

    missing_any = False
    for label, token in checks.items():
        present = token in source
        print(f" {'✅' if present else '❌ MISSING'} {label}")
        if not present:
            missing_any = True

    if missing_any:
        print(
            "\n⚠️ One or more fixes are missing from manga-translator.py.\n"
            " Save the file and re-run. Aborting.\n"
        )
        sys.exit(1)
    else:
        print(" All fixes present.\n")
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
# PER-PAGE PIPELINE
|
# PER-PAGE PIPELINE
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
    """
    Run the translator on a single page image, writing all outputs into
    workdir. Returns True on success, False on any failure.

    NOTE(review): exact whitespace of the log strings was reconstructed
    from a diff view — confirm against the original file.
    """
    print(f"\n{'─' * 70}")
    print(f" PAGE : {page_path.name}")
    print(f" OUT : {workdir}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
        os.chdir(workdir)

        # Use absolute paths so output always lands in workdir
        # regardless of any internal os.getcwd() calls.
        output_json = str(workdir / "bubbles.json")
        output_txt = str(workdir / "output.txt")
        debug_path = str(workdir / "debug_clusters.png")

        print(" ⏳ Extracting text and translating...")

        results = translator_module.process_manga_page(
            image_path = str(page_path.resolve()),
            output_json = output_json,
            output_txt = output_txt,
            **PIPELINE_CONFIG,
        )

        # ── Debug visualisation ───────────────────────────────
        # process_manga_page() already writes debug_clusters.png
        # internally with full OCR quad data; we only draw a degraded
        # fallback (ocr=[] → no quad outlines) when it did not.
        if results and not os.path.exists(debug_path):
            try:
                import cv2
                image_bgr = cv2.imread(str(page_path.resolve()))
                if image_bgr is not None:
                    vis_boxes: dict = {}
                    vis_lines: dict = {}
                    vis_indices: dict = {}

                    for bid_str, data in results.items():
                        bid = int(bid_str)
                        xywh = data["box"]
                        vis_boxes[bid] = (
                            xywh["x"],
                            xywh["y"],
                            xywh["x"] + xywh["w"],
                            xywh["y"] + xywh["h"],
                        )
                        vis_lines[bid] = data.get("lines", [])
                        vis_indices[bid] = []

                    # Fallback only — ocr=[] means no quad outlines
                    translator_module.draw_debug_clusters(
                        image_bgr = image_bgr,
                        out_boxes = vis_boxes,
                        out_lines = vis_lines,
                        out_indices = vis_indices,
                        ocr = [],
                        save_path = debug_path,
                    )
                    print(f" 🖼️ Fallback debug image written → {debug_path}")
            except Exception as e:
                print(f" ⚠️ Debug visualisation failed (non-fatal): {e}")

        # ── Sanity-check output files ─────────────────────────
        all_good = True
        for fname in ("output.txt", "bubbles.json"):
            fpath = workdir / fname
            if not fpath.exists():
                print(f" ⚠️ {fname} was NOT created.")
                all_good = False
            elif fpath.stat().st_size == 0:
                print(f" ⚠️ {fname} exists but is EMPTY.")
                all_good = False
            else:
                print(f" 📄 {fname} → {fpath.stat().st_size} bytes")

        if not results:
            print(" ⚠️ process_manga_page() returned no results.")
            return False

        # BUG FIX: all_good was computed but never consulted, so a page
        # with missing/empty output files still counted as a success.
        if not all_good:
            return False

        print(f" ✅ Done — {len(results)} box(es) processed.")
        return True

    except Exception as e:
        import traceback
        print(f" ❌ Failed: {e}")
        traceback.print_exc()
        return False

    finally:
        os.chdir(orig_dir)
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
# MAIN
|
# MAIN
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────
|
||||||
def main():
    """
    CLI entry point: parse arguments, sanity-check and load the
    translator module, then run every selected page through
    process_page() and print a batch summary.

    Exits with status 1 on any setup failure or if any page failed.
    """
    parser = argparse.ArgumentParser(
        description="Manga Translation OCR Batch Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python pipeline-translator.py pages-for-tests
  python pipeline-translator.py pages-for-tests --start 2 --end 4
  python pipeline-translator.py pages-for-tests --source en --target es
"""
    )
    parser.add_argument(
        "chapter_dir",
        help="Path to the folder containing manga page images"
    )
    parser.add_argument(
        "--start", type=int, default=1,
        help="Start from this page number (1-based, default: 1)"
    )
    parser.add_argument(
        "--end", type=int, default=None,
        help="Stop after this page number inclusive (default: all)"
    )
    parser.add_argument(
        "--source", "-s", default=None,
        help=f"Override source language (default: {PIPELINE_CONFIG['source_lang']})"
    )
    parser.add_argument(
        "--target", "-t", default=None,
        help=f"Override target language (default: {PIPELINE_CONFIG['target_lang']})"
    )
    parser.add_argument(
        "--skip-sanity", action="store_true",
        help="Skip the fix sanity check (not recommended)"
    )
    args = parser.parse_args()

    # ── Apply CLI language overrides ─────────────────────────
    # FIX: update the shared config in place directly; the old
    # config = dict(...) copy followed by .update(config) was redundant.
    if args.source:
        PIPELINE_CONFIG["source_lang"] = args.source
    if args.target:
        PIPELINE_CONFIG["target_lang"] = args.target

    # ── Resolve chapter directory ─────────────────────────────
    chapter_dir = Path(args.chapter_dir).resolve()
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)

    # ── Locate manga-translator.py ────────────────────────────
    script_dir = Path(__file__).parent
    module_path = script_dir / "manga-translator.py"
    if not module_path.exists():
        print(f"❌ manga-translator.py not found in {script_dir}")
        sys.exit(1)

    # ── Sanity-check that all fixes are present ───────────────
    if not args.skip_sanity:
        sanity_check_fixes(module_path)

    # ── Load translator module ────────────────────────────────
    print(f"📦 Loading translator from: {module_path}")
    try:
        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    # ── API compatibility check ───────────────────────────────
    if not verify_translator_api(translator):
        print("❌ Aborting — fix the parameter mismatch above first.")
        sys.exit(1)

    # ── Discover and slice pages ──────────────────────────────
    all_pages = sorted_pages(chapter_dir)
    if not all_pages:
        print(f"❌ No image files found in: {chapter_dir}")
        sys.exit(1)

    start_idx = max(0, args.start - 1)
    end_idx = args.end if args.end is not None else len(all_pages)
    pages = all_pages[start_idx:end_idx]
    if not pages:
        print(f"❌ No pages in range [{args.start}, {args.end}]")
        sys.exit(1)

    print(f"\n📚 Chapter : {chapter_dir.name}")
    print(f" Pages : {len(pages)} of {len(all_pages)} total")
    print(f" Source : {PIPELINE_CONFIG['source_lang']}")
    print(f" Target : {PIPELINE_CONFIG['target_lang']}")
    print(f" Output : {chapter_dir / 'translated'}\n")

    # ── Process each page ─────────────────────────────────────
    results_summary = []
    for page_num, page_path in enumerate(pages, start=start_idx + 1):
        workdir = make_page_workdir(chapter_dir, page_path.stem)
        success = process_page(page_path, workdir, translator)
        results_summary.append((page_num, page_path.name, success))

    # ── Final summary ─────────────────────────────────────────
    print(f"\n{'═' * 70}")
    # FIX: was an f-string with no placeholders (lint F541).
    print(" BATCH COMPLETE")
    print(f"{'═' * 70}")

    passed = sum(1 for _, _, ok in results_summary if ok)
    failed = len(results_summary) - passed

    for page_num, name, ok in results_summary:
        status = "✅" if ok else "❌"
        print(f" {status} [{page_num:>3}] {name}")

    print(f"\n Total: {passed} succeeded, {failed} failed")
    print(f"{'═' * 70}\n")

    if failed:
        sys.exit(1)
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
79
requirements
79
requirements
@@ -1,79 +0,0 @@
|
|||||||
aistudio-sdk==0.3.8
|
|
||||||
annotated-doc==0.0.4
|
|
||||||
annotated-types==0.7.0
|
|
||||||
anyio==4.13.0
|
|
||||||
bce-python-sdk==0.9.70
|
|
||||||
beautifulsoup4==4.14.3
|
|
||||||
certifi==2026.2.25
|
|
||||||
chardet==7.4.3
|
|
||||||
charset-normalizer==3.4.7
|
|
||||||
click==8.3.2
|
|
||||||
colorlog==6.10.1
|
|
||||||
crc32c==2.8
|
|
||||||
deep-translator==1.11.4
|
|
||||||
easyocr==1.7.2
|
|
||||||
filelock==3.28.0
|
|
||||||
fsspec==2026.3.0
|
|
||||||
future==1.0.0
|
|
||||||
h11==0.16.0
|
|
||||||
hf-xet==1.4.3
|
|
||||||
httpcore==1.0.9
|
|
||||||
httpx==0.28.1
|
|
||||||
huggingface_hub==1.10.2
|
|
||||||
idna==3.11
|
|
||||||
ImageIO==2.37.3
|
|
||||||
imagesize==2.0.0
|
|
||||||
Jinja2==3.1.6
|
|
||||||
lazy-loader==0.5
|
|
||||||
markdown-it-py==4.0.0
|
|
||||||
MarkupSafe==3.0.3
|
|
||||||
mdurl==0.1.2
|
|
||||||
modelscope==1.35.4
|
|
||||||
mpmath==1.3.0
|
|
||||||
networkx==3.6.1
|
|
||||||
ninja==1.13.0
|
|
||||||
numpy==1.26.4
|
|
||||||
opencv-contrib-python==4.10.0.84
|
|
||||||
opencv-python==4.11.0.86
|
|
||||||
opencv-python-headless==4.11.0.86
|
|
||||||
opt-einsum==3.3.0
|
|
||||||
packaging==26.1
|
|
||||||
paddleocr==3.4.1
|
|
||||||
paddlepaddle==3.3.1
|
|
||||||
paddlex==3.4.3
|
|
||||||
pandas==3.0.2
|
|
||||||
pillow==12.2.0
|
|
||||||
prettytable==3.17.0
|
|
||||||
protobuf==7.34.1
|
|
||||||
psutil==7.2.2
|
|
||||||
py-cpuinfo==9.0.0
|
|
||||||
pyclipper==1.4.0
|
|
||||||
pycryptodome==3.23.0
|
|
||||||
pydantic==2.13.1
|
|
||||||
pydantic_core==2.46.1
|
|
||||||
Pygments==2.20.0
|
|
||||||
pypdfium2==5.7.0
|
|
||||||
python-bidi==0.6.7
|
|
||||||
python-dateutil==2.9.0.post0
|
|
||||||
PyYAML==6.0.2
|
|
||||||
requests==2.33.1
|
|
||||||
rich==15.0.0
|
|
||||||
ruamel.yaml==0.19.1
|
|
||||||
safetensors==0.7.0
|
|
||||||
scikit-image==0.26.0
|
|
||||||
scipy==1.17.1
|
|
||||||
shapely==2.1.2
|
|
||||||
shellingham==1.5.4
|
|
||||||
six==1.17.0
|
|
||||||
soupsieve==2.8.3
|
|
||||||
sympy==1.14.0
|
|
||||||
tifffile==2026.3.3
|
|
||||||
torch==2.11.0
|
|
||||||
torchvision==0.26.0
|
|
||||||
tqdm==4.67.3
|
|
||||||
typer==0.24.1
|
|
||||||
typing-inspection==0.4.2
|
|
||||||
typing_extensions==4.15.0
|
|
||||||
ujson==5.12.0
|
|
||||||
urllib3==2.6.3
|
|
||||||
wcwidth==0.6.0
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
numpy<2.0
|
|
||||||
opencv-python>=4.8
|
|
||||||
easyocr>=1.7.1
|
|
||||||
deep-translator>=1.11.4
|
|
||||||
manga-ocr>=0.1.14
|
|
||||||
torch
|
|
||||||
torchvision
|
|
||||||
Pillow
|
|
||||||
transformers
|
|
||||||
fugashi
|
|
||||||
unidic-lite
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user