Added stuff
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -9,6 +9,10 @@
|
||||
|
||||
.venv311/
|
||||
|
||||
Spy_x_Family_076/
|
||||
|
||||
Dandadan_059/
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
2895
manga-translator.py
2895
manga-translator.py
File diff suppressed because it is too large
Load Diff
@@ -14,10 +14,32 @@ import argparse
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# PIPELINE CONFIGURATION
|
||||
# Settings passed to translate_manga_text() for every page. They mirror the
# __main__ block in manga-translator.py and must be updated by hand whenever
# that block changes, so both entry points stay in sync.
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# Keyword arguments unpacked into every translate_manga_text() call made by
# this pipeline; verify_translator_api() checks each key against that
# function's signature before any page is processed.
PIPELINE_CONFIG = {
    "source_lang": "english",
    "target_lang": "ca",
    "confidence_threshold": 0.03,
    "min_text_length": 1,
    "gap_px": "auto",
    "quality_threshold": 0.62,
    "reading_mode": "rtl",
    "debug": True,
    "use_enhanced_ocr": True,
    "strict_grouping": True,
    "max_box_width_ratio": 0.6,
    "max_box_height_ratio": 0.5,
    "auto_fix_bubbles": True,
}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def load_module(name: str, filepath: str):
|
||||
spec = importlib.util.spec_from_file_location(name, filepath)
|
||||
if spec is None or spec.loader is None:
|
||||
raise FileNotFoundError(f"Cannot load spec for {filepath}")
|
||||
@@ -25,103 +47,188 @@ def load_module(name, filepath):
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir: Path):
    """Return the page images directly inside ``chapter_dir`` in page order.

    Only regular files whose suffix is .jpg, .jpeg, .png or .webp
    (case-insensitive) are returned; subdirectories are not descended into.

    Pages are ordered by a natural sort of the file stem: runs of ASCII
    digits compare numerically, so "2.jpg" comes before "10.jpg" even when
    the names are not zero-padded. Files whose stems compare equal are
    ordered by full file name so the result is deterministic.

    Args:
        chapter_dir: Folder to scan. A ``str`` is accepted as well as a
            ``Path``.

    Returns:
        A list of ``Path`` objects, possibly empty.
    """
    import re

    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def page_key(page):
        # re.split with a capture group always yields text, digits, text, ...
        # so odd positions are digit runs and can be compared as integers
        # without ever comparing an int against a str.
        tokens = re.split(r"([0-9]+)", page.stem)
        natural = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        return natural, page.name

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=page_key)
|
||||
|
||||
def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
    """Create (if needed) and return the work folder for one page.

    The folder is ``<chapter_dir>/translated/<page_stem>``. Missing parent
    folders are created and an already existing folder is reused.

    Args:
        chapter_dir: Chapter folder. A ``str`` is accepted as well as a
            ``Path``.
        page_stem: Page file name without its extension.

    Returns:
        The path of the work folder.
    """
    # Coerce so plain string paths keep working, as they did before the
    # type annotations were added.
    workdir = Path(chapter_dir) / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
def verify_translator_api(module, config=None) -> bool:
    """Check that ``module.translate_manga_text`` accepts the pipeline's keywords.

    The pipeline calls the function with ``image_path``, ``export_to_file``
    and ``export_bubbles_to`` plus every key of the settings dict, all as
    keyword arguments. A warning is printed for each keyword the function
    cannot receive, so mismatches are caught before any page is processed.

    A function that declares a ``**kwargs`` catch-all accepts every keyword
    and therefore always passes. Positional-only parameters do not count,
    because they cannot be supplied by keyword.

    Args:
        module: The loaded translator module (any object with attributes).
        config: Mapping of settings to check. Defaults to PIPELINE_CONFIG.

    Returns:
        True if every keyword is accepted, False otherwise (including when
        the function is missing, not callable, or cannot be inspected).
    """
    import inspect

    fn = getattr(module, "translate_manga_text", None)
    if not callable(fn):
        print("❌ manga-translator.py does not expose translate_manga_text()")
        return False

    try:
        sig = inspect.signature(fn)
    except (TypeError, ValueError) as exc:
        # Some callables (e.g. certain builtins) expose no signature.
        print(f"❌ Cannot inspect translate_manga_text(): {exc}")
        return False

    if config is None:
        config = PIPELINE_CONFIG

    parameters = list(sig.parameters.values())
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters):
        return True

    by_keyword = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    accepted = {p.name for p in parameters if p.kind in by_keyword}
    ok = True

    # Keywords the per-page call always passes, independent of the settings.
    for key in ("image_path", "export_to_file", "export_bubbles_to"):
        if key not in accepted:
            print(f"⚠️ Required argument '{key}' not found in "
                  f"translate_manga_text() — update pipeline or translator.")
            ok = False

    for key in config:
        if key not in accepted:
            print(f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
                  f"translate_manga_text() — update pipeline or translator.")
            ok = False

    return ok
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_page(page_path, workdir, translator_module):
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def process_page(page_path: Path, workdir: Path, translator_module, config=None) -> bool:
    """Run the translator on one page from inside that page's work folder.

    The current directory is switched to ``workdir`` for the duration of the
    call, so the relative output names ``output.txt`` and ``bubbles.json``
    (and anything else the translator writes to the current directory) land
    there. The previous working directory is always restored.

    Args:
        page_path: Image file of the page.
        workdir: Existing folder that receives the outputs.
        translator_module: Object exposing ``translate_manga_text()``.
        config: Mapping of extra keyword arguments for the translator.
            Defaults to PIPELINE_CONFIG.

    Returns:
        True if the translator call returned without raising, even when an
        expected output file is missing or empty (that only prints a
        warning). False if any exception was raised; the error and its
        traceback are printed.
    """
    import traceback

    settings = PIPELINE_CONFIG if config is None else config

    # Make both paths absolute BEFORE changing directory: relative paths
    # would otherwise be re-interpreted against the new working directory.
    image_path = str(Path(page_path).resolve())
    workdir = Path(workdir).resolve()

    print(f"\n{'─' * 70}")
    print(f" PAGE : {page_path.name}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
        os.chdir(workdir)

        print(" ⏳ Extracting text and translating...")

        translator_module.translate_manga_text(
            image_path=image_path,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            **settings,
        )

        # Sanity-check that the expected outputs were actually written.
        missing = []
        for fname in ("output.txt", "bubbles.json"):
            fpath = workdir / fname
            if not fpath.exists() or fpath.stat().st_size == 0:
                missing.append(fname)
                print(f" ⚠️ {fname} is missing or empty after processing.")

        # Do not claim success right after warning about a missing output.
        if not missing:
            print(" ✅ Translation and OCR data saved successfully")
        return True

    except Exception as e:
        print(f" ❌ Failed: {e}")
        traceback.print_exc()
        return False

    finally:
        os.chdir(orig_dir)
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def main():
    """Command-line entry point: translate a range of pages of one chapter.

    Steps: parse arguments, load manga-translator.py from this script's
    folder, verify its API against PIPELINE_CONFIG, discover the page
    images, apply the 1-based inclusive --start/--end range, process each
    page in its own work folder, and print a report.

    Exit status: 1 on any setup error or when at least one page failed,
    2 on invalid command-line arguments (argparse convention), 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Manga Translation OCR Batch Pipeline"
    )
    parser.add_argument(
        "chapter_dir",
        help="Path to the folder containing manga page images"
    )
    parser.add_argument(
        "--start", type=int, default=1,
        help="Start from this page number (1-based, default: 1)"
    )
    parser.add_argument(
        "--end", type=int, default=None,
        help="Stop after this page number inclusive (default: all)"
    )
    args = parser.parse_args()

    # Reject out-of-contract values instead of letting them fall through to
    # Python slice semantics (a negative --end would silently drop pages
    # from the end of the chapter).
    if args.start < 1:
        parser.error("--start must be 1 or greater")
    if args.end is not None and args.end < args.start:
        parser.error("--end must not be smaller than --start")

    chapter_dir = Path(args.chapter_dir).resolve()
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)

    # ── Load translator module ────────────────────────────────
    script_dir = Path(__file__).parent
    module_path = script_dir / "manga-translator.py"

    if not module_path.exists():
        print(f"❌ manga-translator.py not found in {script_dir}")
        sys.exit(1)

    print(f"📦 Loading translator from: {module_path}")
    try:
        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        # Top-level boundary: importing the translator runs arbitrary
        # module code, so any failure is reported and turned into exit 1.
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    # ── API compatibility check ───────────────────────────────
    if not verify_translator_api(translator):
        print("❌ Aborting — fix the parameter mismatch above first.")
        sys.exit(1)

    # ── Discover pages ────────────────────────────────────────
    all_pages = sorted_pages(chapter_dir)
    if not all_pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    # Apply --start / --end slice (1-based, inclusive). The end is clamped
    # to the number of pages so the summary shows the range actually used.
    start_idx = args.start - 1
    end_idx = len(all_pages) if args.end is None else min(args.end, len(all_pages))
    pages = all_pages[start_idx:end_idx]

    if not pages:
        print(f"❌ No pages in range [{args.start}, {args.end}]")
        sys.exit(1)

    # ── Summary header ────────────────────────────────────────
    print(f"\n{'═' * 70}")
    print(f" 📖 Chapter : {chapter_dir.name}")
    print(f" 📄 Pages : {len(pages)} "
          f"(of {len(all_pages)} total, "
          f"range {args.start}–{end_idx})")
    print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']} → "
          f"{PIPELINE_CONFIG['target_lang']}")
    print(f" 📖 Read order : {PIPELINE_CONFIG['reading_mode'].upper()}")
    print(f" 🔍 Enhanced : {PIPELINE_CONFIG['use_enhanced_ocr']}")
    print(f"{'═' * 70}\n")

    succeeded, failed = [], []

    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(chapter_dir, page_path.stem)

        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    # ── Final report ──────────────────────────────────────────
    print(f"\n{'═' * 70}")
    print(" PIPELINE COMPLETE")
    print(f" ✅ {len(succeeded)} page(s) succeeded")
    if failed:
        print(f" ❌ {len(failed)} page(s) failed:")
        for name in failed:
            print(f" • {name}")
    print(f"{'═' * 70}\n")

    # Let shells and calling scripts detect a partially failed run.
    if failed:
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
|
||||
79
requirements
79
requirements
@@ -1,79 +0,0 @@
|
||||
aistudio-sdk==0.3.8
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
anyio==4.13.0
|
||||
bce-python-sdk==0.9.70
|
||||
beautifulsoup4==4.14.3
|
||||
certifi==2026.2.25
|
||||
chardet==7.4.3
|
||||
charset-normalizer==3.4.7
|
||||
click==8.3.2
|
||||
colorlog==6.10.1
|
||||
crc32c==2.8
|
||||
deep-translator==1.11.4
|
||||
easyocr==1.7.2
|
||||
filelock==3.28.0
|
||||
fsspec==2026.3.0
|
||||
future==1.0.0
|
||||
h11==0.16.0
|
||||
hf-xet==1.4.3
|
||||
httpcore==1.0.9
|
||||
httpx==0.28.1
|
||||
huggingface_hub==1.10.2
|
||||
idna==3.11
|
||||
ImageIO==2.37.3
|
||||
imagesize==2.0.0
|
||||
Jinja2==3.1.6
|
||||
lazy-loader==0.5
|
||||
markdown-it-py==4.0.0
|
||||
MarkupSafe==3.0.3
|
||||
mdurl==0.1.2
|
||||
modelscope==1.35.4
|
||||
mpmath==1.3.0
|
||||
networkx==3.6.1
|
||||
ninja==1.13.0
|
||||
numpy==1.26.4
|
||||
opencv-contrib-python==4.10.0.84
|
||||
opencv-python==4.11.0.86
|
||||
opencv-python-headless==4.11.0.86
|
||||
opt-einsum==3.3.0
|
||||
packaging==26.1
|
||||
paddleocr==3.4.1
|
||||
paddlepaddle==3.3.1
|
||||
paddlex==3.4.3
|
||||
pandas==3.0.2
|
||||
pillow==12.2.0
|
||||
prettytable==3.17.0
|
||||
protobuf==7.34.1
|
||||
psutil==7.2.2
|
||||
py-cpuinfo==9.0.0
|
||||
pyclipper==1.4.0
|
||||
pycryptodome==3.23.0
|
||||
pydantic==2.13.1
|
||||
pydantic_core==2.46.1
|
||||
Pygments==2.20.0
|
||||
pypdfium2==5.7.0
|
||||
python-bidi==0.6.7
|
||||
python-dateutil==2.9.0.post0
|
||||
PyYAML==6.0.2
|
||||
requests==2.33.1
|
||||
rich==15.0.0
|
||||
ruamel.yaml==0.19.1
|
||||
safetensors==0.7.0
|
||||
scikit-image==0.26.0
|
||||
scipy==1.17.1
|
||||
shapely==2.1.2
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
soupsieve==2.8.3
|
||||
sympy==1.14.0
|
||||
tifffile==2026.3.3
|
||||
torch==2.11.0
|
||||
torchvision==0.26.0
|
||||
tqdm==4.67.3
|
||||
typer==0.24.1
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
ujson==5.12.0
|
||||
urllib3==2.6.3
|
||||
wcwidth==0.6.0
|
||||
Reference in New Issue
Block a user