Added stuff

2026-04-22 10:51:57 +02:00
parent 512bb32f66
commit b6b0df4774
4 changed files with 1543 additions and 1632 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,10 @@
 .venv311/
 Spy_x_Family_076/
 Dandadan_059/
 # Icon must end with two \r
 Icon
--- a/manga-translator.py
+++ b/manga-translator.py
--- a/pipeline-translator.py
+++ b/pipeline-translator.py
@@ -14,10 +14,32 @@ import argparse
 import importlib.util
 from pathlib import Path
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
 #  PIPELINE CONFIGURATION
 #  Single source of truth — mirrors the __main__ block in
 #  manga-translator.py so both entry points stay in sync.
 # ─────────────────────────────────────────────────────────────
 PIPELINE_CONFIG = {
    # Each entry is forwarded as a keyword argument to
    # translate_manga_text() (see the **PIPELINE_CONFIG call in
    # process_page), and verify_translator_api() checks every key
    # against that function's signature before any page is processed.
    "source_lang": "english",
    "target_lang": "ca",
    "confidence_threshold": 0.03,
    "min_text_length": 1,
    "gap_px": "auto",
    "quality_threshold": 0.62,
    "reading_mode": "rtl",
    "debug": True,
    "use_enhanced_ocr": True,
    "strict_grouping": True,
    "max_box_width_ratio": 0.6,
    "max_box_height_ratio": 0.5,
    "auto_fix_bubbles": True,
 }
 # ─────────────────────────────────────────────────────────────
 #  DYNAMIC MODULE LOADER
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
-def load_module(name, filepath):
+def load_module(name: str, filepath: str):
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
@@ -25,103 +47,188 @@ def load_module(name, filepath):
    spec.loader.exec_module(module)
    return module
-# ─────────────────────────────────────────────
+
 # ─────────────────────────────────────────────────────────────
 #  HELPERS
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
-def sorted_pages(chapter_dir):
+def sorted_pages(chapter_dir: Path):
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
-        p for p in Path(chapter_dir).iterdir()
+        p for p in chapter_dir.iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=lambda p: p.stem)
-def make_page_workdir(chapter_dir, page_stem):
+
-    workdir = Path(chapter_dir) / "translated" / page_stem
+def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
    workdir = chapter_dir / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir
-# ─────────────────────────────────────────────
+
 def verify_translator_api(module) -> bool:
    """
    Check that *module* can be called with every PIPELINE_CONFIG setting.

    Looks up ``translate_manga_text`` on the module and inspects its
    signature. A PIPELINE_CONFIG key counts as accepted when the function
    has a parameter of that name that can be passed by keyword, or when
    the function declares a ``**kwargs`` catch-all (which accepts any key).
    A warning is printed for every key that is not accepted, so all
    mismatches are reported in a single run instead of silently falling
    back to the translator's defaults.

    Args:
        module: The loaded translator module (any object with attributes).

    Returns:
        True when translate_manga_text exists, is callable, and accepts
        every PIPELINE_CONFIG key; False otherwise. A diagnostic line is
        printed for each failing condition.
    """
    import inspect

    fn = getattr(module, "translate_manga_text", None)
    # A missing attribute and a non-callable one are equally unusable.
    if not callable(fn):
        print("❌ manga-translator.py does not expose translate_manga_text()")
        return False

    try:
        parameters = list(inspect.signature(fn).parameters.values())
    except (TypeError, ValueError) as exc:
        # inspect.signature() raises for callables that expose no signature.
        print(f"❌ Cannot inspect translate_manga_text(): {exc}")
        return False

    # A **kwargs parameter swallows any keyword, so every key is accepted.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters):
        return True

    # Positional-only parameters cannot receive values from **PIPELINE_CONFIG,
    # so only keyword-passable names count.
    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    accepted = {p.name for p in parameters if p.kind in keyword_kinds}

    missing = [key for key in PIPELINE_CONFIG if key not in accepted]
    for key in missing:
        print(f"⚠️  PIPELINE_CONFIG key '{key}' not found in "
              f"translate_manga_text() — update pipeline or translator.")
    return not missing
 # ─────────────────────────────────────────────────────────────
 #  PER-PAGE PIPELINE
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
-def process_page(page_path, workdir, translator_module):
+def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
    print(f"\n{'─' * 70}")
-    print(f"PAGE: {page_path.name}")
+    print(f"  PAGE : {page_path.name}")
    print(f"{'─' * 70}")
    orig_dir = os.getcwd()
    try:
-        # Isolate execution to the specific page's folder
+        # Run inside the page's own workdir so debug images and
        # output files land there automatically.
        os.chdir(workdir)
        print("  ⏳ Extracting text and translating...")
        # 1) Translate using ONLY the required path arguments.
        # This forces the function to use its own internal default variables 
        # (like source_lang, target_lang, confidence_threshold) directly from manga-translator.py
        translator_module.translate_manga_text(
            image_path=str(page_path.resolve()),
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json"
        )
        print("  ✅ Translation and OCR data saved successfully")
        translator_module.translate_manga_text(
            image_path       = str(page_path.resolve()),
            export_to_file   = "output.txt",
            export_bubbles_to= "bubbles.json",
            **PIPELINE_CONFIG,          # ← all settings from the single config dict
        )
        # Sanity-check that the expected outputs were actually written
        for fname in ("output.txt", "bubbles.json"):
            fpath = workdir / fname
            if not fpath.exists() or fpath.stat().st_size == 0:
                print(f"  ⚠️  {fname} is missing or empty after processing.")
        print("  ✅ Translation and OCR data saved successfully")
        return True
    except Exception as e:
        import traceback
        print(f"  ❌ Failed: {e}")
        traceback.print_exc()
        return False
    finally:
        os.chdir(orig_dir)
-# ─────────────────────────────────────────────
+
 # ─────────────────────────────────────────────────────────────
 #  MAIN
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
 def main():
-    parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
+    parser = argparse.ArgumentParser(
-    parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
+        description="Manga Translation OCR Batch Pipeline"
    )
    parser.add_argument(
        "chapter_dir",
        help="Path to the folder containing manga page images"
    )
    parser.add_argument(
        "--start", type=int, default=1,
        help="Start from this page number (1-based, default: 1)"
    )
    parser.add_argument(
        "--end", type=int, default=None,
        help="Stop after this page number inclusive (default: all)"
    )
    args = parser.parse_args()
    chapter_dir = Path(args.chapter_dir).resolve()
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)
-    print("Loading translator module...")
+    # ── Load translator module ────────────────────────────────
-    script_dir = Path(__file__).parent
+    script_dir  = Path(__file__).parent
-    
+    module_path = script_dir / "manga-translator.py"
    if not module_path.exists():
        print(f"❌ manga-translator.py not found in {script_dir}")
        sys.exit(1)
    print(f"📦 Loading translator from: {module_path}")
    try:
-        translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
+        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)
-    pages = sorted_pages(chapter_dir)
+    # ── API compatibility check ───────────────────────────────
-    if not pages:
+    if not verify_translator_api(translator):
        print("❌ Aborting — fix the parameter mismatch above first.")
        sys.exit(1)
    # ── Discover pages ────────────────────────────────────────
    all_pages = sorted_pages(chapter_dir)
    if not all_pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)
-    print(f"\n📖 Chapter : {chapter_dir.name}")
+    # Apply --start / --end slice (1-based, inclusive)
-    print(f"   Pages   : {len(pages)}")
+    start_idx = max(0, args.start - 1)
-    print("   Note    : Using translation settings directly from manga-translator.py\n")
+    end_idx   = args.end if args.end is not None else len(all_pages)
    pages     = all_pages[start_idx:end_idx]
    if not pages:
        print(f"❌ No pages in range [{args.start}, {args.end}]")
        sys.exit(1)
    # ── Summary header ────────────────────────────────────────
    print(f"\n{'═' * 70}")
    print(f"  📖 Chapter    : {chapter_dir.name}")
    print(f"  📄 Pages      : {len(pages)} "
          f"(of {len(all_pages)} total, "
          f"range {args.start}–{end_idx})")
    print(f"  🌐 Lang       : {PIPELINE_CONFIG['source_lang']} → "
          f"{PIPELINE_CONFIG['target_lang']}")
    print(f"  📖 Read order : {PIPELINE_CONFIG['reading_mode'].upper()}")
    print(f"  🔍 Enhanced   : {PIPELINE_CONFIG['use_enhanced_ocr']}")
    print(f"{'═' * 70}\n")
    succeeded, failed = [], []
    for i, page_path in enumerate(pages, start=1):
-        print(f"[{i}/{len(pages)}] Processing...")
+        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(chapter_dir, page_path.stem)
-        
+
        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)
    # ── Final report ──────────────────────────────────────────
    print(f"\n{'═' * 70}")
-    print("PIPELINE COMPLETE")
+    print("  PIPELINE COMPLETE")
-    print(f"✅ {len(succeeded)} page(s) succeeded")
+    print(f"  ✅ {len(succeeded)} page(s) succeeded")
    if failed:
-        print(f"❌ {len(failed)} page(s) failed:")
+        print(f"  ❌ {len(failed)} page(s) failed:")
-        for f in failed:
+        for name in failed:
-            print(f"   • {f}")
+            print(f"     • {name}")
    print(f"{'═' * 70}\n")
 if __name__ == "__main__":
-    main()
+    main()
--- a/79
+++ b/79
@@ -1,79 +0,0 @@
 aistudio-sdk==0.3.8
 annotated-doc==0.0.4
 annotated-types==0.7.0
 anyio==4.13.0
 bce-python-sdk==0.9.70
 beautifulsoup4==4.14.3
 certifi==2026.2.25
 chardet==7.4.3
 charset-normalizer==3.4.7
 click==8.3.2
 colorlog==6.10.1
 crc32c==2.8
 deep-translator==1.11.4
 easyocr==1.7.2
 filelock==3.28.0
 fsspec==2026.3.0
 future==1.0.0
 h11==0.16.0
 hf-xet==1.4.3
 httpcore==1.0.9
 httpx==0.28.1
 huggingface_hub==1.10.2
 idna==3.11
 ImageIO==2.37.3
 imagesize==2.0.0
 Jinja2==3.1.6
 lazy-loader==0.5
 markdown-it-py==4.0.0
 MarkupSafe==3.0.3
 mdurl==0.1.2
 modelscope==1.35.4
 mpmath==1.3.0
 networkx==3.6.1
 ninja==1.13.0
 numpy==1.26.4
 opencv-contrib-python==4.10.0.84
 opencv-python==4.11.0.86
 opencv-python-headless==4.11.0.86
 opt-einsum==3.3.0
 packaging==26.1
 paddleocr==3.4.1
 paddlepaddle==3.3.1
 paddlex==3.4.3
 pandas==3.0.2
 pillow==12.2.0
 prettytable==3.17.0
 protobuf==7.34.1
 psutil==7.2.2
 py-cpuinfo==9.0.0
 pyclipper==1.4.0
 pycryptodome==3.23.0
 pydantic==2.13.1
 pydantic_core==2.46.1
 Pygments==2.20.0
 pypdfium2==5.7.0
 python-bidi==0.6.7
 python-dateutil==2.9.0.post0
 PyYAML==6.0.2
 requests==2.33.1
 rich==15.0.0
 ruamel.yaml==0.19.1
 safetensors==0.7.0
 scikit-image==0.26.0
 scipy==1.17.1
 shapely==2.1.2
 shellingham==1.5.4
 six==1.17.0
 soupsieve==2.8.3
 sympy==1.14.0
 tifffile==2026.3.3
 torch==2.11.0
 torchvision==0.26.0
 tqdm==4.67.3
 typer==0.24.1
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 ujson==5.12.0
 urllib3==2.6.3
 wcwidth==0.6.0