Added stuff

2026-04-22 10:51:57 +02:00
parent 512bb32f66
commit b6b0df4774
4 changed files with 1543 additions and 1632 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,10 @@

 .venv311/

+Spy_x_Family_076/
+
+Dandadan_059/
+
 # Icon must end with two \r
 Icon

--- a/manga-translator.py
+++ b/manga-translator.py
--- a/pipeline-translator.py
+++ b/pipeline-translator.py
@@ -14,10 +14,32 @@ import argparse
 import importlib.util
 from pathlib import Path

-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
+#  PIPELINE CONFIGURATION
+#  Single source of truth — mirrors the __main__ block in
+#  manga-translator.py so both entry points stay in sync.
+# ─────────────────────────────────────────────────────────────
+PIPELINE_CONFIG = dict(
+    source_lang          = "english",
+    target_lang          = "ca",
+    confidence_threshold = 0.03,
+    min_text_length      = 1,
+    gap_px               = "auto",
+    quality_threshold    = 0.62,
+    reading_mode         = "rtl",
+    debug                = True,
+    use_enhanced_ocr     = True,
+    strict_grouping      = True,
+    max_box_width_ratio  = 0.6,
+    max_box_height_ratio = 0.5,
+    auto_fix_bubbles     = True,
+)
+
+
+# ─────────────────────────────────────────────────────────────
 #  DYNAMIC MODULE LOADER
-# ─────────────────────────────────────────────
-def load_module(name, filepath):
+# ─────────────────────────────────────────────────────────────
+def load_module(name: str, filepath: str):
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
@@ -25,103 +47,188 @@ def load_module(name, filepath):
    spec.loader.exec_module(module)
    return module

-# ─────────────────────────────────────────────
+
+# ─────────────────────────────────────────────────────────────
 #  HELPERS
-# ─────────────────────────────────────────────
-def sorted_pages(chapter_dir):
+# ─────────────────────────────────────────────────────────────
+def sorted_pages(chapter_dir: Path):
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
-        p for p in Path(chapter_dir).iterdir()
+        p for p in chapter_dir.iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=lambda p: p.stem)

-def make_page_workdir(chapter_dir, page_stem):
-    workdir = Path(chapter_dir) / "translated" / page_stem
+
+def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
+    workdir = chapter_dir / "translated" / page_stem
    workdir.mkdir(parents=True, exist_ok=True)
    return workdir

-# ─────────────────────────────────────────────
+
+def verify_translator_api(module) -> bool:
+    """
+    Checks that the loaded module exposes translate_manga_text()
+    and that it accepts all keys defined in PIPELINE_CONFIG.
+    Prints a warning for any missing parameter so mismatches are
+    caught immediately rather than silently falling back to defaults.
+    """
+    import inspect
+
+    fn = getattr(module, "translate_manga_text", None)
+    if fn is None:
+        print("❌ manga-translator.py does not expose translate_manga_text()")
+        return False
+
+    sig    = inspect.signature(fn)
+    params = set(sig.parameters.keys())
+    ok     = True
+
+    for key in PIPELINE_CONFIG:
+        if key not in params:
+            print(f"⚠️  PIPELINE_CONFIG key '{key}' not found in "
+                  f"translate_manga_text() — update pipeline or translator.")
+            ok = False
+
+    return ok
+
+
+# ─────────────────────────────────────────────────────────────
 #  PER-PAGE PIPELINE
-# ─────────────────────────────────────────────
-def process_page(page_path, workdir, translator_module):
+# ─────────────────────────────────────────────────────────────
+def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
    print(f"\n{'─' * 70}")
-    print(f"PAGE: {page_path.name}")
+    print(f"  PAGE : {page_path.name}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
-        # Isolate execution to the specific page's folder
+        # Run inside the page's own workdir so debug images and
+        # output files land there automatically.
        os.chdir(workdir)

        print("  ⏳ Extracting text and translating...")
-        
-        # 1) Translate using ONLY the required path arguments.
-        # This forces the function to use its own internal default variables 
-        # (like source_lang, target_lang, confidence_threshold) directly from manga-translator.py
-        translator_module.translate_manga_text(
-            image_path=str(page_path.resolve()),
-            export_to_file="output.txt",
-            export_bubbles_to="bubbles.json"
-        )
-        print("  ✅ Translation and OCR data saved successfully")

+        translator_module.translate_manga_text(
+            image_path       = str(page_path.resolve()),
+            export_to_file   = "output.txt",
+            export_bubbles_to= "bubbles.json",
+            **PIPELINE_CONFIG,          # ← all settings from the single config dict
+        )
+
+        # Sanity-check that the expected outputs were actually written
+        for fname in ("output.txt", "bubbles.json"):
+            fpath = workdir / fname
+            if not fpath.exists() or fpath.stat().st_size == 0:
+                print(f"  ⚠️  {fname} is missing or empty after processing.")
+
+        print("  ✅ Translation and OCR data saved successfully")
        return True

    except Exception as e:
+        import traceback
        print(f"  ❌ Failed: {e}")
+        traceback.print_exc()
        return False

    finally:
        os.chdir(orig_dir)

-# ─────────────────────────────────────────────
+
+# ─────────────────────────────────────────────────────────────
 #  MAIN
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
 def main():
-    parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
-    parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
+    parser = argparse.ArgumentParser(
+        description="Manga Translation OCR Batch Pipeline"
+    )
+    parser.add_argument(
+        "chapter_dir",
+        help="Path to the folder containing manga page images"
+    )
+    parser.add_argument(
+        "--start", type=int, default=1,
+        help="Start from this page number (1-based, default: 1)"
+    )
+    parser.add_argument(
+        "--end", type=int, default=None,
+        help="Stop after this page number inclusive (default: all)"
+    )
    args = parser.parse_args()

    chapter_dir = Path(args.chapter_dir).resolve()
+    if not chapter_dir.is_dir():
+        print(f"❌ Not a directory: {chapter_dir}")
+        sys.exit(1)

-    print("Loading translator module...")
-    script_dir = Path(__file__).parent
-    
+    # ── Load translator module ────────────────────────────────
+    script_dir  = Path(__file__).parent
+    module_path = script_dir / "manga-translator.py"
+
+    if not module_path.exists():
+        print(f"❌ manga-translator.py not found in {script_dir}")
+        sys.exit(1)
+
+    print(f"📦 Loading translator from: {module_path}")
    try:
-        translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
+        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

-    pages = sorted_pages(chapter_dir)
-    if not pages:
+    # ── API compatibility check ───────────────────────────────
+    if not verify_translator_api(translator):
+        print("❌ Aborting — fix the parameter mismatch above first.")
+        sys.exit(1)
+
+    # ── Discover pages ────────────────────────────────────────
+    all_pages = sorted_pages(chapter_dir)
+    if not all_pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

-    print(f"\n📖 Chapter : {chapter_dir.name}")
-    print(f"   Pages   : {len(pages)}")
-    print("   Note    : Using translation settings directly from manga-translator.py\n")
+    # Apply --start / --end slice (1-based, inclusive)
+    start_idx = max(0, args.start - 1)
+    end_idx   = args.end if args.end is not None else len(all_pages)
+    pages     = all_pages[start_idx:end_idx]
+
+    if not pages:
+        print(f"❌ No pages in range [{args.start}, {args.end}]")
+        sys.exit(1)
+
+    # ── Summary header ────────────────────────────────────────
+    print(f"\n{'═' * 70}")
+    print(f"  📖 Chapter    : {chapter_dir.name}")
+    print(f"  📄 Pages      : {len(pages)} "
+          f"(of {len(all_pages)} total, "
+          f"range {args.start}–{end_idx})")
+    print(f"  🌐 Lang       : {PIPELINE_CONFIG['source_lang']} → "
+          f"{PIPELINE_CONFIG['target_lang']}")
+    print(f"  📖 Read order : {PIPELINE_CONFIG['reading_mode'].upper()}")
+    print(f"  🔍 Enhanced   : {PIPELINE_CONFIG['use_enhanced_ocr']}")
+    print(f"{'═' * 70}\n")

    succeeded, failed = [], []

    for i, page_path in enumerate(pages, start=1):
-        print(f"[{i}/{len(pages)}] Processing...")
+        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(chapter_dir, page_path.stem)
-        
+
        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

+    # ── Final report ──────────────────────────────────────────
    print(f"\n{'═' * 70}")
-    print("PIPELINE COMPLETE")
-    print(f"✅ {len(succeeded)} page(s) succeeded")
+    print("  PIPELINE COMPLETE")
+    print(f"  ✅ {len(succeeded)} page(s) succeeded")
    if failed:
-        print(f"❌ {len(failed)} page(s) failed:")
-        for f in failed:
-            print(f"   • {f}")
+        print(f"  ❌ {len(failed)} page(s) failed:")
+        for name in failed:
+            print(f"     • {name}")
    print(f"{'═' * 70}\n")

+
 if __name__ == "__main__":
-    main()
+    main()
--- a/79
+++ b/79
@@ -1,79 +0,0 @@
-aistudio-sdk==0.3.8
-annotated-doc==0.0.4
-annotated-types==0.7.0
-anyio==4.13.0
-bce-python-sdk==0.9.70
-beautifulsoup4==4.14.3
-certifi==2026.2.25
-chardet==7.4.3
-charset-normalizer==3.4.7
-click==8.3.2
-colorlog==6.10.1
-crc32c==2.8
-deep-translator==1.11.4
-easyocr==1.7.2
-filelock==3.28.0
-fsspec==2026.3.0
-future==1.0.0
-h11==0.16.0
-hf-xet==1.4.3
-httpcore==1.0.9
-httpx==0.28.1
-huggingface_hub==1.10.2
-idna==3.11
-ImageIO==2.37.3
-imagesize==2.0.0
-Jinja2==3.1.6
-lazy-loader==0.5
-markdown-it-py==4.0.0
-MarkupSafe==3.0.3
-mdurl==0.1.2
-modelscope==1.35.4
-mpmath==1.3.0
-networkx==3.6.1
-ninja==1.13.0
-numpy==1.26.4
-opencv-contrib-python==4.10.0.84
-opencv-python==4.11.0.86
-opencv-python-headless==4.11.0.86
-opt-einsum==3.3.0
-packaging==26.1
-paddleocr==3.4.1
-paddlepaddle==3.3.1
-paddlex==3.4.3
-pandas==3.0.2
-pillow==12.2.0
-prettytable==3.17.0
-protobuf==7.34.1
-psutil==7.2.2
-py-cpuinfo==9.0.0
-pyclipper==1.4.0
-pycryptodome==3.23.0
-pydantic==2.13.1
-pydantic_core==2.46.1
-Pygments==2.20.0
-pypdfium2==5.7.0
-python-bidi==0.6.7
-python-dateutil==2.9.0.post0
-PyYAML==6.0.2
-requests==2.33.1
-rich==15.0.0
-ruamel.yaml==0.19.1
-safetensors==0.7.0
-scikit-image==0.26.0
-scipy==1.17.1
-shapely==2.1.2
-shellingham==1.5.4
-six==1.17.0
-soupsieve==2.8.3
-sympy==1.14.0
-tifffile==2026.3.3
-torch==2.11.0
-torchvision==0.26.0
-tqdm==4.67.3
-typer==0.24.1
-typing-inspection==0.4.2
-typing_extensions==4.15.0
-ujson==5.12.0
-urllib3==2.6.3
-wcwidth==0.6.0