Compare commits
23 Commits
5ef8c39f69
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
037dadd920 | ||
|
|
285e9ca393 | ||
|
|
d77db83cfe | ||
|
|
b730037a06 | ||
|
|
7837aeaa9b | ||
|
|
455b4ad82c | ||
|
|
b6b0df4774 | ||
|
|
512bb32f66 | ||
|
|
494631c967 | ||
|
|
27a3e6f98a | ||
|
|
f00647e668 | ||
|
|
a5c81f4ff0 | ||
|
|
f56ee49abf | ||
|
|
ba5f001e75 | ||
|
|
2fb5e9eb7b | ||
|
|
dfa52f54eb | ||
|
|
bd475d8f01 | ||
|
|
f753a78ba4 | ||
|
|
3800f6cf3f | ||
|
|
beb8557e19 | ||
|
|
39765a6cf1 | ||
|
|
5aa79d986a | ||
|
|
dd1cf54f86 |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -9,6 +9,11 @@
|
||||
|
||||
.venv311/
|
||||
|
||||
#Folders to test
|
||||
Spy_x_Family_076/
|
||||
Dandadan_059/
|
||||
Lv999/
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
@@ -23,6 +28,7 @@ Icon
|
||||
*.jpg
|
||||
*.jpeg
|
||||
*.json
|
||||
*.webp
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
|
||||
53
README.md
53
README.md
@@ -0,0 +1,53 @@
|
||||
# Manga Translator OCR Pipeline
|
||||
|
||||
A robust manga/comic OCR + translation pipeline with:
|
||||
|
||||
- EasyOCR (default, reliable on macOS M1)
|
||||
- Optional PaddleOCR (auto-fallback if unavailable)
|
||||
- Bubble clustering and line-level boxes
|
||||
- Robust reread pass (multi-preprocessing + slight rotation)
|
||||
- Translation export + debug overlays
|
||||
|
||||
---
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- OCR from raw manga pages
|
||||
- Noise filtering (`BOX` debug artifacts, tiny garbage tokens, symbols)
|
||||
- Speech bubble grouping
|
||||
- Reading order estimation (`ltr` / `rtl`)
|
||||
- Translation output (`output.txt`)
|
||||
- Structured bubble metadata (`bubbles.json`)
|
||||
- Visual debug output (`debug_clusters.png`)
|
||||
|
||||
---
|
||||
|
||||
## 🧰 Requirements
|
||||
|
||||
- macOS (Apple Silicon supported)
|
||||
- Python **3.11** recommended
|
||||
- Homebrew (for Python install)
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Setup (Python 3.11 venv)
|
||||
|
||||
```bash
|
||||
cd /path/to/manga-translator
|
||||
|
||||
# 1) Create venv with 3.11
|
||||
/opt/homebrew/bin/python3.11 -m venv venv
|
||||
|
||||
# 2) Activate
|
||||
source venv/bin/activate
|
||||
|
||||
# 3) Verify interpreter
|
||||
python -V
|
||||
# expected: Python 3.11.x
|
||||
|
||||
# 4) Install dependencies
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
python -m pip install -r requirements.txt
|
||||
|
||||
# Optional Paddle runtime
|
||||
python -m pip install paddlepaddle || true
|
||||
|
||||
BIN
fonts/ComicNeue-Bold.ttf
Executable file
BIN
fonts/ComicNeue-Bold.ttf
Executable file
Binary file not shown.
Binary file not shown.
BIN
fonts/ComicRelief-Bold.ttf
Executable file
BIN
fonts/ComicRelief-Bold.ttf
Executable file
Binary file not shown.
Binary file not shown.
BIN
fonts/Komika.ttf
Normal file
BIN
fonts/Komika.ttf
Normal file
Binary file not shown.
BIN
fonts/Laffayette_Comic_Pro.ttf
Normal file
BIN
fonts/Laffayette_Comic_Pro.ttf
Normal file
Binary file not shown.
BIN
fonts/animeace2_bld.ttf
Normal file
BIN
fonts/animeace2_bld.ttf
Normal file
Binary file not shown.
BIN
fonts/animeace2_ital.ttf
Normal file
BIN
fonts/animeace2_ital.ttf
Normal file
Binary file not shown.
BIN
fonts/animeace2_reg.ttf
Normal file
BIN
fonts/animeace2_reg.ttf
Normal file
Binary file not shown.
@@ -1,509 +1,357 @@
|
||||
import os
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
manga-renderer.py
|
||||
|
||||
Inputs: 001.jpg + bubbles.json + output_001.txt
|
||||
Output: translated_page_001.png
|
||||
|
||||
Strategy:
|
||||
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
|
||||
2. Detect the original font size from the OCR bounding boxes.
|
||||
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
|
||||
4. Render the translated text centered inside the bubble bounding box.
|
||||
5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
|
||||
6. Adds a dynamic white stroke to the text to cover any residual original characters.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import textwrap
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from typing import Dict, List, Tuple, Optional, Set, Any
|
||||
|
||||
# ============================================================
|
||||
# CONFIG — edit these paths to match your setup
|
||||
# ============================================================
|
||||
IMAGE_PATH = "004.png"
|
||||
BUBBLES_PATH = "bubbles_004.json"
|
||||
TRANSLATIONS_PATH = "output_004.txt"
|
||||
OUTPUT_PATH = "translated_page_004.png"
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
DEFAULT_FONT_CANDIDATES = [
|
||||
"fonts/ComicRelief-Regular.ttf",
|
||||
"fonts/ComicNeue-Regular.ttf",
|
||||
# Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks
|
||||
FONT_CANDIDATES = [
|
||||
"fonts/animeace2_reg.ttf",
|
||||
"fonts/ComicNeue-Bold.ttf",
|
||||
]
|
||||
DEFAULT_FONT_COLOR = (0, 0, 0)
|
||||
DEFAULT_STROKE_COLOR = (255, 255, 255)
|
||||
|
||||
MAX_FONT_SIZE = 20
|
||||
MIN_FONT_SIZE = 6
|
||||
DEFAULT_FONT_SIZE = 18
|
||||
MIN_FONT_SIZE = 8
|
||||
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
|
||||
|
||||
# Guarantee full wipe of yellow squares
|
||||
YELLOW_BOX_PAD_X = 1
|
||||
YELLOW_BOX_PAD_Y = 1
|
||||
YELLOW_UNION_PAD_X = 4
|
||||
YELLOW_UNION_PAD_Y = 4
|
||||
# ============================================================
|
||||
# SKIP LIST
|
||||
# ============================================================
|
||||
SKIP_BUBBLE_IDS: Set[int] = {
|
||||
# Add any bubble IDs you do NOT want rendered here.
|
||||
}
|
||||
|
||||
# Optional extra cleanup expansion
|
||||
ENABLE_EXTRA_CLEAN = True
|
||||
EXTRA_DILATE_ITERS = 1
|
||||
EXTRA_CLOSE_ITERS = 1
|
||||
# ============================================================
|
||||
# FONT LOADER
|
||||
# ============================================================
|
||||
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Attempt to load a usable font face from *path* at *size*.

    TrueType collections (.ttc) may hold several faces, so indices 0-3
    are probed; plain .ttf/.otf files only need index 0.  A face counts
    as usable only if a getbbox() probe succeeds (broken metrics raise).
    Returns None when no face loads cleanly.
    """
    face_indices = range(4) if path.lower().endswith(".ttc") else [0]
    for face_idx in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face_idx)
            candidate.getbbox("A")  # raises if face metrics are broken
        except Exception:
            continue
        return candidate
    return None
|
||||
|
||||
# Bubble detection (for optional extra mask / border preservation)
|
||||
FLOOD_TOL = 30
|
||||
def resolve_font_path() -> str:
    """Pick the first candidate font file that actually loads.

    Probes every entry of FONT_CANDIDATES at DEFAULT_FONT_SIZE; an empty
    string signals that the caller must fall back to Pillow's built-in
    bitmap font.
    """
    working = next(
        (c for c in FONT_CANDIDATES if load_font(c, DEFAULT_FONT_SIZE) is not None),
        None,
    )
    if working is not None:
        print(f" ✅ Font: {working}")
        return working
    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return ""
|
||||
|
||||
# Border restoration: keep very conservative
|
||||
ENABLE_EDGE_RESTORE = True
|
||||
EDGE_RESTORE_DILATE = 1
|
||||
|
||||
# Text layout inside yellow-union
|
||||
TEXT_INSET = 0.92
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PARSERS
|
||||
# ─────────────────────────────────────────────
|
||||
def parse_translations(translations_file):
|
||||
# ============================================================
|
||||
# PARSERS
|
||||
# ============================================================
|
||||
def parse_translations(filepath: str) -> Dict[int, str]:
|
||||
"""
|
||||
Reads output.txt and returns {bubble_id: translated_text}.
|
||||
Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
|
||||
"""
|
||||
translations = {}
|
||||
originals = {}
|
||||
flags_map = {}
|
||||
|
||||
with open(translations_file, "r", encoding="utf-8") as f:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line.startswith("#"):
|
||||
continue
|
||||
|
||||
parts = line.split("|")
|
||||
if len(parts) < 5:
|
||||
continue
|
||||
try:
|
||||
bubble_id = int(parts[0].lstrip("#"))
|
||||
except Exception:
|
||||
bid = int(parts[0].lstrip("#"))
|
||||
translated = parts[4].strip()
|
||||
if translated and translated != "-":
|
||||
translations[bid] = translated
|
||||
except ValueError:
|
||||
continue
|
||||
return translations
|
||||
|
||||
if len(parts) >= 5:
|
||||
original = parts[2].strip()
|
||||
translated = parts[3].strip()
|
||||
flags = parts[4].strip()
|
||||
elif len(parts) >= 4:
|
||||
original = parts[2].strip()
|
||||
translated = parts[3].strip()
|
||||
flags = "-"
|
||||
elif len(parts) >= 3:
|
||||
original = ""
|
||||
translated = parts[2].strip()
|
||||
flags = "-"
|
||||
else:
|
||||
continue
|
||||
def parse_bubbles(filepath: str):
    """Load the bubble-metadata JSON file and return it unmodified."""
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
if translated.startswith("["):
|
||||
continue
|
||||
|
||||
translations[bubble_id] = translated
|
||||
originals[bubble_id] = original
|
||||
flags_map[bubble_id] = flags
|
||||
|
||||
return translations, originals, flags_map
|
||||
|
||||
|
||||
def parse_bubbles(bubbles_file):
    """Load the bubbles JSON and coerce its string keys to ints."""
    with open(bubbles_file, "r", encoding="utf-8") as f:
        raw = json.load(f)
    return {int(key): value for key, value in raw.items()}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def clamp(v, lo, hi):
    """Clamp *v* into the inclusive range [lo, hi]."""
    if v < lo:
        return lo
    if v > hi:
        return hi
    return v
|
||||
|
||||
|
||||
def xywh_to_xyxy(box):
    """Convert an {x, y, w, h} dict to an (x1, y1, x2, y2) tuple.

    Returns None for a falsy box (None or empty dict); missing keys
    default to 0.
    """
    if not box:
        return None
    left = int(box.get("x", 0))
    top = int(box.get("y", 0))
    width = int(box.get("w", 0))
    height = int(box.get("h", 0))
    return (left, top, left + width, top + height)
|
||||
|
||||
|
||||
def union_xyxy(boxes):
    """Bounding union of xyxy boxes, ignoring None entries.

    Returns None when no valid boxes remain or the union is degenerate
    (zero or negative width/height).
    """
    valid = [b for b in boxes if b is not None]
    if not valid:
        return None
    xs1, ys1, xs2, ys2 = zip(*valid)
    left, top = min(xs1), min(ys1)
    right, bottom = max(xs2), max(ys2)
    if right <= left or bottom <= top:
        return None
    return (left, top, right, bottom)
|
||||
|
||||
|
||||
def bbox_from_mask(mask):
    """Tight (x1, y1, x2, y2) box around nonzero mask pixels.

    x2/y2 are exclusive (max index + 1).  Returns None for an
    all-zero mask.
    """
    ys, xs = np.nonzero(mask > 0)
    if xs.size == 0:
        return None
    return (int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1)
|
||||
|
||||
|
||||
def normalize_text(s):
    """Uppercase *s* and strip every run of non-word characters."""
    upper = s.upper().strip()
    return re.sub(r"[^\w]+", "", upper)
|
||||
|
||||
|
||||
def is_sfx_like(text):
    """True when *text* normalizes to a short sound-effect token.

    Matches runs of SHA/BIP/BEEP/HN/AH/OH (e.g. "AHHH", "BEEEEP")
    up to 8 normalized characters.
    """
    norm = normalize_text(text)
    if len(norm) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", norm) is not None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# FONT
|
||||
# ─────────────────────────────────────────────
|
||||
def load_font_from_candidates(candidates, size):
    """Return (font, path) for the first candidate file that loads.

    Candidates that are empty, missing on disk, or fail to load are
    skipped.  Falls back to Pillow's bitmap default with the label
    "PIL_DEFAULT" when nothing works.
    """
    for font_path in candidates:
        if not font_path or not os.path.exists(font_path):
            continue
        try:
            return ImageFont.truetype(font_path, size), font_path
        except Exception:
            pass
    return ImageFont.load_default(), "PIL_DEFAULT"
|
||||
|
||||
|
||||
def measure_text(draw, text, font):
    """Pixel (width, height) of *text* via textbbox anchored at the origin."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top
|
||||
|
||||
|
||||
def wrap_text(draw, text, font, max_width):
    """Greedy word-wrap of *text* into lines no wider than *max_width* px.

    A line may exceed max_width only when it holds a single unbreakable
    word.  Returns (lines, total_height, widest_line_width); the vertical
    gap between lines is a fifth of the first line's height (min 2 px).
    """
    lines = []
    current = ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        candidate_w, _ = measure_text(draw, candidate, font)
        if candidate_w <= max_width or not current:
            current = candidate
        else:
            lines.append(current)
            current = word
    if current:
        lines.append(current)

    if not lines:
        return [""], 0, 0

    sizes = [measure_text(draw, line, font) for line in lines]
    line_widths = [s[0] for s in sizes]
    line_heights = [s[1] for s in sizes]

    gap = max(2, line_heights[0] // 5)
    total_height = sum(line_heights) + gap * (len(lines) - 1)
    return lines, total_height, max(line_widths)
|
||||
|
||||
|
||||
def fit_font(draw, text, font_candidates, safe_w, safe_h):
    """Find the largest font size whose wrapped text fits safe_w × safe_h.

    Scans sizes from MAX_FONT_SIZE down to MIN_FONT_SIZE.  If nothing
    fits, returns the MIN_FONT_SIZE layout anyway so the caller still
    renders something.  Returns (font, wrapped_lines, total_height).
    """
    for candidate_size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -1):
        font, _ = load_font_from_candidates(font_candidates, candidate_size)
        lines, total_h, widest = wrap_text(draw, text, font, safe_w)
        if widest <= safe_w and total_h <= safe_h:
            return font, lines, total_h

    # Nothing fit — settle for the smallest size.
    font, _ = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
    lines, total_h, _ = wrap_text(draw, text, font, safe_w)
    return font, lines, total_h
|
||||
|
||||
|
||||
def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Render *text* at *pos* with a manually stamped outline.

    The outline is produced by drawing the text at every offset of a
    square neighbourhood, then drawing the fill color on top.  Small
    glyphs (height <= 11 px) get a 2 px radius so residual pixels under
    the text stay covered; larger text uses 1 px.
    """
    x, y = pos
    _, text_h = measure_text(draw, text, font)
    stroke_r = 2 if text_h <= 11 else 1

    offsets = (
        (dx, dy)
        for dx in range(-stroke_r, stroke_r + 1)
        for dy in range(-stroke_r, stroke_r + 1)
        if (dx, dy) != (0, 0)
    )
    for dx, dy in offsets:
        draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill)

    draw.text((x, y), text, font=font, fill=fill)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MASK BUILDERS
|
||||
# ─────────────────────────────────────────────
|
||||
def build_yellow_mask(bubble_data, img_h, img_w):
|
||||
# ============================================================
|
||||
# ERASE — white-fill every OCR quad (with small padding)
|
||||
# ============================================================
|
||||
def erase_quads(
|
||||
image_bgr,
|
||||
bubbles_data: Dict[str, dict],
|
||||
translations: Dict[int, str],
|
||||
skip_ids: Set[int],
|
||||
pad: int = QUAD_PAD
|
||||
):
|
||||
"""
|
||||
HARD GUARANTEE:
|
||||
Returned mask always covers all yellow squares (line_bboxes).
|
||||
White-fills OCR quads ONLY for bubbles that:
|
||||
- have a translation in output.txt AND
|
||||
- are NOT in SKIP_BUBBLE_IDS
|
||||
"""
|
||||
mask = np.zeros((img_h, img_w), dtype=np.uint8)
|
||||
ih, iw = image_bgr.shape[:2]
|
||||
result = image_bgr.copy()
|
||||
|
||||
# Preferred: exact line boxes
|
||||
line_boxes = bubble_data.get("line_bboxes", [])
|
||||
for lb in line_boxes:
|
||||
b = xywh_to_xyxy(lb)
|
||||
if not b:
|
||||
erased_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for bid_str, val in bubbles_data.items():
|
||||
bid = int(bid_str)
|
||||
quads = val.get("quads", [])
|
||||
|
||||
if bid in skip_ids or bid not in translations:
|
||||
skipped_count += 1
|
||||
continue
|
||||
x1, y1, x2, y2 = b
|
||||
x1 -= YELLOW_BOX_PAD_X
|
||||
y1 -= YELLOW_BOX_PAD_Y
|
||||
x2 += YELLOW_BOX_PAD_X
|
||||
y2 += YELLOW_BOX_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
|
||||
# If no line boxes available, use line_union fallback
|
||||
if np.count_nonzero(mask) == 0:
|
||||
ub = xywh_to_xyxy(bubble_data.get("line_union_bbox"))
|
||||
if ub:
|
||||
x1, y1, x2, y2 = ub
|
||||
x1 -= YELLOW_UNION_PAD_X
|
||||
y1 -= YELLOW_UNION_PAD_Y
|
||||
x2 += YELLOW_UNION_PAD_X
|
||||
y2 += YELLOW_UNION_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
for quad in quads:
|
||||
pts = np.array(quad, dtype=np.int32)
|
||||
cv2.fillPoly(result, [pts], (255, 255, 255))
|
||||
|
||||
# Last fallback: text_bbox
|
||||
if np.count_nonzero(mask) == 0:
|
||||
tb = xywh_to_xyxy(bubble_data.get("text_bbox"))
|
||||
if tb:
|
||||
x1, y1, x2, y2 = tb
|
||||
x1 -= YELLOW_UNION_PAD_X
|
||||
y1 -= YELLOW_UNION_PAD_Y
|
||||
x2 += YELLOW_UNION_PAD_X
|
||||
y2 += YELLOW_UNION_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
|
||||
x1 = max(0, min(xs) - pad)
|
||||
y1 = max(0, min(ys) - pad)
|
||||
x2 = min(iw - 1, max(xs) + pad)
|
||||
y2 = min(ih - 1, max(ys) + pad)
|
||||
cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
|
||||
|
||||
return mask
|
||||
erased_count += 1
|
||||
|
||||
print(f" Erased : {erased_count} bubbles")
|
||||
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
|
||||
return result
|
||||
|
||||
def bubble_interior_mask(img_bgr, bubble_data):
|
||||
# ============================================================
|
||||
# DYNAMIC TEXT FITTING
|
||||
# ============================================================
|
||||
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Estimate the original font size from the bubble's OCR line boxes.

    Uses the median line-box height scaled by 0.85 (line boxes carry a
    little padding around the glyphs), clamped to [MIN_FONT_SIZE, 60].
    Returns *fallback_size* when the bubble has no line_bboxes.
    """
    line_bboxes = bubble_data.get("line_bboxes", [])
    if not line_bboxes:
        return fallback_size

    median_height = int(np.median([box["h"] for box in line_bboxes]))
    estimate = int(median_height * 0.85)
    return max(MIN_FONT_SIZE, min(estimate, 60))
|
||||
|
||||
def fit_text_dynamically(
|
||||
text: str,
|
||||
font_path: str,
|
||||
max_w: int,
|
||||
max_h: int,
|
||||
target_font_size: int
|
||||
) -> Tuple[List[str], Any, int, int]:
|
||||
"""
|
||||
Optional helper to expand clean region safely; never used to shrink yellow coverage.
|
||||
Wraps text and scales down font size if it exceeds the bubble dimensions.
|
||||
Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
|
||||
"""
|
||||
h, w = img_bgr.shape[:2]
|
||||
font_size = target_font_size
|
||||
|
||||
panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
|
||||
if panel is None:
|
||||
panel = (0, 0, w, h)
|
||||
px1, py1, px2, py2 = panel
|
||||
if not font_path:
|
||||
font = ImageFont.load_default()
|
||||
char_w = 6
|
||||
chars_per_line = max(1, int(max_w / char_w))
|
||||
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
|
||||
return wrapped_lines, font, 4, 10
|
||||
|
||||
seed = bubble_data.get("seed_point", {})
|
||||
sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
|
||||
sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
|
||||
sx = clamp(sx, 1, w - 2)
|
||||
sy = clamp(sy, 1, h - 2)
|
||||
while font_size >= MIN_FONT_SIZE:
|
||||
font = load_font(font_path, font_size)
|
||||
if font is None:
|
||||
font = ImageFont.load_default()
|
||||
return [text], font, 4, 10
|
||||
|
||||
gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
||||
_, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
|
||||
char_bbox = font.getbbox("A")
|
||||
char_w = (char_bbox[2] - char_bbox[0]) or 10
|
||||
chars_per_line = max(1, int((max_w * 0.95) / char_w))
|
||||
|
||||
panel_bin = np.zeros_like(binary)
|
||||
panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]
|
||||
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
|
||||
|
||||
# if seed on dark pixel, search nearby white
|
||||
if gray[sy, sx] < 150:
|
||||
found = False
|
||||
search_r = max(2, min(bubble_data.get("w", 20), bubble_data.get("h", 20)) // 3)
|
||||
for r in range(1, search_r + 1):
|
||||
for dy in range(-r, r + 1):
|
||||
for dx in range(-r, r + 1):
|
||||
nx, ny = sx + dx, sy + dy
|
||||
if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200:
|
||||
sx, sy = nx, ny
|
||||
found = True
|
||||
break
|
||||
if found:
|
||||
break
|
||||
if found:
|
||||
break
|
||||
# Use uniform font metrics for height to protect accents like È
|
||||
line_spacing = max(2, int(font_size * 0.15))
|
||||
if hasattr(font, 'getmetrics'):
|
||||
ascent, descent = font.getmetrics()
|
||||
line_h = ascent + descent
|
||||
else:
|
||||
line_h = font_size
|
||||
|
||||
if not found:
|
||||
m = np.zeros((h, w), dtype=np.uint8)
|
||||
bx = bubble_data.get("x", 0)
|
||||
by = bubble_data.get("y", 0)
|
||||
bw = bubble_data.get("w", 20)
|
||||
bh = bubble_data.get("h", 20)
|
||||
cv2.ellipse(m, (bx + bw // 2, by + bh // 2), (max(4, bw // 2), max(4, bh // 2)), 0, 0, 360, 255, -1)
|
||||
return m
|
||||
total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
|
||||
|
||||
ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
|
||||
flood = panel_bin.copy()
|
||||
cv2.floodFill(
|
||||
flood, ff_mask, (sx, sy), 255,
|
||||
loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
|
||||
flags=cv2.FLOODFILL_FIXED_RANGE
|
||||
max_line_w = 0
|
||||
for line in wrapped_lines:
|
||||
bbox = font.getbbox(line)
|
||||
lw = bbox[2] - bbox[0]
|
||||
max_line_w = max(max_line_w, lw)
|
||||
|
||||
if max_line_w <= max_w and total_h <= max_h:
|
||||
return wrapped_lines, font, line_spacing, font_size
|
||||
|
||||
font_size -= 2
|
||||
|
||||
font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
|
||||
char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10)
|
||||
char_w = (char_bbox[2] - char_bbox[0]) or 6
|
||||
chars_per_line = max(1, int(max_w / char_w))
|
||||
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
|
||||
|
||||
return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE
|
||||
|
||||
# ============================================================
|
||||
# RENDER
|
||||
# ============================================================
|
||||
def render_text(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    font_path: str,
    skip_ids: Set[int]
):
    """
    Draws the translated text centered in the line_union_bbox of each bubble.
    Adds a dynamic white stroke (outline) to cover any residual original characters.

    Parameters:
        image_bgr: BGR page image (OpenCV array), already erased of original text.
        bubbles_data: bubble-id (string key) -> bubble metadata dict.
        translations: bubble-id (int) -> translated text; bubbles without an
            entry here are skipped.
        font_path: path to a TrueType font, or "" for the bitmap fallback
            (handled downstream by fit_text_dynamically).
        skip_ids: bubble ids to leave untouched.

    Returns a new BGR image array; the input array is not modified.
    """
    # Work in RGB through Pillow for text rendering; convert back at the end.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)

    rendered_count = 0

    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)

        # Only render bubbles that have a translation and aren't skipped.
        if bid in skip_ids or bid not in translations:
            continue

        text = translations[bid]

        # Prefer the union of OCR line boxes; fall back to the text bbox.
        union_box = val.get("line_union_bbox")
        if not union_box:
            union_box = val.get("text_bbox")
        if not union_box:
            continue

        bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]

        # Expand the target box by 10% (5% each side) to give text breathing room.
        pad_x = int(bw * 0.1)
        pad_y = int(bh * 0.1)
        bx -= pad_x // 2
        by -= pad_y // 2
        bw += pad_x
        bh += pad_y

        target_size = get_original_font_size(val)
        wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)

        # Use uniform typographic line height for rendering to protect accents
        # (ascent + descent keeps È/À from colliding with the line above).
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
        else:
            line_h = final_size

        total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))

        # Vertically center the whole text block inside the padded box.
        current_y = by + (bh - total_text_height) // 2

        # Dynamic outline thickness based on the final scaled font size
        outline_thickness = max(2, int(final_size * 0.10))

        for i, line in enumerate(wrapped_lines):
            # Measure each line to horizontally center it; bitmap fallback
            # fonts without getbbox get a rough 6px-per-char estimate.
            if hasattr(font, 'getbbox'):
                bbox = font.getbbox(line)
                lw = bbox[2] - bbox[0]
            else:
                lw = len(line) * 6

            current_x = bx + (bw - lw) // 2

            # Draw text with white stroke for artifact coverage
            draw.text(
                (current_x, current_y),
                line,
                fill=(0, 0, 0),
                font=font,
                stroke_width=outline_thickness,
                stroke_fill=(255, 255, 255)
            )

            # Advance Y by the uniform line height + spacing
            current_y += line_h + line_spacing

        rendered_count += 1

    print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
|
||||
|
||||
# ============================================================
|
||||
# MAIN
|
||||
# ============================================================
|
||||
def main():
|
||||
print(f"Loading image: {IMAGE_PATH}")
|
||||
image_bgr = cv2.imread(IMAGE_PATH)
|
||||
if image_bgr is None:
|
||||
print(f"❌ Error: Could not load {IMAGE_PATH}")
|
||||
return
|
||||
|
||||
print(f"Loading translations: {TRANSLATIONS_PATH}")
|
||||
translations = parse_translations(TRANSLATIONS_PATH)
|
||||
|
||||
print(f"Loading bubble data: {BUBBLES_PATH}")
|
||||
bubbles_data = parse_bubbles(BUBBLES_PATH)
|
||||
|
||||
print("Resolving font...")
|
||||
font_path = resolve_font_path()
|
||||
|
||||
print("\n--- Step 1: Erasing original text ---")
|
||||
erased_bgr = erase_quads(
|
||||
image_bgr=image_bgr,
|
||||
bubbles_data=bubbles_data,
|
||||
translations=translations,
|
||||
skip_ids=SKIP_BUBBLE_IDS,
|
||||
pad=QUAD_PAD
|
||||
)
|
||||
|
||||
m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
|
||||
m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
|
||||
return m
|
||||
|
||||
|
||||
def build_clean_mask(img_bgr, bubble_data):
    """
    Build the white-out mask for one bubble.

    FINAL RULE:
    clean_mask MUST cover yellow_mask completely.

    Returns (clean_mask, yellow_mask) as uint8 images sized like img_bgr:
    yellow_mask is the guaranteed OCR-line coverage from build_yellow_mask;
    clean_mask is yellow_mask optionally grown (dilate + close, clipped to
    the bubble interior) when ENABLE_EXTRA_CLEAN is set.
    """
    h, w = img_bgr.shape[:2]
    yellow = build_yellow_mask(bubble_data, h, w)

    # start with guaranteed yellow
    clean = yellow.copy()

    if ENABLE_EXTRA_CLEAN:
        # Grow coverage slightly, but never outside the bubble interior.
        bubble_m = bubble_interior_mask(img_bgr, bubble_data)
        extra = cv2.dilate(yellow, np.ones((3, 3), np.uint8), iterations=EXTRA_DILATE_ITERS)
        extra = cv2.morphologyEx(extra, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=EXTRA_CLOSE_ITERS)
        extra = cv2.bitwise_and(extra, bubble_m)

        # IMPORTANT: union with yellow (never subtract yellow)
        clean = cv2.bitwise_or(yellow, extra)

    # final guarantee (defensive)
    clean = cv2.bitwise_or(clean, yellow)

    return clean, yellow
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DRAW BUBBLE
|
||||
# ─────────────────────────────────────────────
|
||||
def draw_bubble(
    pil_img,
    img_bgr_ref,
    bubble_data,
    original_text,
    translated_text,
    font_candidates,
    font_color,
    stroke_color
):
    """
    Clean one bubble in *pil_img* (in place) and draw its translation.

    Returns a status string: "skip_sfx" (untranslated sound effect),
    "skip_no_area" (no mask area to work with), "clean_only" (erased but
    nothing to draw), or "rendered".
    """
    # Sound effects whose "translation" is the same text are left alone.
    if original_text and translated_text:
        if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text):
            return "skip_sfx"

    rgb = np.array(pil_img)
    h, w = rgb.shape[:2]

    clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data)
    if np.count_nonzero(clean_mask) == 0:
        return "skip_no_area"

    # 1) FORCE white fill on clean mask (includes full yellow by guarantee)
    rgb[clean_mask == 255] = [255, 255, 255]

    # 2) Optional edge restore, but NEVER overwrite yellow coverage
    if ENABLE_EDGE_RESTORE:
        # Recover the bubble's border pixels that the white fill may have hit.
        bubble_m = bubble_interior_mask(img_bgr_ref, bubble_data)
        edge = cv2.morphologyEx(bubble_m, cv2.MORPH_GRADIENT, np.ones((3, 3), np.uint8))
        edge = cv2.dilate(edge, np.ones((3, 3), np.uint8), iterations=EDGE_RESTORE_DILATE)

        # Don't restore where yellow exists (hard guarantee)
        edge[yellow_mask == 255] = 0

        orig_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
        rgb[edge == 255] = orig_rgb[edge == 255]

    # Write the cleaned pixels back into the shared PIL image.
    pil_img.paste(Image.fromarray(rgb))

    if not translated_text:
        return "clean_only"

    # text region based on yellow area (exact requirement)
    text_bbox = bbox_from_mask(yellow_mask)
    if text_bbox is None:
        text_bbox = bbox_from_mask(clean_mask)
    if text_bbox is None:
        return "skip_no_area"

    x1, y1, x2, y2 = text_bbox

    draw = ImageDraw.Draw(pil_img)
    text_cx = int((x1 + x2) / 2)
    text_cy = int((y1 + y2) / 2)
    # Inset the usable area slightly (TEXT_INSET) so text never touches the
    # mask edge; never shrink below 16 px in either dimension.
    safe_w = max(16, int((x2 - x1) * TEXT_INSET))
    safe_h = max(16, int((y2 - y1) * TEXT_INSET))

    font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)

    # Vertically center the wrapped block, then draw line by line with stroke.
    y_cursor = int(round(text_cy - total_h / 2.0))
    for line in lines:
        lw, lh = measure_text(draw, line, font)
        x = text_cx - lw // 2
        draw_text_with_stroke(draw, (x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color)
        # Line advance mirrors the gap rule used inside wrap_text.
        y_cursor += lh + max(lh // 5, 2)

    return "rendered"
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_candidates=DEFAULT_FONT_CANDIDATES,
    font_color=DEFAULT_FONT_COLOR,
    stroke_color=DEFAULT_STROKE_COLOR
):
    """
    End-to-end page render: load image + translations + bubbles, clean and
    draw every translated bubble, and write the result to *output_image*.

    Raises FileNotFoundError when the input image cannot be loaded.
    Bubbles are processed in reading order (falling back to bubble id
    when no "reading_order" field is present).
    """
    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot load image: {input_image}")

    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    translations, originals, flags_map = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)

    rendered, skipped = 0, 0

    def sort_key(item):
        # Order bubbles by their reading_order field; fall back to the id.
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))

    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        # A translation without matching bubble geometry cannot be placed.
        if bubble_id not in bubbles:
            skipped += 1
            continue

        bubble_data = bubbles[bubble_id]
        original_text = originals.get(bubble_id, "")

        status = draw_bubble(
            pil_img=img_pil,
            img_bgr_ref=img_bgr,
            bubble_data=bubble_data,
            original_text=original_text,
            translated_text=translated_text,
            font_candidates=font_candidates,
            font_color=font_color,
            stroke_color=stroke_color
        )

        # draw_bubble signals skips with a "skip*" status string.
        if status.startswith("skip"):
            skipped += 1
        else:
            rendered += 1

    out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    cv2.imwrite(output_image, out_bgr)

    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
    print(f"📄 Output → {output_image}")
    print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")
|
||||
print("\n--- Step 2: Rendering translated text ---")
|
||||
final_bgr = render_text(
|
||||
image_bgr=erased_bgr,
|
||||
bubbles_data=bubbles_data,
|
||||
translations=translations,
|
||||
font_path=font_path,
|
||||
skip_ids=SKIP_BUBBLE_IDS
|
||||
)
|
||||
|
||||
print(f"\nSaving final image to: {OUTPUT_PATH}")
|
||||
cv2.imwrite(OUTPUT_PATH, final_bgr)
|
||||
print("✅ Done!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
render_translations(
|
||||
input_image="001-page.png",
|
||||
output_image="page_translated.png",
|
||||
translations_file="output.txt",
|
||||
bubbles_file="bubbles.json",
|
||||
font_candidates=DEFAULT_FONT_CANDIDATES,
|
||||
font_color=DEFAULT_FONT_COLOR,
|
||||
stroke_color=DEFAULT_STROKE_COLOR
|
||||
)
|
||||
main()
|
||||
3528
manga-translator.py
3528
manga-translator.py
File diff suppressed because it is too large
Load Diff
37
older-code/analyze_box5.py
Normal file
37
older-code/analyze_box5.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
# Debug script: dump every raw OCR detection on 004.png and flag the ones
# whose bounding box overlaps bubble "5" from the saved bubbles.json.
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np

# Import functions from manga-translator.py
# (the file name contains a dash, so a normal `import` statement cannot load
# it — importlib is used to load it under the module name "manga_translator")
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)  # presumably a list of (quad, text, conf) — matches the unpacking below

# Load current bubbles to see what box 5 contains
with open('bubbles.json') as f:
    bubbles_data = json.load(f)
box5_data = bubbles_data['5']
# Convert the stored x/y/w/h record into xyxy corner coordinates.
box5_bounds = (box5_data['x'], box5_data['y'], box5_data['x'] + box5_data['w'], box5_data['y'] + box5_data['h'])
print(f'Box 5 bounds (xyxy): {box5_bounds}')
print()

# Print all detections sorted by position
# (sorted top-to-bottom, then left-to-right, using each quad's bounding box)
print('All raw detections:')
for i, (bbox, text, conf) in enumerate(sorted(raw, key=lambda x: (mt.quad_bbox(x[0])[1], mt.quad_bbox(x[0])[0]))):
    b = mt.quad_bbox(bbox)
    t_norm = mt.normalize_text(text)
    print(f'{i:2d}. [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] conf={conf:.2f} text="{t_norm}"')

    # Check if this overlaps with box 5 (standard axis-aligned rejection test:
    # the boxes overlap unless one lies entirely to a side of the other)
    b5_x1, b5_y1, b5_x2, b5_y2 = box5_bounds
    if not (b[2] < b5_x1 or b[0] > b5_x2 or b[3] < b5_y1 or b[1] > b5_y2):
        print(f' ^ OVERLAPS with Box 5!')
|
||||
95
older-code/analyze_box7_split.py
Normal file
95
older-code/analyze_box7_split.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
# Debug script: inspect bubbles 7 and 8 after grouping and measure the
# horizontal gap / vertical overlap between them, to tune split heuristics.
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# Load manga-translator.py via importlib (dash in filename prevents `import`).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter as the pipeline does (same confidence floor and text filters the
# main pipeline applies before grouping)
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)

    if conf < 0.12:
        continue
    if len(t) < 1:
        continue
    if mt.is_noise_text(t):
        continue
    if mt.is_sound_effect(t):
        continue
    if mt.is_title_text(t):
        continue

    filtered.append((bbox, t, conf))

# Run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

print("=== BUBBLE 7 & 8 ANALYSIS ===\n")
print("Current Bubble 7 (right side content):")
for bid in [7]:
    if bid in bubble_indices:
        box = bubble_boxes[bid]
        print(f" Box: {box}")
        print(f" Indices: {bubble_indices[bid]}")
        indices = bubble_indices[bid]
        # Per-token bounding boxes of everything grouped into this bubble.
        boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
        min_x = min(b[0] for b in boxes)
        max_x = max(b[2] for b in boxes)
        print(f" X range: {min_x} - {max_x}")
        for idx in indices:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")

print("\nCurrent Bubble 8 (left side content):")
for bid in [8]:
    if bid in bubble_indices:
        box = bubble_boxes[bid]
        print(f" Box: {box}")
        print(f" Indices: {bubble_indices[bid]}")
        indices = bubble_indices[bid]
        boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
        min_x = min(b[0] for b in boxes)
        max_x = max(b[2] for b in boxes)
        print(f" X range: {min_x} - {max_x}")
        for idx in indices:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")

# Check the horizontal gap between them
print("\n=== GAP ANALYSIS ===")
if 7 in bubble_indices and 8 in bubble_indices:
    boxes7 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[7]]
    boxes8 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[8]]

    # Distance between bubble 7's right edge and bubble 8's left edge.
    max_x7 = max(b[2] for b in boxes7)
    min_x8 = min(b[0] for b in boxes8)

    print(f"Bubble 7 max X: {max_x7}")
    print(f"Bubble 8 min X: {min_x8}")
    print(f"Horizontal gap: {min_x8 - max_x7}")

    # Check Y overlap (how much the two bubbles share vertically)
    min_y7 = min(b[1] for b in boxes7)
    max_y7 = max(b[3] for b in boxes7)
    min_y8 = min(b[1] for b in boxes8)
    max_y8 = max(b[3] for b in boxes8)

    print(f"\nBubble 7 Y range: {min_y7} - {max_y7}")
    print(f"Bubble 8 Y range: {min_y8} - {max_y8}")
    print(f"Y overlap: {max(0, min(max_y7, max_y8) - max(min_y7, min_y8))} pixels")
|
||||
55
older-code/analyze_grouping.py
Normal file
55
older-code/analyze_grouping.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
# Debug script: run the pipeline's filter + group_tokens steps on 004.png and
# dump every resulting bubble with its member detections.
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# Load manga-translator.py via importlib (dash in filename prevents `import`).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)

    if conf < 0.12:
        continue
    if len(t) < 1:
        continue
    if mt.is_noise_text(t):
        continue
    if mt.is_sound_effect(t):
        continue
    if mt.is_title_text(t):
        continue

    filtered.append((bbox, t, conf))

print(f"Filtered {len(filtered)} detections")

# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

# Find which bubble contains box 5
# NOTE(review): box5_region is defined but never used below — presumably the
# region was meant to be matched against bubble_boxes; verify or remove.
box5_region = (378, 570, 536, 753)

print("\n=== BUBBLES ===")
for bid, box in bubble_boxes.items():
    print(f"Bubble {bid}: {box}")
    print(f" Indices: {bubble_indices[bid]}")
    print(f" Detections:")
    for idx in bubble_indices[bid]:
        b = mt.quad_bbox(filtered[idx][0])
        print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
|
||||
77
older-code/check_box7.py
Normal file
77
older-code/check_box7.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
# Debug script: compare the current grouping of boxes 5 and 7 against the
# bounds previously saved in bubbles.json.
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# Load manga-translator.py via importlib (dash in filename prevents `import`).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)

    if conf < 0.12:
        continue
    if len(t) < 1:
        continue
    if mt.is_noise_text(t):
        continue
    if mt.is_sound_effect(t):
        continue
    if mt.is_title_text(t):
        continue

    filtered.append((bbox, t, conf))

# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

# Check current bubbles.json for reference
with open('bubbles.json') as f:
    old_bubbles = json.load(f)

print("=== BOX 5 ===")
print(f"Old bounds (from bubbles.json): x={old_bubbles['5']['x']}, y={old_bubbles['5']['y']}, w={old_bubbles['5']['w']}, h={old_bubbles['5']['h']}")
print(f" (xyxy): ({old_bubbles['5']['x']}, {old_bubbles['5']['y']}, {old_bubbles['5']['x'] + old_bubbles['5']['w']}, {old_bubbles['5']['y'] + old_bubbles['5']['h']})")

# Find bubble at that location in current grouping
# NOTE(review): (371, 563) is a hard-coded coordinate taken from a previous
# run; re-running against a different image/grouping will silently match
# nothing.
for bid, box in bubble_boxes.items():
    if box[0] == 371 and box[1] == 563:  # New box 5 location
        print(f"Current bubble {bid}: {box}")
        print(f" Detections: {bubble_indices[bid]}")
        for idx in bubble_indices[bid]:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")

print("\n=== BOX 7 ===")
print(f"Old bounds (from bubbles.json): x={old_bubbles['7']['x']}, y={old_bubbles['7']['y']}, w={old_bubbles['7']['w']}, h={old_bubbles['7']['h']}")
print(f" (xyxy): ({old_bubbles['7']['x']}, {old_bubbles['7']['y']}, {old_bubbles['7']['x'] + old_bubbles['7']['w']}, {old_bubbles['7']['y'] + old_bubbles['7']['h']})")

# Find corresponding bubble (any current bubble overlapping old box 7)
for bid, box in bubble_boxes.items():
    x1, y1, x2, y2 = box
    # Check if this overlaps with old box 7
    old_x1, old_y1 = old_bubbles['7']['x'], old_bubbles['7']['y']
    old_x2 = old_x1 + old_bubbles['7']['w']
    old_y2 = old_y1 + old_bubbles['7']['h']

    # Axis-aligned rejection test: overlap unless fully to one side.
    if not (x2 < old_x1 or x1 > old_x2 or y2 < old_y1 or y1 > old_y2):
        print(f"Current bubble {bid}: {box}")
        print(f" Detections: {bubble_indices[bid]}")
        for idx in bubble_indices[bid]:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
|
||||
68
older-code/check_grouping_logic.py
Normal file
68
older-code/check_grouping_logic.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
# Debug script: manually re-run group_tokens' proximity conditions
# (overlap_or_near and the centroid-distance check) between two hand-picked
# sets of detections to understand why they were/weren't merged.
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import numpy as np
import importlib.util

# Load manga-translator.py via importlib (dash in filename prevents `import`).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))

# Get the indices we're interested in (left and right bubbles)
# NOTE(review): hard-coded indices from a previous run of this exact image;
# they will not be stable across OCR runs.
left_indices = [41, 42, 43, 44, 45, 46]  # LET, GO, OFF, ME, AL-, REA-
right_indices = [47, 48, 49, 50, 51, 52, 53, 54]  # DON'T, WORRY!, HARUKO, ...

print("=== CHECKING GROUPING CONDITIONS ===\n")

# Check if they would be united in group_tokens
boxes_left = [mt.quad_bbox(filtered[i][0]) for i in left_indices]
boxes_right = [mt.quad_bbox(filtered[i][0]) for i in right_indices]

# Check overlap_or_near (edge-to-edge gap on each axis must be <= gap_px)
print("Checking overlap_or_near with gap=18:")
for li, bi in enumerate(left_indices):
    for ri, bj in enumerate(right_indices):
        b_left = boxes_left[li]
        b_right = boxes_right[ri]
        gap_x = max(0, max(b_left[0], b_right[0]) - min(b_left[2], b_right[2]))
        gap_y = max(0, max(b_left[1], b_right[1]) - min(b_left[3], b_right[3]))
        overlaps = gap_x <= 18 and gap_y <= 18
        if overlaps:
            print(f" {bi} and {bj} overlap/near: gap_x={gap_x}, gap_y={gap_y}")

# Check distance check (threshold scales with the median token height)
hs = [max(1.0, b[3] - b[1]) for b in [*boxes_left, *boxes_right]]
med_h = float(np.median(hs)) if hs else 12.0
dist_thresh = max(20.0, med_h * 2.2)

print(f"\nMedian height: {med_h}")
print(f"Distance threshold: {dist_thresh}")

print("\nChecking distance check:")
for li, bi in enumerate(left_indices[:1]):  # Just check first from each
    for ri, bj in enumerate(right_indices[:1]):
        b_left = boxes_left[li]
        b_right = boxes_right[ri]
        # Centroid-to-centroid Euclidean distance between the two tokens.
        cx_left = (b_left[0] + b_left[2]) / 2.0
        cy_left = (b_left[1] + b_left[3]) / 2.0
        cx_right = (b_right[0] + b_right[2]) / 2.0
        cy_right = (b_right[1] + b_right[3]) / 2.0
        d = ((cx_left - cx_right) ** 2 + (cy_left - cy_right) ** 2) ** 0.5
        within_dist = d <= dist_thresh
        within_y = abs(cy_left - cy_right) <= med_h * 3.0
        print(f" {bi} to {bj}: distance={d:.1f}, within_dist={within_dist}, within_y_tol={within_y}")
|
||||
107
older-code/debug_split_phase.py
Normal file
107
older-code/debug_split_phase.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
# Debug script to see what bubbles are produced after splitting
# (runs the full filter + grouping, then dry-runs the panel/column split
# heuristics without actually committing the splits).

import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# Load manga-translator.py via importlib (dash in filename prevents `import`).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Full filtering as pipeline does (including the stricter top-band rule)
filtered = []
skipped = 0
ih, iw = image.shape[:2]

for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)

    if conf < 0.12:
        skipped += 1
        continue
    if len(t) < 1:
        skipped += 1
        continue
    if mt.is_noise_text(t):
        skipped += 1
        continue
    if mt.is_sound_effect(t):
        skipped += 1
        continue
    if mt.is_title_text(t):
        skipped += 1
        continue
    # Tokens in the top band of the page (likely titles) need higher
    # confidence unless they are very short.
    if qb[1] < int(ih * mt.TOP_BAND_RATIO):
        if conf < 0.70 and len(t) >= 5:
            skipped += 1
            continue

    filtered.append((bbox, t, conf))

resolved_gap = mt.auto_gap(image_path)
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
)

print("=== AFTER GROUPING ===")
print(f"Bubbles dict keys: {sorted(bubbles.keys())}")
for bid in [7, 8]:
    if bid in bubbles:
        print(f"\nBubble {bid}:")
        print(f" Box: {bubble_boxes[bid]}")
        print(f" Indices ({len(bubble_indices[bid])}): {bubble_indices[bid]}")
        print(f" Quads ({len(bubble_quads[bid])})")

# Now simulate the split logic (detection only — splits are recorded, not
# applied, so split bubbles simply drop out of the new_* dicts below)
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
splits_performed = []

for bid in list(bubbles.keys()):
    box = bubble_boxes[bid]
    bubble_split = None

    # Try split
    split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
    if split_result:
        box_left, box_right, split_x = split_result
        # ... split logic ...
        bubble_split = "panel_split"

    if bubble_split is None:
        col_split = mt.split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
        if col_split:
            bubble_split = "column_split"

    if bubble_split:
        splits_performed.append(f"Bubble {bid}: {bubble_split}")
        # Don't actually split here, just mark it
    else:
        # No split
        new_bubbles[bid] = bubbles[bid]
        new_bubble_boxes[bid] = bubble_boxes[bid]
        new_bubble_quads[bid] = bubble_quads[bid]
        new_bubble_indices[bid] = bubble_indices[bid]

print("\n=== AFTER SPLIT LOGIC ===")
print(f"Splits detected: {len(splits_performed)}")
for s in splits_performed:
    print(f" {s}")

print(f"\nBubbles dict keys: {sorted(new_bubbles.keys())}")
for bid in [7, 8]:
    if bid in new_bubbles:
        print(f"\nBubble {bid}:")
        print(f" Box: {new_bubble_boxes[bid]}")
        print(f" Indices ({len(new_bubble_indices[bid])}): {new_bubble_indices[bid][:3]}...")
|
||||
119
older-code/patch_manga_translator.py
Normal file
119
older-code/patch_manga_translator.py
Normal file
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
TARGET = Path("manga-translator.py")
|
||||
|
||||
def cut_after_first_entrypoint(text: str) -> str:
    """Truncate *text* after its first ``if __name__ == "__main__":`` block.

    Everything before the first entry-point guard is kept, followed by the
    guard line itself and the blank or indented lines forming its body. Any
    duplicated tail after that block (the bug this patcher fixes) is dropped.
    Text without a guard is returned unchanged.
    """
    match = re.search(r'(?m)^if __name__ == "__main__":\s*$', text)
    if match is None:
        return text

    head = text[:match.start()]
    block_lines = text[match.start():].splitlines(True)

    kept = [block_lines[0]]  # the guard line itself
    for ln in block_lines[1:]:
        if not ln.strip():
            kept.append(ln)  # blank lines may appear inside the block
        elif ln.startswith((" ", "\t")):
            kept.append(ln)  # still indented => still inside the block
        else:
            break  # dedented back to column 0 => block ended
    return head + "".join(kept)
|
||||
|
||||
def replace_bad_vars(text: str) -> str:
    """Rewrite stale call sites to use the current local variable names.

    The helpers were originally called with ``ocr``/``image_bgr``; those
    locals were renamed to ``filtered``/``image`` in the pipeline.
    """
    fixes = {
        "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr, image_bgr)":
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)",
        "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr)":
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered)",
    }
    for stale, fixed in fixes.items():
        text = text.replace(stale, fixed)
    return text
|
||||
|
||||
def ensure_autofix_chain(text: str) -> str:
    """Upgrade the auto-fix section to run split + merge, not merge alone.

    Replaces the single merge_micro_boxes_relaxed() call under the
    "Auto-fix" banner with an auto_fix_bubble_detection() pass followed by
    the merge. A no-op when the old block is not present.
    """
    old = "".join([
        "    # ── Auto-fix (split + merge) ──────────────────────────────────────────\n",
        "    if auto_fix_bubbles:\n",
        "        bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n",
    ])
    new = "".join([
        "    # ── Auto-fix (split + merge) ──────────────────────────────────────────\n",
        "    if auto_fix_bubbles:\n",
        "        bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection(\n",
        "            bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image)\n",
        "        bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(\n",
        "            bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n",
    ])
    return text.replace(old, new)
|
||||
|
||||
def ensure_split_commit(text: str) -> str:
    """Commit split results back into the working dicts before dedup runs.

    Injects the ``new_* -> working`` reassignments just above the
    "Remove nested / duplicate boxes" marker. Idempotent: a no-op when the
    marker is missing or the assignments are already present.
    """
    marker = "    # ── Remove nested / duplicate boxes ──────────────────────────────────\n"
    if marker not in text or "bubbles = new_bubbles" in text:
        return text

    assignments = (
        "    bubbles = new_bubbles\n"
        "    bubble_boxes = new_bubble_boxes\n"
        "    bubble_quads = new_bubble_quads\n"
        "    bubble_indices = new_bubble_indices\n\n"
    )
    return text.replace(marker, assignments + marker)
|
||||
|
||||
def ensure_rescue_pipeline(text: str) -> str:
    """Append confidence-floor + rescue steps right after the filter summary.

    After the "Kept/Skipped" report line, adds a pass that floors the
    confidence of protected short dialogue tokens and merges back rescued
    name/short tokens. Idempotent: a no-op when the anchor line is missing
    or the rescue call is already present.
    """
    anchor = '    print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
    if anchor not in text or "rescue_name_and_short_tokens(raw" in text:
        return text

    replacement = anchor + (
        '    # Protect short dialogue tokens confidence\n'
        '    tmp = []\n'
        '    for bbox, t, conf in filtered:\n'
        '        tmp.append((bbox, t, maybe_conf_floor_for_protected(t, conf, floor=0.40)))\n'
        '    filtered = tmp\n'
        '    # Rescue names/short tokens dropped by strict filters\n'
        '    rescued = rescue_name_and_short_tokens(raw, min_conf=0.20)\n'
        '    filtered = merge_rescued_items(filtered, rescued, iou_threshold=0.55)\n'
    )
    return text.replace(anchor, replacement)
|
||||
|
||||
def main():
    """Apply every idempotent patch to manga-translator.py, in place."""
    if not TARGET.exists():
        raise FileNotFoundError(f"Not found: {TARGET}")

    patched = TARGET.read_text(encoding="utf-8")
    # Order matters: the duplicated tail is removed before the textual
    # patches so each replacement is applied exactly once.
    for step in (
        cut_after_first_entrypoint,
        replace_bad_vars,
        ensure_autofix_chain,
        ensure_split_commit,
        ensure_rescue_pipeline,
    ):
        patched = step(patched)

    TARGET.write_text(patched, encoding="utf-8")
    print("✅ Patched manga-translator.py")


if __name__ == "__main__":
    main()
|
||||
56
older-code/regenerate_debug.py
Normal file
56
older-code/regenerate_debug.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Regenerate debug_clusters.png with the new split bubbles.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def quad_bbox(quad):
    """Axis-aligned bounding box (min_x, min_y, max_x, max_y) of a quad."""
    x_min = min(p[0] for p in quad)
    y_min = min(p[1] for p in quad)
    x_max = max(p[0] for p in quad)
    y_max = max(p[1] for p in quad)
    return (x_min, y_min, x_max, y_max)
|
||||
|
||||
def save_debug_clusters_from_json(
    image_path="004.png",
    bubbles_path="bubbles.json",
    out_path="debug_clusters.png"
):
    """Render a debug overlay image from a saved bubbles.json.

    White-fills every OCR quad (erasing the original text), then draws each
    bubble's bounding box in green with a "BOX#<id>" label, and writes the
    composite to *out_path*. Prints an error and returns early when the
    source image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ Cannot load image: {image_path}")
        return

    # Load bubbles.json
    with open(bubbles_path, "r", encoding="utf-8") as f:
        bubbles_data = json.load(f)

    # Draw all quad polygons in white (erasing original text)
    for bid_str, bubble_info in bubbles_data.items():
        for quad in bubble_info.get("quads", []):
            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(img, [pts], (255, 255, 255))
            # Light grey outline keeps the erased quads visible in the overlay.
            cv2.polylines(img, [pts], True, (180, 180, 180), 1)

    # Draw bounding boxes with labels
    for bid_str, bubble_info in bubbles_data.items():
        bid = int(bid_str)
        x = bubble_info["x"]
        y = bubble_info["y"]
        w = bubble_info["w"]
        h = bubble_info["h"]
        x2 = x + w
        y2 = y + h

        cv2.rectangle(img, (x, y), (x2, y2), (0, 220, 0), 2)
        # Clamp the label's y so boxes touching the top edge stay readable.
        cv2.putText(img, f"BOX#{bid}", (x + 2, max(15, y + 16)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)

    cv2.imwrite(out_path, img)
    print(f"✅ Saved: {out_path}")


if __name__ == "__main__":
    save_debug_clusters_from_json()
|
||||
183
older-code/split_bubbles.py
Normal file
183
older-code/split_bubbles.py
Normal file
@@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Helper script to split bubbles with multiple separate text regions.
|
||||
Run this to manually split Box 2 and Box 7 from debug_clusters.png
|
||||
"""
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
from typing import List, Tuple, Dict
|
||||
|
||||
def quad_bbox(quad):
    """Return (min_x, min_y, max_x, max_y) enclosing all points of *quad*."""
    x_vals = [pt[0] for pt in quad]
    y_vals = [pt[1] for pt in quad]
    return (min(x_vals), min(y_vals), max(x_vals), max(y_vals))
|
||||
|
||||
def boxes_union_xyxy(boxes):
    """Smallest xyxy box covering every non-None box; None if nothing given."""
    valid = [b for b in boxes if b is not None]
    if not valid:
        return None
    x1s, y1s, x2s, y2s = zip(*valid)
    return (int(min(x1s)), int(min(y1s)), int(max(x2s)), int(max(y2s)))
|
||||
|
||||
def xyxy_to_xywh(bbox):
    """Convert an (x1, y1, x2, y2) box into an {x, y, w, h} dict.

    None is passed through unchanged so callers can chain on possibly-empty
    unions.
    """
    if bbox is None:
        return None
    left, top, right, bottom = bbox
    return {
        "x": int(left),
        "y": int(top),
        "w": int(right - left),
        "h": int(bottom - top),
    }
|
||||
|
||||
def bbox_area_xyxy(b):
    """Area of a bounding box in xyxy format; None counts as zero."""
    if b is None:
        return 0
    width = b[2] - b[0]
    height = b[3] - b[1]
    return width * height
|
||||
|
||||
def split_bubble_by_vertical_gap(bubble_id: int, bubble_data: Dict, filtered_indices_map: Dict, min_gap_px: float = 80):
    """
    Attempt to split a bubble by detecting a significant vertical gap between
    columns of text.

    Args:
        bubble_id: id of the bubble (kept for interface compatibility; unused).
        bubble_data: dict with at least a 'quads' list of 4-point polygons.
        filtered_indices_map: kept for interface compatibility; unused.
        min_gap_px: minimum horizontal center-to-center gap (in pixels) that
            triggers a split. Defaults to 80, the previously hard-coded value.

    Returns:
        (left_indices, right_indices, gap_size) — indices into
        bubble_data['quads'] for each side — or None if no split is warranted.
    """
    quads = bubble_data['quads']
    if len(quads) < 2:
        return None

    # Horizontal center of each quad, paired with its original index.
    # (The bbox math is done inline; only the x-extent is needed, so the
    # unused 'quad_bboxes' lookup from the old version is gone — the function
    # no longer requires that key to be present.)
    centers = []
    for i, quad in enumerate(quads):
        xs = [p[0] for p in quad]
        centers.append((i, (min(xs) + max(xs)) / 2.0))

    # Sort by x-coordinate so adjacent entries are horizontal neighbours.
    centers.sort(key=lambda t: t[1])

    # Find the largest gap between consecutive x positions.
    max_gap = 0
    split_pos = -1
    for i in range(len(centers) - 1):
        gap = centers[i + 1][1] - centers[i][1]
        if gap > max_gap:
            max_gap = gap
            split_pos = i

    # If the widest gap is large enough, split there.
    if split_pos != -1 and max_gap > min_gap_px:
        left_indices = [centers[j][0] for j in range(split_pos + 1)]
        right_indices = [centers[j][0] for j in range(split_pos + 1, len(centers))]
        return (left_indices, right_indices, max_gap)

    return None
|
||||
|
||||
def split_bubbles_in_json(input_file="bubbles.json", output_file="bubbles_split.json", bubble_ids_to_split=(2, 7)):
    """Split the listed bubbles in *input_file* and write the result to *output_file*.

    Each bubble whose id is in ``bubble_ids_to_split`` is tested with
    split_bubble_by_vertical_gap(); when a wide horizontal gap is found the
    bubble is divided into a left part (keeping its original id) and a right
    part (assigned a fresh id past the current maximum). All other bubbles
    are copied through unchanged.

    Args:
        input_file: path of the bubbles JSON to read.
        output_file: path the (possibly split) bubbles JSON is written to.
        bubble_ids_to_split: ids of the bubbles to try to split. Any container
            supporting ``in`` works; the default is now an immutable tuple
            (the previous mutable-list default was a Python pitfall, though it
            was never mutated).
    """

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    new_data = {}
    # Fresh ids for right-hand halves start just past the current maximum.
    next_bid = max(int(k) for k in data.keys()) + 1

    for bid_str, bubble_data in data.items():
        bid = int(bid_str)

        if bid not in bubble_ids_to_split:
            # Keep original
            new_data[bid_str] = bubble_data
            continue

        # Try to split
        split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})

        if split_result:
            left_indices, right_indices, gap_size = split_result

            print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
            print(f" Left indices: {left_indices}")
            print(f" Right indices: {right_indices}")

            # Create left bubble - keep the original bubble ID
            left_quads = [bubble_data['quads'][i] for i in left_indices]
            left_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in left_indices]
            left_bbox = boxes_union_xyxy([quad_bbox(q) for q in left_quads])
            # Pad by 3 px on every side, clamped at the image origin.
            left_bbox_padded = (
                max(0, left_bbox[0] - 3),
                max(0, left_bbox[1] - 3),
                left_bbox[2] + 3,
                left_bbox[3] + 3
            )

            print(f" Left bbox: {left_bbox} -> padded: {left_bbox_padded}")

            new_data[str(bid)] = {
                "x": left_bbox_padded[0],
                "y": left_bbox_padded[1],
                "w": left_bbox_padded[2] - left_bbox_padded[0],
                "h": left_bbox_padded[3] - left_bbox_padded[1],
                "reading_order": bubble_data.get("reading_order", bid),
                "quad_bboxes": left_quad_bboxes,
                "quads": left_quads,
                "text_bbox": xyxy_to_xywh(left_bbox),
                "line_bboxes": [],
                "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads])),
                "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads]))),
            }

            # Create right bubble - with new ID
            right_quads = [bubble_data['quads'][i] for i in right_indices]
            right_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in right_indices]
            right_bbox = boxes_union_xyxy([quad_bbox(q) for q in right_quads])
            right_bbox_padded = (
                max(0, right_bbox[0] - 3),
                max(0, right_bbox[1] - 3),
                right_bbox[2] + 3,
                right_bbox[3] + 3
            )

            print(f" Right bbox: {right_bbox} -> padded: {right_bbox_padded}")

            new_data[str(next_bid)] = {
                "x": right_bbox_padded[0],
                "y": right_bbox_padded[1],
                "w": right_bbox_padded[2] - right_bbox_padded[0],
                "h": right_bbox_padded[3] - right_bbox_padded[1],
                "reading_order": bubble_data.get("reading_order", next_bid),
                "quad_bboxes": right_quad_bboxes,
                "quads": right_quads,
                "text_bbox": xyxy_to_xywh(right_bbox),
                "line_bboxes": [],
                "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads])),
                "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads]))),
            }

            next_bid += 1
        else:
            # No split needed
            new_data[bid_str] = bubble_data

    # Write output
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Saved to {output_file}")


if __name__ == "__main__":
    split_bubbles_in_json(
        input_file="bubbles_original.json",  # Always read from original
        output_file="bubbles_split.json",
        bubble_ids_to_split=[2, 7]
    )
|
||||
154
older-code/split_final.py
Normal file
154
older-code/split_final.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON
|
||||
"""
|
||||
|
||||
import json
|
||||
import copy
|
||||
|
||||
def quad_bbox(quad):
    """Bounding box (min_x, min_y, max_x, max_y) of a quad's points."""
    sorted_x = sorted(p[0] for p in quad)
    sorted_y = sorted(p[1] for p in quad)
    return (sorted_x[0], sorted_y[0], sorted_x[-1], sorted_y[-1])
|
||||
|
||||
def boxes_union_xyxy(boxes):
    """Union (covering box) of all non-None xyxy boxes, or None if empty."""
    kept = [b for b in boxes if b is not None]
    if not kept:
        return None
    union = kept[0]
    for b in kept[1:]:
        union = (
            min(union[0], b[0]),
            min(union[1], b[1]),
            max(union[2], b[2]),
            max(union[3], b[3]),
        )
    return (int(union[0]), int(union[1]), int(union[2]), int(union[3]))
|
||||
|
||||
def xyxy_to_xywh(bbox):
    """Translate an xyxy corner tuple into an {x, y, w, h} dict (None passes through)."""
    if bbox is None:
        return None
    left, top, right, bottom = bbox
    return {"x": int(left), "y": int(top), "w": int(right - left), "h": int(bottom - top)}
|
||||
|
||||
def bbox_area_xyxy(b):
    """Area of an (x1, y1, x2, y2) box; 0 for None (empty union)."""
    if b is None:
        return 0
    left, top, right, bottom = b
    return (right - left) * (bottom - top)
|
||||
|
||||
# Load original
with open("bubbles_original.json", "r", encoding="utf-8") as f:
    original = json.load(f)

new_data = {}

# Copy all non-split bubbles verbatim.
for bid_str, bubble_data in original.items():
    bid = int(bid_str)
    if bid not in [2, 7]:
        new_data[bid_str] = copy.deepcopy(bubble_data)


def _split_box(box_data, indices, default_order):
    """Build one half of a split bubble from the given quad indices.

    Returns (bubble_dict, quad_count). The outer x/y/w/h box is the union
    of the selected quads padded by 3 px (clamped at 0 on the top-left).
    """
    quads = [box_data['quads'][i] for i in indices]
    quad_bboxes = [box_data['quad_bboxes'][i] for i in indices]
    bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    padded = (max(0, bbox[0] - 3), max(0, bbox[1] - 3), bbox[2] + 3, bbox[3] + 3)
    union = boxes_union_xyxy([quad_bbox(q) for q in quads])
    return {
        "x": padded[0],
        "y": padded[1],
        "w": padded[2] - padded[0],
        "h": padded[3] - padded[1],
        "reading_order": box_data.get("reading_order", default_order),
        "quad_bboxes": quad_bboxes,
        "quads": [[list(p) for p in quad] for quad in quads],  # Explicit list conversion
        "text_bbox": xyxy_to_xywh(bbox),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(union),
        "line_union_area": int(bbox_area_xyxy(union)),
    }, len(quads)


# Split Box 2: left part keeps ID 2, right part becomes new ID 8.
print("🔀 Splitting Box 2...")
box2_data = original["2"]
new_data["2"], n_left = _split_box(box2_data, [10, 1, 2, 4, 8, 0, 3, 6, 11, 12], 2)
print(f"  Left: y={new_data['2']['y']}, h={new_data['2']['h']}, quads={n_left}")
new_data["8"], n_right = _split_box(box2_data, [5, 7, 9], 8)
print(f"  Right: y={new_data['8']['y']}, h={new_data['8']['h']}, quads={n_right}")

# Split Box 7: left part keeps ID 7, right part becomes new ID 9.
print("\n🔀 Splitting Box 7...")
box7_data = original["7"]
new_data["7"], n_left = _split_box(box7_data, [8, 13, 4, 11, 2, 6], 7)
print(f"  Left: y={new_data['7']['y']}, h={new_data['7']['h']}, quads={n_left}")
new_data["9"], n_right = _split_box(box7_data, [0, 5, 1, 3, 7, 10, 12, 9], 9)
print(f"  Right: y={new_data['9']['y']}, h={new_data['9']['h']}, quads={n_right}")

# Sort by ID for output
new_data_sorted = {
    str(bid): new_data[str(bid)]
    for bid in sorted(int(k) for k in new_data.keys())
}

with open("bubbles.json", "w", encoding="utf-8") as f:
    json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)

print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")
|
||||
75
older-code/test_panel_split.py
Normal file
75
older-code/test_panel_split.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
# Ad-hoc debug harness: reproduces the OCR filter + grouping stages for a
# single page and reports whether split_panel_box() would split bubble 7.
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import numpy as np
import importlib.util

# Load manga-translator.py dynamically (hyphenated filename blocks a normal import).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Full filtering
# Mirrors the production chain: confidence floor, noise/SFX/title rejection,
# plus a stricter confidence rule inside the top band of the page.
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)

    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    if qb[1] < int(image.shape[0] * mt.TOP_BAND_RATIO):
        # Tokens near the top of the page need higher confidence unless short.
        if conf < 0.70 and len(t) >= 5:
            continue

    filtered.append((bbox, t, conf))

# Get grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=mt.auto_gap(image_path), bbox_padding=3
)

print("=== TESTING PANEL SPLIT ON BUBBLE 7 ===\n")

bid = 7  # bubble under investigation
box = bubble_boxes[bid]
print(f"Bubble {bid} box: {box}")
print(f"Bubble {bid} quads: {len(bubble_quads[bid])}")
print(f"Bubble {bid} indices: {len(bubble_indices[bid])}")

# Test split_panel_box
split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])

if split_result:
    box_left, box_right, split_x = split_result
    print(f"\n✓ Panel split detected!")
    print(f"  Split X: {split_x}")
    print(f"  Left box: {box_left}")
    print(f"  Right box: {box_right}")

    # Simulate index split
    # Assign each token to a side by the x-coordinate of its quad center.
    left_idxs, right_idxs = [], []
    for idx in bubble_indices[bid]:
        cx, cy = mt.quad_center(filtered[idx][0])
        if cx < split_x:
            left_idxs.append(idx)
        else:
            right_idxs.append(idx)

    print(f"\n  Left indices ({len(left_idxs)}): {left_idxs}")
    print(f"  Right indices ({len(right_idxs)}): {right_idxs}")

    if left_idxs and right_idxs:
        print(f"\n✓ Split is valid (both sides have content)")
    else:
        print(f"\n✗ Split is invalid (one side is empty)")
else:
    print(f"\n✗ No panel split detected")
    # Echo the individual gating conditions to explain the refusal; the
    # exact thresholds live in mt.split_panel_box — confirm there.
    print(f"  Threshold would be: quads >= 10? {len(bubble_quads[bid]) >= 10}")
    print(f"  Width >= 50? {box[2] - box[0] >= 50}")
    print(f"  Height >= 50? {box[3] - box[1] >= 50}")
||||
159
pipeline-render.py
Normal file
159
pipeline-render.py
Normal file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
pipeline_render.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Standalone Rendering Pipeline
|
||||
|
||||
Usage:
|
||||
python pipeline-render.py /path/to/chapter/folder
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import zipfile
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
import cv2 # ✅ Added OpenCV to load the image
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
DEFAULT_FONT_PATH = "fonts/ComicNeue-Regular.ttf"
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
    """Import a Python source file by path and return the resulting module."""
    found = importlib.util.spec_from_file_location(name, filepath)
    if found is None or found.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    loaded = importlib.util.module_from_spec(found)
    found.loader.exec_module(loaded)
    return loaded
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
    """List the chapter's page images, ordered by filename stem."""
    image_exts = {".jpg", ".jpeg", ".png", ".webp"}
    found = [
        entry for entry in Path(chapter_dir).iterdir()
        if entry.is_file() and entry.suffix.lower() in image_exts
    ]
    found.sort(key=lambda entry: entry.stem)
    return found
|
||||
|
||||
def pack_rendered_cbz(chapter_dir, output_cbz, rendered_files):
    """Bundle the rendered pages into a CBZ archive.

    Uses ZIP_STORED (no deflate) since page images are already compressed.
    Does nothing — and creates no archive — when there are no pages.
    """
    if not rendered_files:
        print("⚠️ No rendered pages found — CBZ not created.")
        return

    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as archive:
        for page_file in rendered_files:
            archive.write(page_file, page_file.name)

    print(f"\n✅ Rendered CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(rendered_files)} translated pages ready to read.")
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_render(page_path, workdir, renderer_module, font_path):
    """Render the translated text for one page.

    Expects the OCR/translation stage to have already written output.txt
    and bubbles.json into *workdir*; writes the rendered page image there
    and returns its Path, or None on any failure.
    """
    print(f"\n{'─' * 70}")
    print(f"🎨 RENDERING: {page_path.name}")
    print(f"{'─' * 70}")

    txt_path = workdir / "output.txt"
    json_path = workdir / "bubbles.json"
    out_img = workdir / page_path.name

    if not txt_path.exists() or not json_path.exists():
        print(" ⚠️ Missing output.txt or bubbles.json. Did you run the OCR pipeline first?")
        return None

    # ✅ FIX: Load the image into memory (as a NumPy array) before passing it
    img_array = cv2.imread(str(page_path.resolve()))
    if img_array is None:
        print(f" ❌ Failed to load image: {page_path.name}")
        return None

    orig_dir = os.getcwd()
    try:
        # Run inside the page's workdir so any relative paths the renderer
        # touches resolve next to this page's artifacts.
        os.chdir(workdir)

        # Pass the loaded image array instead of the string path
        renderer_module.render_translations(
            img_array,  # 1st arg: Image Data (NumPy array)
            str(out_img.resolve()),  # 2nd arg: Output image path
            str(txt_path.resolve()),  # 3rd arg: Translations text
            str(json_path.resolve()),  # 4th arg: Bubbles JSON
            font_path  # 5th arg: Font Path
        )
        print(" ✅ Render complete")
        return out_img

    except Exception as e:
        # Broad catch: one bad page must not abort the whole batch.
        print(f" ❌ Failed: {e}")
        return None

    finally:
        # Always restore the caller's working directory.
        os.chdir(orig_dir)
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def main():
    """CLI entry point: render every page of a chapter, then pack a CBZ."""
    parser = argparse.ArgumentParser(description="Manga Rendering Pipeline")
    parser.add_argument("chapter_dir", help="Path to the folder containing original manga pages")
    args = parser.parse_args()

    chapter_dir = Path(args.chapter_dir).resolve()
    # Archive lands next to the chapter folder as <name>_rendered.cbz.
    output_cbz = chapter_dir.parent / f"{chapter_dir.name}_rendered.cbz"

    script_dir = Path(__file__).parent
    # Resolve the font relative to this script so the tool works from any CWD.
    absolute_font_path = str((script_dir / DEFAULT_FONT_PATH).resolve())

    print("Loading renderer module...")
    try:
        renderer = load_module("manga_renderer", str(script_dir / "manga-renderer.py"))
    except Exception as e:
        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)

    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    print(f"\n📖 Chapter : {chapter_dir}")
    print(f" Pages : {len(pages)}\n")

    succeeded, failed = [], []
    rendered_files = []

    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] Checking data for {page_path.name}...")
        # Per-page artifacts live in <chapter>/translated/<page_stem>/.
        workdir = Path(chapter_dir) / "translated" / page_path.stem

        out_file = process_render(page_path, workdir, renderer, absolute_font_path)
        if out_file:
            succeeded.append(page_path.name)
            rendered_files.append(out_file)
        else:
            failed.append(page_path.name)

    print(f"\n{'═' * 70}")
    print("RENDER PIPELINE COMPLETE")
    print(f"✅ {len(succeeded)} page(s) rendered successfully")
    if failed:
        print(f"❌ {len(failed)} page(s) skipped or failed:")
        for f in failed:
            print(f" • {f}")
    print(f"{'═' * 70}\n")

    print("Packing final CBZ...")
    pack_rendered_cbz(chapter_dir, output_cbz, rendered_files)

if __name__ == "__main__":
    main()
|
||||
282
pipeline-translator.py
Normal file
282
pipeline-translator.py
Normal file
@@ -0,0 +1,282 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
pipeline-translator.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Translation OCR pipeline (Batch Processing Only)
|
||||
|
||||
Usage:
|
||||
python pipeline-translator.py /path/to/chapter/folder
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# PIPELINE CONFIGURATION
|
||||
# Maps to the process_manga_page() signature in manga-translator.py
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# NOTE: main() mutates this dict in-place when --source/--target are given,
# so process_page() sees the CLI overrides through this module-level name.
PIPELINE_CONFIG = dict(
    source_lang = "en",  # language OCR'd from the pages
    target_lang = "ca",  # language to translate into
)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def load_module(name: str, filepath: str):
    """Load a Python source file as a module registered under *name*."""
    module_spec = importlib.util.spec_from_file_location(name, filepath)
    if module_spec is None or module_spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    fresh = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(fresh)
    return fresh
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir: Path):
    """Return the page images of *chapter_dir*, ordered by filename stem."""
    valid_suffixes = {".jpg", ".jpeg", ".png", ".webp"}

    def _is_page(entry: Path) -> bool:
        return entry.is_file() and entry.suffix.lower() in valid_suffixes

    return sorted(filter(_is_page, chapter_dir.iterdir()), key=lambda p: p.stem)
|
||||
|
||||
|
||||
def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
    """Ensure and return the per-page folder <chapter>/translated/<page_stem>/."""
    page_dir = chapter_dir / "translated" / page_stem
    page_dir.mkdir(parents=True, exist_ok=True)
    return page_dir
|
||||
|
||||
|
||||
def verify_translator_api(module) -> bool:
    """
    Checks that the loaded module exposes process_manga_page()
    and that it accepts all keys defined in PIPELINE_CONFIG.

    Prints a warning for any missing parameter so mismatches are
    caught immediately rather than silently falling back to defaults.
    """
    import inspect

    fn = getattr(module, "process_manga_page", None)
    if fn is None:
        print("❌ manga-translator.py does not expose process_manga_page()")
        return False

    accepted = set(inspect.signature(fn).parameters)
    missing = [key for key in PIPELINE_CONFIG if key not in accepted]
    for key in missing:
        print(
            f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
            f"process_manga_page() — update pipeline or translator."
        )
    return not missing
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
    """OCR + translate one page; return True on success.

    All artifacts (bubbles.json, output.txt, debug_clusters.png) are
    written into *workdir*. Failures are printed, never raised, so the
    batch loop in main() continues with the next page.
    """
    print(f"\n{'─' * 70}")
    print(f" PAGE : {page_path.name}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
        # Run inside the page's own workdir so debug images and
        # output files land there automatically.
        os.chdir(workdir)

        output_json = str(workdir / "bubbles.json")
        output_txt = str(workdir / "output.txt")
        debug_path = str(workdir / "debug_clusters.png")

        print(" ⏳ Extracting text and translating...")

        results = translator_module.process_manga_page(
            image_path = str(page_path.resolve()),
            output_json = output_json,
            output_txt = output_txt,
            **PIPELINE_CONFIG,
        )

        # ── Optional debug visualisation ─────────────────────
        if results:
            try:
                # Imported lazily: cv2 is only needed for the debug overlay.
                import cv2

                image_bgr = cv2.imread(str(page_path.resolve()))
                if image_bgr is not None:
                    # Reconstruct vis_boxes / vis_lines from results dict
                    vis_boxes = {}
                    vis_lines = {}
                    vis_indices = {}

                    for bid_str, data in results.items():
                        bid = int(bid_str)
                        xywh = data["box"]
                        # Convert x/y/w/h back to the (x1, y1, x2, y2)
                        # corner form draw_debug_clusters() takes.
                        vis_boxes[bid] = (
                            xywh["x"],
                            xywh["y"],
                            xywh["x"] + xywh["w"],
                            xywh["y"] + xywh["h"],
                        )
                        vis_lines[bid] = data.get("lines", [])
                        vis_indices[bid] = []

                    translator_module.draw_debug_clusters(
                        image_bgr = image_bgr,
                        out_boxes = vis_boxes,
                        out_lines = vis_lines,
                        out_indices = vis_indices,
                        ocr = [],
                        save_path = debug_path,
                    )
            except Exception as e:
                print(f" ⚠️ Debug visualisation failed (non-fatal): {e}")

        # ── Sanity-check outputs ──────────────────────────────
        for fname in ("output.txt", "bubbles.json"):
            fpath = workdir / fname
            if not fpath.exists() or fpath.stat().st_size == 0:
                print(f" ⚠️ {fname} is missing or empty after processing.")

        if not results:
            print(" ⚠️ process_manga_page() returned no results.")
            return False

        print(f" ✅ Done — {len(results)} box(es) processed.")
        return True

    except Exception as e:
        import traceback
        print(f" ❌ Failed: {e}")
        traceback.print_exc()
        return False

    finally:
        # Always restore the caller's working directory.
        os.chdir(orig_dir)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
def main():
    """Batch CLI: OCR + translate every page image in a chapter folder."""
    parser = argparse.ArgumentParser(
        description="Manga Translation OCR Batch Pipeline"
    )
    parser.add_argument(
        "chapter_dir",
        help="Path to the folder containing manga page images"
    )
    parser.add_argument(
        "--start", type=int, default=1,
        help="Start from this page number (1-based, default: 1)"
    )
    parser.add_argument(
        "--end", type=int, default=None,
        help="Stop after this page number inclusive (default: all)"
    )
    parser.add_argument(
        "--source", "-s", default=None,
        help=f"Override source language (default: {PIPELINE_CONFIG['source_lang']})"
    )
    parser.add_argument(
        "--target", "-t", default=None,
        help=f"Override target language (default: {PIPELINE_CONFIG['target_lang']})"
    )
    args = parser.parse_args()

    # Allow CLI overrides of source/target without touching PIPELINE_CONFIG
    config = dict(PIPELINE_CONFIG)
    if args.source:
        config["source_lang"] = args.source
    if args.target:
        config["target_lang"] = args.target

    # Patch PIPELINE_CONFIG in-place so process_page() picks up overrides
    PIPELINE_CONFIG.update(config)

    chapter_dir = Path(args.chapter_dir).resolve()
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)

    # ── Load translator module ────────────────────────────────
    script_dir = Path(__file__).parent
    module_path = script_dir / "manga-translator.py"

    if not module_path.exists():
        print(f"❌ manga-translator.py not found in {script_dir}")
        sys.exit(1)

    print(f"📦 Loading translator from: {module_path}")
    try:
        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    # ── API compatibility check ───────────────────────────────
    # Fail fast if PIPELINE_CONFIG has drifted from the translator signature.
    if not verify_translator_api(translator):
        print("❌ Aborting — fix the parameter mismatch above first.")
        sys.exit(1)

    # ── Discover pages ────────────────────────────────────────
    all_pages = sorted_pages(chapter_dir)
    if not all_pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)

    # Apply --start / --end slice (1-based, inclusive)
    start_idx = max(0, args.start - 1)
    end_idx = args.end if args.end is not None else len(all_pages)
    pages = all_pages[start_idx:end_idx]

    if not pages:
        print(f"❌ No pages in range [{args.start}, {args.end}]")
        sys.exit(1)

    # ── Summary header ────────────────────────────────────────
    print(f"\n{'═' * 70}")
    print(f" 📖 Chapter : {chapter_dir.name}")
    print(f" 📄 Pages : {len(pages)} "
          f"(of {len(all_pages)} total, "
          f"range {args.start}–{end_idx})")
    print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']} → "
          f"{PIPELINE_CONFIG['target_lang']}")
    print(f"{'═' * 70}\n")

    succeeded, failed = [], []

    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(chapter_dir, page_path.stem)

        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    # ── Final report ──────────────────────────────────────────
    print(f"\n{'═' * 70}")
    print(" PIPELINE COMPLETE")
    print(f" ✅ {len(succeeded)} page(s) succeeded")
    if failed:
        print(f" ❌ {len(failed)} page(s) failed:")
        for name in failed:
            print(f" • {name}")
    print(f"{'═' * 70}\n")


if __name__ == "__main__":
    main()
|
||||
255
pipeline.py
255
pipeline.py
@@ -1,255 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
pipeline.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Translation + render pipeline
|
||||
|
||||
Flow per page:
|
||||
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
|
||||
2) render_translations() -> page_translated.png
|
||||
3) Pack CBZ with originals + rendered pages + text outputs
|
||||
|
||||
Folder structure:
|
||||
<CHAPTER_DIR>/
|
||||
├── 000.png
|
||||
├── 001.png
|
||||
└── translated/
|
||||
├── 000/
|
||||
│ ├── output.txt
|
||||
│ ├── bubbles.json
|
||||
│ ├── page_translated.png
|
||||
│ └── debug_clusters.png (optional)
|
||||
├── 001/
|
||||
│ └── ...
|
||||
└── ...
|
||||
|
||||
CBZ:
|
||||
- pages/<original pages>
|
||||
- rendered/<page_stem>_translated.png
|
||||
- translations/<page_stem>_output.txt
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import zipfile
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
|
||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
|
||||
|
||||
SOURCE_LANG = "en"
|
||||
TARGET_LANG = "ca"
|
||||
|
||||
# translator (NEW signature-compatible)
|
||||
CONFIDENCE_THRESHOLD = 0.10
|
||||
MIN_TEXT_LENGTH = 1
|
||||
GAP_PX = "auto" # was cluster/proximity in old version
|
||||
FILTER_SFX = True
|
||||
QUALITY_THRESHOLD = 0.50
|
||||
READING_MODE = "ltr"
|
||||
DEBUG = True
|
||||
|
||||
# renderer
|
||||
RENDER_ENABLED = True
|
||||
RENDER_OUTPUT_NAME = "page_translated.png"
|
||||
|
||||
# optional custom font list for renderer
|
||||
FONT_CANDIDATES = [
|
||||
"fonts/ComicNeue-Regular.ttf",
|
||||
"fonts/ComicRelief-Regular.ttf"
|
||||
]
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
    """Import the Python file at *filepath* under module name *name*."""
    loaded_spec = importlib.util.spec_from_file_location(name, filepath)
    if loaded_spec is None or loaded_spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    mod = importlib.util.module_from_spec(loaded_spec)
    loaded_spec.loader.exec_module(mod)
    return mod
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
    """Return the image files of *chapter_dir*, ordered by filename stem."""
    allowed = {".jpg", ".jpeg", ".png", ".webp"}
    images = []
    for entry in Path(chapter_dir).iterdir():
        if entry.is_file() and entry.suffix.lower() in allowed:
            images.append(entry)
    images.sort(key=lambda entry: entry.stem)
    return images
|
||||
|
||||
|
||||
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return the per-page output folder."""
    target = Path(chapter_dir) / "translated" / page_stem
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Zip originals, rendered pages and text outputs into one CBZ.

    Archive layout: pages/<original>, rendered/<stem>_translated.png,
    translations/<stem>_output.txt.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}

    pages = sorted(
        [p for p in Path(chapter_dir).iterdir()
         if p.is_file() and p.suffix.lower() in exts],
        key=lambda p: p.stem
    )

    # Per-page outputs are keyed by their parent folder name (the page stem).
    txts = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name
    )

    rendered = sorted(
        translated_dir.rglob(RENDER_OUTPUT_NAME),
        key=lambda p: p.parent.name
    )

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    # ZIP_STORED: images are already compressed; deflate would gain little.
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")

        # rendered pages
        for rp in rendered:
            arcname = f"rendered/{rp.parent.name}_translated.png"
            zf.write(rp, arcname)
            print(f" 🎨 {arcname}")

        # text outputs
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(
        f"\n✅ CBZ saved → {output_cbz} "
        f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)"
    )
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_page(page_path, workdir, translator_module, renderer_module):
    """
    Runs translator + renderer for one page.
    All generated files are written inside workdir.
    Returns True on success, False on any failure (page is skipped).
    """
    print(f"\n{'─' * 70}")
    print(f"PAGE: {page_path.name}")
    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
        # Work inside the page folder so all outputs land there.
        os.chdir(workdir)

        # 1) translate
        translator_module.translate_manga_text(
            image_path= str(page_path.resolve()),
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            min_text_length=MIN_TEXT_LENGTH,
            gap_px=GAP_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            reading_mode=READING_MODE,
            debug=DEBUG
        )
        print(" ✅ translator done")

        # 2) render (optional, gated by module-level RENDER_ENABLED flag)
        if RENDER_ENABLED:
            renderer_module.render_translations(
                input_image=str(page_path.resolve()),
                output_image=RENDER_OUTPUT_NAME,
                translations_file="output.txt",
                bubbles_file="bubbles.json",
                font_candidates=FONT_CANDIDATES
            )
            print(" ✅ renderer done")

        return True

    except Exception as e:
        # A failing page is reported and skipped, not fatal to the batch.
        print(f" ❌ Failed: {e}")
        return False

    finally:
        # Always restore the caller's working directory.
        os.chdir(orig_dir)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def main():
    """Run translate + render for every page of CHAPTER_DIR, then pack CBZ."""
    print("Loading modules...")

    try:
        translator = load_module("manga_translator", "manga-translator.py")
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)

    try:
        renderer = load_module("manga_renderer", "manga-renderer.py")
    except Exception as e:
        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)

    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
        sys.exit(1)

    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
    print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")

    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
    failed = []

    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
        ok = process_page(page_path, workdir, translator, renderer)
        if ok:
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

    print(f"\n{'═' * 70}")
    print("PIPELINE COMPLETE")
    print(f"✅ {len(succeeded)} page(s) succeeded")
    if failed:
        print(f"❌ {len(failed)} page(s) failed:")
        for f in failed:
            print(f" • {f}")
    print(f"{'═' * 70}\n")

    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)


if __name__ == "__main__":
    main()
|
||||
@@ -1,19 +0,0 @@
|
||||
# ─────────────────────────────────────────────
|
||||
# manga-translator + manga-renderer
|
||||
# Python >= 3.9 recommended
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
# Computer vision + image processing
|
||||
opencv-python>=4.8.0
|
||||
numpy>=1.24.0
|
||||
Pillow>=10.0.0
|
||||
|
||||
# OCR engine (manga-translator)
|
||||
manga-ocr>=0.1.8
|
||||
|
||||
# Translation (manga-translator)
|
||||
deep-translator>=1.11.0
|
||||
|
||||
# HTTP / file handling used internally by manga-ocr
|
||||
requests>=2.31.0
|
||||
|
||||
Reference in New Issue
Block a user