Compare commits

..

23 Commits

Author SHA1 Message Date
Guillem Hernandez Sola
037dadd920 Added fixes 2026-04-22 18:01:29 +02:00
Guillem Hernandez Sola
285e9ca393 Cleaning 2026-04-22 16:28:10 +02:00
Guillem Hernandez Sola
d77db83cfe Everything 2026-04-22 16:27:56 +02:00
Guillem Hernandez Sola
b730037a06 Added big stuff 2026-04-22 16:18:59 +02:00
Guillem Hernandez Sola
7837aeaa9b Added fixes 2026-04-22 14:05:25 +02:00
Guillem Hernandez Sola
455b4ad82c starting point 2026-04-22 11:49:25 +02:00
Guillem Hernandez Sola
b6b0df4774 Added stuff 2026-04-22 10:51:57 +02:00
Guillem Hernandez Sola
512bb32f66 Added all 2026-04-21 23:27:56 +02:00
Guillem Hernandez Sola
494631c967 Some fixes running 2026-04-21 23:03:17 +02:00
Guillem Hernandez Sola
27a3e6f98a Added some changes2 2026-04-21 22:43:17 +02:00
Guillem Hernandez Sola
f00647e668 Added new styles 2026-04-21 21:45:46 +02:00
Guillem Hernandez Sola
a5c81f4ff0 Added new styles 2026-04-21 21:27:22 +02:00
Guillem Hernandez Sola
f56ee49abf Added all 2026-04-21 21:03:35 +02:00
Guillem Hernandez Sola
ba5f001e75 ADded pipeline-translator.py 2026-04-21 20:09:11 +02:00
Guillem Hernandez Sola
2fb5e9eb7b Added improvements 2026-04-21 19:51:59 +02:00
Guillem Hernandez Sola
dfa52f54eb Added new rendered 2026-04-21 18:53:34 +02:00
Guillem Hernandez Sola
bd475d8f01 Added all 2026-04-21 17:34:10 +02:00
Guillem Hernandez Sola
f753a78ba4 Split 2026-04-21 17:12:32 +02:00
Guillem Hernandez Sola
3800f6cf3f Added bubble split 2026-04-20 19:02:24 +02:00
Guillem Hernandez Sola
beb8557e19 Started pipelines, render not working 2026-04-16 21:17:00 +02:00
Guillem Hernandez Sola
39765a6cf1 Added pipeline 2026-04-16 19:58:05 +02:00
Guillem Hernandez Sola
5aa79d986a First beta 2 2026-04-15 21:41:01 +02:00
Guillem Hernandez Sola
dd1cf54f86 First beta 2026-04-15 21:12:41 +02:00
28 changed files with 4459 additions and 1645 deletions

6
.gitignore vendored
View File

@@ -9,6 +9,11 @@
.venv311/
#Folders to test
Spy_x_Family_076/
Dandadan_059/
Lv999/
# Icon must end with two \r
Icon
@@ -23,6 +28,7 @@ Icon
*.jpg
*.jpeg
*.json
*.webp
# Files that might appear in the root of a volume
.DocumentRevisions-V100

View File

@@ -0,0 +1,53 @@
# Manga Translator OCR Pipeline
A robust manga/comic OCR + translation pipeline with:
- EasyOCR (default, reliable on macOS M1)
- Optional PaddleOCR (auto-fallback if unavailable)
- Bubble clustering and line-level boxes
- Robust reread pass (multi-preprocessing + slight rotation)
- Translation export + debug overlays
---
## ✨ Features
- OCR from raw manga pages
- Noise filtering (`BOX` debug artifacts, tiny garbage tokens, symbols)
- Speech bubble grouping
- Reading order estimation (`ltr` / `rtl`)
- Translation output (`output.txt`)
- Structured bubble metadata (`bubbles.json`)
- Visual debug output (`debug_clusters.png`)
---
## 🧰 Requirements
- macOS (Apple Silicon supported)
- Python **3.11** recommended
- Homebrew (for Python install)
---
## 🚀 Setup (Python 3.11 venv)
```bash
cd /path/to/manga-translator
# 1) Create venv with 3.11
/opt/homebrew/bin/python3.11 -m venv venv
# 2) Activate
source venv/bin/activate
# 3) Verify interpreter
python -V
# expected: Python 3.11.x
# 4) Install dependencies
python -m pip install --upgrade pip setuptools wheel
python -m pip install -r requirements.txt
# Optional Paddle runtime
python -m pip install paddlepaddle || true

BIN
fonts/ComicNeue-Bold.ttf Executable file

Binary file not shown.

Binary file not shown.

BIN
fonts/ComicRelief-Bold.ttf Executable file

Binary file not shown.

Binary file not shown.

BIN
fonts/Komika.ttf Normal file

Binary file not shown.

Binary file not shown.

BIN
fonts/animeace2_bld.ttf Normal file

Binary file not shown.

BIN
fonts/animeace2_ital.ttf Normal file

Binary file not shown.

BIN
fonts/animeace2_reg.ttf Normal file

Binary file not shown.

View File

@@ -1,509 +1,357 @@
import os
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
manga-renderer.py
Inputs: 001.jpg + bubbles.json + output_001.txt
Output: translated_page_001.png
Strategy:
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
2. Detect the original font size from the OCR bounding boxes.
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
4. Render the translated text centered inside the bubble bounding box.
5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
6. Adds a dynamic white stroke to the text to cover any residual original characters.
"""
import json
import re
import textwrap
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set, Any
# ============================================================
# CONFIG — edit these paths to match your setup
# ============================================================
IMAGE_PATH = "004.png"
BUBBLES_PATH = "bubbles_004.json"
TRANSLATIONS_PATH = "output_004.txt"
OUTPUT_PATH = "translated_page_004.png"
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
DEFAULT_FONT_CANDIDATES = [
"fonts/ComicRelief-Regular.ttf",
"fonts/ComicNeue-Regular.ttf",
# Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks
FONT_CANDIDATES = [
"fonts/animeace2_reg.ttf",
"fonts/ComicNeue-Bold.ttf",
]
DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)
MAX_FONT_SIZE = 20
MIN_FONT_SIZE = 6
DEFAULT_FONT_SIZE = 18
MIN_FONT_SIZE = 8
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
# Guarantee full wipe of yellow squares
YELLOW_BOX_PAD_X = 1
YELLOW_BOX_PAD_Y = 1
YELLOW_UNION_PAD_X = 4
YELLOW_UNION_PAD_Y = 4
# ============================================================
# SKIP LIST
# ============================================================
SKIP_BUBBLE_IDS: Set[int] = {
# Add any bubble IDs you do NOT want rendered here.
}
# Optional extra cleanup expansion
ENABLE_EXTRA_CLEAN = True
EXTRA_DILATE_ITERS = 1
EXTRA_CLOSE_ITERS = 1
# ============================================================
# FONT LOADER
# ============================================================
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a usable FreeType face from *path* at *size*.

    For a ``.ttc`` collection, probes face indices 0-3; for any other file
    only index 0 is tried.  Each candidate face is validated by calling
    ``getbbox("A")``, which raises when the face metrics are broken.

    Returns the first working font, or ``None`` when nothing loads.
    """
    face_indices = range(4) if path.lower().endswith(".ttc") else [0]
    for face_index in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face_index)
            candidate.getbbox("A")  # broken face metrics raise here
        except Exception:
            continue
        return candidate
    return None
# Bubble detection (for optional extra mask / border preservation)
FLOOD_TOL = 30
def resolve_font_path() -> str:
    """Return the first candidate font path that actually loads.

    An empty string signals that no TrueType font is usable and the caller
    should fall back to Pillow's built-in bitmap font.
    """
    for path in FONT_CANDIDATES:
        if load_font(path, DEFAULT_FONT_SIZE) is not None:
            print(f" ✅ Font: {path}")
            return path
    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return ""
# Border restoration: keep very conservative
ENABLE_EDGE_RESTORE = True
EDGE_RESTORE_DILATE = 1
# Text layout inside yellow-union
TEXT_INSET = 0.92
# ─────────────────────────────────────────────
# PARSERS
# ─────────────────────────────────────────────
def parse_translations(translations_file):
# ============================================================
# PARSERS
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
"""
Reads output.txt and returns {bubble_id: translated_text}.
Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
"""
translations = {}
originals = {}
flags_map = {}
with open(translations_file, "r", encoding="utf-8") as f:
with open(filepath, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line.startswith("#"):
continue
parts = line.split("|")
if len(parts) < 5:
continue
try:
bubble_id = int(parts[0].lstrip("#"))
except Exception:
bid = int(parts[0].lstrip("#"))
translated = parts[4].strip()
if translated and translated != "-":
translations[bid] = translated
except ValueError:
continue
return translations
if len(parts) >= 5:
original = parts[2].strip()
translated = parts[3].strip()
flags = parts[4].strip()
elif len(parts) >= 4:
original = parts[2].strip()
translated = parts[3].strip()
flags = "-"
elif len(parts) >= 3:
original = ""
translated = parts[2].strip()
flags = "-"
else:
continue
def parse_bubbles(filepath: str):
"""Returns the full JSON data."""
with open(filepath, "r", encoding="utf-8") as f:
data = json.load(f)
return data
if translated.startswith("["):
continue
translations[bubble_id] = translated
originals[bubble_id] = original
flags_map[bubble_id] = flags
return translations, originals, flags_map
def parse_bubbles(bubbles_file):
with open(bubbles_file, "r", encoding="utf-8") as f:
raw = json.load(f)
return {int(k): v for k, v in raw.items()}
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def clamp(v, lo, hi):
    """Clamp *v* into [lo, hi]; equivalent to max(lo, min(hi, v)), so the
    lower bound wins if the bounds are inverted."""
    upper_bounded = min(hi, v)
    return lo if upper_bounded < lo else upper_bounded
def xywh_to_xyxy(box):
    """Convert an {'x','y','w','h'} dict to an (x1, y1, x2, y2) tuple.

    Missing keys default to 0; a falsy *box* (None, empty dict) yields None.
    """
    if not box:
        return None
    x, y, w, h = (int(box.get(key, 0)) for key in ("x", "y", "w", "h"))
    return (x, y, x + w, y + h)
def union_xyxy(boxes):
    """Bounding union of a list of (x1, y1, x2, y2) boxes.

    ``None`` entries are ignored.  Returns ``None`` when nothing remains or
    the union is degenerate (zero or negative width/height).
    """
    valid = [b for b in boxes if b is not None]
    if not valid:
        return None
    xs1, ys1, xs2, ys2 = zip(*valid)
    left, top, right, bottom = min(xs1), min(ys1), max(xs2), max(ys2)
    if right <= left or bottom <= top:
        return None
    return (left, top, right, bottom)
def bbox_from_mask(mask):
    """Tight (x1, y1, x2, y2) box around the nonzero pixels of *mask*.

    The right/bottom edges are exclusive (+1).  Returns ``None`` for an
    all-zero mask.
    """
    ys, xs = np.nonzero(mask > 0)
    if xs.size == 0:
        return None
    return (int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1)
def normalize_text(s):
    """Uppercase *s* and strip every non-word character (keeps [A-Z0-9_]),
    producing a canonical form for fuzzy text comparison."""
    return re.sub(r"[^\w]+", "", s.upper().strip())
def is_sfx_like(text):
    """True when the normalized text looks like a short onomatopoeic sound
    effect (SHA.../BIP.../BEEP.../HN.../AH.../OH..., at most 8 chars)."""
    t = normalize_text(text)
    if len(t) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", t) is not None
# ─────────────────────────────────────────────
# FONT
# ─────────────────────────────────────────────
def load_font_from_candidates(candidates, size):
for path in candidates:
if path and os.path.exists(path):
try:
return ImageFont.truetype(path, size), path
except Exception:
continue
return ImageFont.load_default(), "PIL_DEFAULT"
def measure_text(draw, text, font):
bb = draw.textbbox((0, 0), text, font=font)
return bb[2] - bb[0], bb[3] - bb[1]
def wrap_text(draw, text, font, max_width):
words = text.split()
lines = []
cur = ""
for w in words:
test = (cur + " " + w).strip()
tw, _ = measure_text(draw, test, font)
if tw <= max_width or not cur:
cur = test
else:
lines.append(cur)
cur = w
if cur:
lines.append(cur)
if not lines:
return [""], 0, 0
widths = []
heights = []
for ln in lines:
lw, lh = measure_text(draw, ln, font)
widths.append(lw)
heights.append(lh)
gap = max(2, heights[0] // 5)
total_h = sum(heights) + gap * (len(lines) - 1)
return lines, total_h, max(widths)
def fit_font(draw, text, font_candidates, safe_w, safe_h):
for size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -1):
font, _ = load_font_from_candidates(font_candidates, size)
lines, total_h, max_w = wrap_text(draw, text, font, safe_w)
if total_h <= safe_h and max_w <= safe_w:
return font, lines, total_h
font, _ = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
lines, total_h, _ = wrap_text(draw, text, font, safe_w)
return font, lines, total_h
def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
x, y = pos
_, h = measure_text(draw, text, font)
sw = 2 if h <= 11 else 1
for dx in range(-sw, sw + 1):
for dy in range(-sw, sw + 1):
if dx == 0 and dy == 0:
continue
draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill)
draw.text((x, y), text, font=font, fill=fill)
# ─────────────────────────────────────────────
# MASK BUILDERS
# ─────────────────────────────────────────────
def build_yellow_mask(bubble_data, img_h, img_w):
# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
image_bgr,
bubbles_data: Dict[str, dict],
translations: Dict[int, str],
skip_ids: Set[int],
pad: int = QUAD_PAD
):
"""
HARD GUARANTEE:
Returned mask always covers all yellow squares (line_bboxes).
White-fills OCR quads ONLY for bubbles that:
- have a translation in output.txt AND
- are NOT in SKIP_BUBBLE_IDS
"""
mask = np.zeros((img_h, img_w), dtype=np.uint8)
ih, iw = image_bgr.shape[:2]
result = image_bgr.copy()
# Preferred: exact line boxes
line_boxes = bubble_data.get("line_bboxes", [])
for lb in line_boxes:
b = xywh_to_xyxy(lb)
if not b:
erased_count = 0
skipped_count = 0
for bid_str, val in bubbles_data.items():
bid = int(bid_str)
quads = val.get("quads", [])
if bid in skip_ids or bid not in translations:
skipped_count += 1
continue
x1, y1, x2, y2 = b
x1 -= YELLOW_BOX_PAD_X
y1 -= YELLOW_BOX_PAD_Y
x2 += YELLOW_BOX_PAD_X
y2 += YELLOW_BOX_PAD_Y
x1 = clamp(x1, 0, img_w - 1)
y1 = clamp(y1, 0, img_h - 1)
x2 = clamp(x2, 1, img_w)
y2 = clamp(y2, 1, img_h)
if x2 > x1 and y2 > y1:
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
# If no line boxes available, use line_union fallback
if np.count_nonzero(mask) == 0:
ub = xywh_to_xyxy(bubble_data.get("line_union_bbox"))
if ub:
x1, y1, x2, y2 = ub
x1 -= YELLOW_UNION_PAD_X
y1 -= YELLOW_UNION_PAD_Y
x2 += YELLOW_UNION_PAD_X
y2 += YELLOW_UNION_PAD_Y
x1 = clamp(x1, 0, img_w - 1)
y1 = clamp(y1, 0, img_h - 1)
x2 = clamp(x2, 1, img_w)
y2 = clamp(y2, 1, img_h)
if x2 > x1 and y2 > y1:
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
for quad in quads:
pts = np.array(quad, dtype=np.int32)
cv2.fillPoly(result, [pts], (255, 255, 255))
# Last fallback: text_bbox
if np.count_nonzero(mask) == 0:
tb = xywh_to_xyxy(bubble_data.get("text_bbox"))
if tb:
x1, y1, x2, y2 = tb
x1 -= YELLOW_UNION_PAD_X
y1 -= YELLOW_UNION_PAD_Y
x2 += YELLOW_UNION_PAD_X
y2 += YELLOW_UNION_PAD_Y
x1 = clamp(x1, 0, img_w - 1)
y1 = clamp(y1, 0, img_h - 1)
x2 = clamp(x2, 1, img_w)
y2 = clamp(y2, 1, img_h)
if x2 > x1 and y2 > y1:
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
x1 = max(0, min(xs) - pad)
y1 = max(0, min(ys) - pad)
x2 = min(iw - 1, max(xs) + pad)
y2 = min(ih - 1, max(ys) + pad)
cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
return mask
erased_count += 1
print(f" Erased : {erased_count} bubbles")
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
return result
def bubble_interior_mask(img_bgr, bubble_data):
# ============================================================
# DYNAMIC TEXT FITTING
# ============================================================
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Estimate the page's original font size from OCR line-box heights.

    Takes ~85% of the median ``line_bboxes`` height, clamped to
    [MIN_FONT_SIZE, 60].  Falls back to *fallback_size* when the bubble has
    no line boxes at all.
    """
    boxes = bubble_data.get("line_bboxes", [])
    if not boxes:
        return fallback_size
    median_height = int(np.median([box["h"] for box in boxes]))
    size_guess = int(median_height * 0.85)
    return max(MIN_FONT_SIZE, min(size_guess, 60))
def fit_text_dynamically(
text: str,
font_path: str,
max_w: int,
max_h: int,
target_font_size: int
) -> Tuple[List[str], Any, int, int]:
"""
Optional helper to expand clean region safely; never used to shrink yellow coverage.
Wraps text and scales down font size if it exceeds the bubble dimensions.
Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
"""
h, w = img_bgr.shape[:2]
font_size = target_font_size
panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
if panel is None:
panel = (0, 0, w, h)
px1, py1, px2, py2 = panel
if not font_path:
font = ImageFont.load_default()
char_w = 6
chars_per_line = max(1, int(max_w / char_w))
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
return wrapped_lines, font, 4, 10
seed = bubble_data.get("seed_point", {})
sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
sx = clamp(sx, 1, w - 2)
sy = clamp(sy, 1, h - 2)
while font_size >= MIN_FONT_SIZE:
font = load_font(font_path, font_size)
if font is None:
font = ImageFont.load_default()
return [text], font, 4, 10
gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
char_bbox = font.getbbox("A")
char_w = (char_bbox[2] - char_bbox[0]) or 10
chars_per_line = max(1, int((max_w * 0.95) / char_w))
panel_bin = np.zeros_like(binary)
panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
# if seed on dark pixel, search nearby white
if gray[sy, sx] < 150:
found = False
search_r = max(2, min(bubble_data.get("w", 20), bubble_data.get("h", 20)) // 3)
for r in range(1, search_r + 1):
for dy in range(-r, r + 1):
for dx in range(-r, r + 1):
nx, ny = sx + dx, sy + dy
if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200:
sx, sy = nx, ny
found = True
break
if found:
break
if found:
break
# Use uniform font metrics for height to protect accents like È
line_spacing = max(2, int(font_size * 0.15))
if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics()
line_h = ascent + descent
else:
line_h = font_size
if not found:
m = np.zeros((h, w), dtype=np.uint8)
bx = bubble_data.get("x", 0)
by = bubble_data.get("y", 0)
bw = bubble_data.get("w", 20)
bh = bubble_data.get("h", 20)
cv2.ellipse(m, (bx + bw // 2, by + bh // 2), (max(4, bw // 2), max(4, bh // 2)), 0, 0, 360, 255, -1)
return m
total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
flood = panel_bin.copy()
cv2.floodFill(
flood, ff_mask, (sx, sy), 255,
loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
flags=cv2.FLOODFILL_FIXED_RANGE
max_line_w = 0
for line in wrapped_lines:
bbox = font.getbbox(line)
lw = bbox[2] - bbox[0]
max_line_w = max(max_line_w, lw)
if max_line_w <= max_w and total_h <= max_h:
return wrapped_lines, font, line_spacing, font_size
font_size -= 2
font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10)
char_w = (char_bbox[2] - char_bbox[0]) or 6
chars_per_line = max(1, int(max_w / char_w))
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE
# ============================================================
# RENDER
# ============================================================
def render_text(
image_bgr,
bubbles_data: Dict[str, dict],
translations: Dict[int, str],
font_path: str,
skip_ids: Set[int]
):
"""
Draws the translated text centered in the line_union_bbox of each bubble.
Adds a dynamic white stroke (outline) to cover any residual original characters.
"""
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(image_rgb)
draw = ImageDraw.Draw(pil_img)
rendered_count = 0
for bid_str, val in bubbles_data.items():
bid = int(bid_str)
if bid in skip_ids or bid not in translations:
continue
text = translations[bid]
union_box = val.get("line_union_bbox")
if not union_box:
union_box = val.get("text_bbox")
if not union_box:
continue
bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
pad_x = int(bw * 0.1)
pad_y = int(bh * 0.1)
bx -= pad_x // 2
by -= pad_y // 2
bw += pad_x
bh += pad_y
target_size = get_original_font_size(val)
wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
# Use uniform typographic line height for rendering to protect accents
if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics()
line_h = ascent + descent
else:
line_h = final_size
total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
current_y = by + (bh - total_text_height) // 2
# Dynamic outline thickness based on the final scaled font size
outline_thickness = max(2, int(final_size * 0.10))
for i, line in enumerate(wrapped_lines):
if hasattr(font, 'getbbox'):
bbox = font.getbbox(line)
lw = bbox[2] - bbox[0]
else:
lw = len(line) * 6
current_x = bx + (bw - lw) // 2
# Draw text with white stroke for artifact coverage
draw.text(
(current_x, current_y),
line,
fill=(0, 0, 0),
font=font,
stroke_width=outline_thickness,
stroke_fill=(255, 255, 255)
)
# Advance Y by the uniform line height + spacing
current_y += line_h + line_spacing
rendered_count += 1
print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
# ============================================================
# MAIN
# ============================================================
def main():
print(f"Loading image: {IMAGE_PATH}")
image_bgr = cv2.imread(IMAGE_PATH)
if image_bgr is None:
print(f"❌ Error: Could not load {IMAGE_PATH}")
return
print(f"Loading translations: {TRANSLATIONS_PATH}")
translations = parse_translations(TRANSLATIONS_PATH)
print(f"Loading bubble data: {BUBBLES_PATH}")
bubbles_data = parse_bubbles(BUBBLES_PATH)
print("Resolving font...")
font_path = resolve_font_path()
print("\n--- Step 1: Erasing original text ---")
erased_bgr = erase_quads(
image_bgr=image_bgr,
bubbles_data=bubbles_data,
translations=translations,
skip_ids=SKIP_BUBBLE_IDS,
pad=QUAD_PAD
)
m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
return m
def build_clean_mask(img_bgr, bubble_data):
"""
FINAL RULE:
clean_mask MUST cover yellow_mask completely.
"""
h, w = img_bgr.shape[:2]
yellow = build_yellow_mask(bubble_data, h, w)
# start with guaranteed yellow
clean = yellow.copy()
if ENABLE_EXTRA_CLEAN:
bubble_m = bubble_interior_mask(img_bgr, bubble_data)
extra = cv2.dilate(yellow, np.ones((3, 3), np.uint8), iterations=EXTRA_DILATE_ITERS)
extra = cv2.morphologyEx(extra, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=EXTRA_CLOSE_ITERS)
extra = cv2.bitwise_and(extra, bubble_m)
# IMPORTANT: union with yellow (never subtract yellow)
clean = cv2.bitwise_or(yellow, extra)
# final guarantee (defensive)
clean = cv2.bitwise_or(clean, yellow)
return clean, yellow
# ─────────────────────────────────────────────
# DRAW BUBBLE
# ─────────────────────────────────────────────
def draw_bubble(
pil_img,
img_bgr_ref,
bubble_data,
original_text,
translated_text,
font_candidates,
font_color,
stroke_color
):
if original_text and translated_text:
if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text):
return "skip_sfx"
rgb = np.array(pil_img)
h, w = rgb.shape[:2]
clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data)
if np.count_nonzero(clean_mask) == 0:
return "skip_no_area"
# 1) FORCE white fill on clean mask (includes full yellow by guarantee)
rgb[clean_mask == 255] = [255, 255, 255]
# 2) Optional edge restore, but NEVER overwrite yellow coverage
if ENABLE_EDGE_RESTORE:
bubble_m = bubble_interior_mask(img_bgr_ref, bubble_data)
edge = cv2.morphologyEx(bubble_m, cv2.MORPH_GRADIENT, np.ones((3, 3), np.uint8))
edge = cv2.dilate(edge, np.ones((3, 3), np.uint8), iterations=EDGE_RESTORE_DILATE)
# Don't restore where yellow exists (hard guarantee)
edge[yellow_mask == 255] = 0
orig_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
rgb[edge == 255] = orig_rgb[edge == 255]
pil_img.paste(Image.fromarray(rgb))
if not translated_text:
return "clean_only"
# text region based on yellow area (exact requirement)
text_bbox = bbox_from_mask(yellow_mask)
if text_bbox is None:
text_bbox = bbox_from_mask(clean_mask)
if text_bbox is None:
return "skip_no_area"
x1, y1, x2, y2 = text_bbox
draw = ImageDraw.Draw(pil_img)
text_cx = int((x1 + x2) / 2)
text_cy = int((y1 + y2) / 2)
safe_w = max(16, int((x2 - x1) * TEXT_INSET))
safe_h = max(16, int((y2 - y1) * TEXT_INSET))
font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)
y_cursor = int(round(text_cy - total_h / 2.0))
for line in lines:
lw, lh = measure_text(draw, line, font)
x = text_cx - lw // 2
draw_text_with_stroke(draw, (x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color)
y_cursor += lh + max(lh // 5, 2)
return "rendered"
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def render_translations(
input_image,
output_image,
translations_file,
bubbles_file,
font_candidates=DEFAULT_FONT_CANDIDATES,
font_color=DEFAULT_FONT_COLOR,
stroke_color=DEFAULT_STROKE_COLOR
):
img_bgr = cv2.imread(input_image)
if img_bgr is None:
raise FileNotFoundError(f"Cannot load image: {input_image}")
img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
translations, originals, flags_map = parse_translations(translations_file)
bubbles = parse_bubbles(bubbles_file)
rendered, skipped = 0, 0
def sort_key(item):
bid, _ = item
b = bubbles.get(bid, {})
return int(b.get("reading_order", bid))
for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
if bubble_id not in bubbles:
skipped += 1
continue
bubble_data = bubbles[bubble_id]
original_text = originals.get(bubble_id, "")
status = draw_bubble(
pil_img=img_pil,
img_bgr_ref=img_bgr,
bubble_data=bubble_data,
original_text=original_text,
translated_text=translated_text,
font_candidates=font_candidates,
font_color=font_color,
stroke_color=stroke_color
)
if status.startswith("skip"):
skipped += 1
else:
rendered += 1
out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
cv2.imwrite(output_image, out_bgr)
print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
print(f"📄 Output → {output_image}")
print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")
print("\n--- Step 2: Rendering translated text ---")
final_bgr = render_text(
image_bgr=erased_bgr,
bubbles_data=bubbles_data,
translations=translations,
font_path=font_path,
skip_ids=SKIP_BUBBLE_IDS
)
print(f"\nSaving final image to: {OUTPUT_PATH}")
cv2.imwrite(OUTPUT_PATH, final_bgr)
print("✅ Done!")
if __name__ == "__main__":
render_translations(
input_image="001-page.png",
output_image="page_translated.png",
translations_file="output.txt",
bubbles_file="bubbles.json",
font_candidates=DEFAULT_FONT_CANDIDATES,
font_color=DEFAULT_FONT_COLOR,
stroke_color=DEFAULT_STROKE_COLOR
)
main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env python3
"""Debug script: list every raw OCR detection and flag those overlapping Box 5.

Loads the page image, runs the Vision detector from manga-translator.py, then
compares each detection's quad bbox against bubble "5" from bubbles.json.
Fixes over the original: removed unused cv2/numpy imports; hoisted the
loop-invariant Box-5 bounds unpack out of the detection loop.
"""
import sys

sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import json
import importlib.util

# manga-translator.py has a dash in its name, so it must be loaded via importlib.
spec = importlib.util.spec_from_file_location(
    "manga_translator",
    "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py",
)
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)

# Load current bubbles to see what box 5 contains
with open('bubbles.json') as f:
    bubbles_data = json.load(f)

box5_data = bubbles_data['5']
box5_bounds = (
    box5_data['x'],
    box5_data['y'],
    box5_data['x'] + box5_data['w'],
    box5_data['y'] + box5_data['h'],
)
print(f'Box 5 bounds (xyxy): {box5_bounds}')
print()

# Loop-invariant: unpack the Box-5 corners once, not per detection.
b5_x1, b5_y1, b5_x2, b5_y2 = box5_bounds

# Print all detections sorted by position (top-to-bottom, then left-to-right).
print('All raw detections:')
ordered = sorted(raw, key=lambda x: (mt.quad_bbox(x[0])[1], mt.quad_bbox(x[0])[0]))
for i, (bbox, text, conf) in enumerate(ordered):
    b = mt.quad_bbox(bbox)
    t_norm = mt.normalize_text(text)
    print(f'{i:2d}. [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] conf={conf:.2f} text="{t_norm}"')
    # AABB overlap test: boxes overlap unless one lies fully to a side.
    if not (b[2] < b5_x1 or b[0] > b5_x2 or b[3] < b5_y1 or b[1] > b5_y2):
        print(f'  ^ OVERLAPS with Box 5!')

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""Debug script: inspect how the pipeline groups tokens into bubbles 7 and 8.

Re-runs detection plus the pipeline's filtering/grouping on 004.png, dumps the
member detections of bubbles 7 and 8, then measures the horizontal gap and
vertical overlap between the two groups.
Fixes over the original: removed unused json/numpy imports and the unused
local ``qb``; the copy-pasted per-bubble dump is now a single helper.
"""
import sys

sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import importlib.util

# manga-translator.py has a dash in its name, so it must be loaded via importlib.
spec = importlib.util.spec_from_file_location(
    "manga_translator",
    "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py",
)
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)


def pipeline_filter(detections):
    """Apply the same confidence/noise/SFX/title filters the pipeline uses."""
    kept = []
    for bbox, text, conf in detections:
        t = mt.normalize_text(text)
        if conf < 0.12 or len(t) < 1:
            continue
        if mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
            continue
        kept.append((bbox, t, conf))
    return kept


def dump_bubble(bid, label):
    """Print one bubble's box, member indices, X range, and detections."""
    print(label)
    if bid not in bubble_indices:
        return
    print(f" Box: {bubble_boxes[bid]}")
    print(f" Indices: {bubble_indices[bid]}")
    indices = bubble_indices[bid]
    boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
    min_x = min(b[0] for b in boxes)
    max_x = max(b[2] for b in boxes)
    print(f" X range: {min_x} - {max_x}")
    for idx in indices:
        b = mt.quad_bbox(filtered[idx][0])
        print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")


filtered = pipeline_filter(raw)

# Run grouping exactly as the pipeline does.
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

print("=== BUBBLE 7 & 8 ANALYSIS ===\n")
dump_bubble(7, "Current Bubble 7 (right side content):")
dump_bubble(8, "\nCurrent Bubble 8 (left side content):")

# Check the horizontal gap between them
print("\n=== GAP ANALYSIS ===")
if 7 in bubble_indices and 8 in bubble_indices:
    boxes7 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[7]]
    boxes8 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[8]]
    max_x7 = max(b[2] for b in boxes7)
    min_x8 = min(b[0] for b in boxes8)
    print(f"Bubble 7 max X: {max_x7}")
    print(f"Bubble 8 min X: {min_x8}")
    print(f"Horizontal gap: {min_x8 - max_x7}")
    # Check Y overlap
    min_y7 = min(b[1] for b in boxes7)
    max_y7 = max(b[3] for b in boxes7)
    min_y8 = min(b[1] for b in boxes8)
    max_y8 = max(b[3] for b in boxes8)
    print(f"\nBubble 7 Y range: {min_y7} - {max_y7}")
    print(f"Bubble 8 Y range: {min_y8} - {max_y8}")
    print(f"Y overlap: {max(0, min(max_y7, max_y8) - max(min_y7, min_y8))} pixels")

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""Debug script: run the pipeline's filter + grouping on 004.png and dump bubbles.

Fixes over the original: removed unused json/numpy imports and the unused
locals ``qb`` and ``box5_region`` (the Box-5 region is kept as a comment).
"""
import sys

sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import importlib.util

# manga-translator.py has a dash in its name, so it must be loaded via importlib.
spec = importlib.util.spec_from_file_location(
    "manga_translator",
    "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py",
)
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    if conf < 0.12:
        continue
    if len(t) < 1:
        continue
    if mt.is_noise_text(t):
        continue
    if mt.is_sound_effect(t):
        continue
    if mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))
print(f"Filtered {len(filtered)} detections")

# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

# NOTE: Box 5 is expected around xyxy (378, 570, 536, 753) — for reference only.
print("\n=== BUBBLES ===")
for bid, box in bubble_boxes.items():
    print(f"Bubble {bid}: {box}")
    print(f" Indices: {bubble_indices[bid]}")
    print(f" Detections:")
    for idx in bubble_indices[bid]:
        b = mt.quad_bbox(filtered[idx][0])
        print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")

77
older-code/check_box7.py Normal file
View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""Debug harness (check_box7.py): re-run filtering + grouping on 004.png and
compare the resulting bubbles against the saved bubbles.json boxes 5 and 7."""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
import importlib.util

# Load manga-translator.py under a valid module name (filename has a hyphen).
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)  # NOTE(review): computed but never used here
    if conf < 0.12:
        continue
    if len(t) < 1:
        continue
    if mt.is_noise_text(t):
        continue
    if mt.is_sound_effect(t):
        continue
    if mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))

# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

# Check current bubbles.json for reference
with open('bubbles.json') as f:
    old_bubbles = json.load(f)

print("=== BOX 5 ===")
print(f"Old bounds (from bubbles.json): x={old_bubbles['5']['x']}, y={old_bubbles['5']['y']}, w={old_bubbles['5']['w']}, h={old_bubbles['5']['h']}")
print(f" (xyxy): ({old_bubbles['5']['x']}, {old_bubbles['5']['y']}, {old_bubbles['5']['x'] + old_bubbles['5']['w']}, {old_bubbles['5']['y'] + old_bubbles['5']['h']})")

# Find bubble at that location in current grouping
for bid, box in bubble_boxes.items():
    if box[0] == 371 and box[1] == 563:  # New box 5 location (hard-coded from an earlier run)
        print(f"Current bubble {bid}: {box}")
        print(f" Detections: {bubble_indices[bid]}")
        for idx in bubble_indices[bid]:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")

print("\n=== BOX 7 ===")
print(f"Old bounds (from bubbles.json): x={old_bubbles['7']['x']}, y={old_bubbles['7']['y']}, w={old_bubbles['7']['w']}, h={old_bubbles['7']['h']}")
print(f" (xyxy): ({old_bubbles['7']['x']}, {old_bubbles['7']['y']}, {old_bubbles['7']['x'] + old_bubbles['7']['w']}, {old_bubbles['7']['y'] + old_bubbles['7']['h']})")

# Find corresponding bubble
for bid, box in bubble_boxes.items():
    x1, y1, x2, y2 = box
    # Check if this overlaps with old box 7 (standard axis-separation test)
    old_x1, old_y1 = old_bubbles['7']['x'], old_bubbles['7']['y']
    old_x2 = old_x1 + old_bubbles['7']['w']
    old_y2 = old_y1 + old_bubbles['7']['h']
    if not (x2 < old_x1 or x1 > old_x2 or y2 < old_y1 or y1 > old_y2):
        print(f"Current bubble {bid}: {box}")
        print(f" Detections: {bubble_indices[bid]}")
        for idx in bubble_indices[bid]:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Debug harness: for two hand-picked token sets (left/right bubbles on
004.png) print the pairwise proximity and distance-threshold values that the
grouping logic would evaluate, to see why/why not they merge."""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import numpy as np
import importlib.util

spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))

# Get the indices we're interested in (left and right bubbles)
# NOTE(review): indices are hard-coded from a previous run's detection order.
left_indices = [41, 42, 43, 44, 45, 46]  # LET, GO, OFF, ME, AL-, REA-
right_indices = [47, 48, 49, 50, 51, 52, 53, 54]  # DON'T, WORRY!, HARUKO, ...

print("=== CHECKING GROUPING CONDITIONS ===\n")

# Check if they would be united in group_tokens
boxes_left = [mt.quad_bbox(filtered[i][0]) for i in left_indices]
boxes_right = [mt.quad_bbox(filtered[i][0]) for i in right_indices]

# Check overlap_or_near
print("Checking overlap_or_near with gap=18:")
for li, bi in enumerate(left_indices):
    for ri, bj in enumerate(right_indices):
        b_left = boxes_left[li]
        b_right = boxes_right[ri]
        # Separation along each axis (0 when the projections overlap).
        gap_x = max(0, max(b_left[0], b_right[0]) - min(b_left[2], b_right[2]))
        gap_y = max(0, max(b_left[1], b_right[1]) - min(b_left[3], b_right[3]))
        overlaps = gap_x <= 18 and gap_y <= 18
        if overlaps:
            print(f" {bi} and {bj} overlap/near: gap_x={gap_x}, gap_y={gap_y}")

# Check distance check: threshold scales with the median token height.
hs = [max(1.0, b[3] - b[1]) for b in [*boxes_left, *boxes_right]]
med_h = float(np.median(hs)) if hs else 12.0
dist_thresh = max(20.0, med_h * 2.2)
print(f"\nMedian height: {med_h}")
print(f"Distance threshold: {dist_thresh}")

print("\nChecking distance check:")
for li, bi in enumerate(left_indices[:1]):  # Just check first from each
    for ri, bj in enumerate(right_indices[:1]):
        b_left = boxes_left[li]
        b_right = boxes_right[ri]
        cx_left = (b_left[0] + b_left[2]) / 2.0
        cy_left = (b_left[1] + b_left[3]) / 2.0
        cx_right = (b_right[0] + b_right[2]) / 2.0
        cy_right = (b_right[1] + b_right[3]) / 2.0
        d = ((cx_left - cx_right) ** 2 + (cy_left - cy_right) ** 2) ** 0.5
        within_dist = d <= dist_thresh
        within_y = abs(cy_left - cy_right) <= med_h * 3.0
        print(f" {bi} to {bj}: distance={d:.1f}, within_dist={within_dist}, within_y_tol={within_y}")

View File

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
# Debug script to see what bubbles are produced after splitting
"""Runs the full filter + grouping pipeline on 004.png, then dry-runs the
panel/column split logic to report which bubbles (esp. 7 and 8) would split."""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
import importlib.util

spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Full filtering as pipeline does
filtered = []
skipped = 0
ih, iw = image.shape[:2]
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)
    if conf < 0.12:
        skipped += 1
        continue
    if len(t) < 1:
        skipped += 1
        continue
    if mt.is_noise_text(t):
        skipped += 1
        continue
    if mt.is_sound_effect(t):
        skipped += 1
        continue
    if mt.is_title_text(t):
        skipped += 1
        continue
    # Top-band tokens need high confidence unless they are short.
    if qb[1] < int(ih * mt.TOP_BAND_RATIO):
        if conf < 0.70 and len(t) >= 5:
            skipped += 1
            continue
    filtered.append((bbox, t, conf))

resolved_gap = mt.auto_gap(image_path)
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
)

print("=== AFTER GROUPING ===")
print(f"Bubbles dict keys: {sorted(bubbles.keys())}")
for bid in [7, 8]:
    if bid in bubbles:
        print(f"\nBubble {bid}:")
        print(f" Box: {bubble_boxes[bid]}")
        print(f" Indices ({len(bubble_indices[bid])}): {bubble_indices[bid]}")
        print(f" Quads ({len(bubble_quads[bid])})")

# Now simulate the split logic (detection only — nothing is actually split).
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
next_bid = max(bubbles.keys()) + 1 if bubbles else 1  # NOTE(review): assigned but unused below
splits_performed = []
for bid in list(bubbles.keys()):
    box = bubble_boxes[bid]
    bubble_split = None
    # Try split: panel split first, then the column-based fallback.
    split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
    if split_result:
        box_left, box_right, split_x = split_result
        # ... split logic ...
        bubble_split = "panel_split"
    if bubble_split is None:
        col_split = mt.split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
        if col_split:
            bubble_split = "column_split"
    if bubble_split:
        splits_performed.append(f"Bubble {bid}: {bubble_split}")
        # Don't actually split here, just mark it
    else:
        # No split
        new_bubbles[bid] = bubbles[bid]
        new_bubble_boxes[bid] = bubble_boxes[bid]
        new_bubble_quads[bid] = bubble_quads[bid]
        new_bubble_indices[bid] = bubble_indices[bid]

print("\n=== AFTER SPLIT LOGIC ===")
print(f"Splits detected: {len(splits_performed)}")
for s in splits_performed:
    print(f" {s}")
print(f"\nBubbles dict keys: {sorted(new_bubbles.keys())}")
for bid in [7, 8]:
    if bid in new_bubbles:
        print(f"\nBubble {bid}:")
        print(f" Box: {new_bubble_boxes[bid]}")
        print(f" Indices ({len(new_bubble_indices[bid])}): {new_bubble_indices[bid][:3]}...")

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from pathlib import Path

# File patched in place by the passes below.
TARGET = Path("manga-translator.py")
def cut_after_first_entrypoint(text: str) -> str:
    """Truncate *text* right after its first `if __name__ == "__main__":` block.

    The entrypoint line plus every following blank or indented line is kept;
    anything after the first dedented, non-blank line (a duplicated tail) is
    dropped. Text without an entrypoint is returned unchanged.
    """
    match = re.search(r'(?m)^if __name__ == "__main__":\s*$', text)
    if match is None:
        return text
    head = text[:match.start()]
    body_lines = text[match.start():].splitlines(True)
    kept = [body_lines[0]]  # the `if __name__ ...` line itself
    for line in body_lines[1:]:
        if line.strip() and not line.startswith((" ", "\t")):
            break  # dedented, non-blank => entrypoint block has ended
        kept.append(line)
    return head + "".join(kept)
def replace_bad_vars(text: str) -> str:
    """Rewrite two stale helper-call argument lists to use `filtered`/`image`."""
    substitutions = (
        (
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr, image_bgr)",
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)",
        ),
        (
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr)",
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered)",
        ),
    )
    for stale, fresh in substitutions:
        text = text.replace(stale, fresh)
    return text
def ensure_autofix_chain(text: str) -> str:
    """Expand the lone merge call into the auto-fix + merge chain.

    No-op when the expected source snippet is not present verbatim.
    """
    before = (
        " # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
        " if auto_fix_bubbles:\n"
        " bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
    )
    after = (
        " # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
        " if auto_fix_bubbles:\n"
        " bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection(\n"
        " bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image)\n"
        " bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(\n"
        " bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
    )
    return text.replace(before, after)
def ensure_split_commit(text: str) -> str:
    """Insert the split-commit assignments just before the dedup section.

    Idempotent: does nothing when the marker is absent or the assignments
    already exist somewhere in *text*.
    """
    anchor = " # ── Remove nested / duplicate boxes ──────────────────────────────────\n"
    if anchor not in text or "bubbles = new_bubbles" in text:
        return text
    commit_block = (
        " bubbles = new_bubbles\n"
        " bubble_boxes = new_bubble_boxes\n"
        " bubble_quads = new_bubble_quads\n"
        " bubble_indices = new_bubble_indices\n\n"
    )
    return text.replace(anchor, commit_block + anchor)
def ensure_rescue_pipeline(text: str) -> str:
    """Append the confidence-floor + token-rescue steps after the Kept/Skipped log.

    Idempotent: returns *text* untouched when the anchor line is missing or the
    rescue call has already been injected.
    """
    anchor = ' print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
    if anchor not in text or "rescue_name_and_short_tokens(raw" in text:
        return text
    injected = (
        ' print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
        ' # Protect short dialogue tokens confidence\n'
        ' tmp = []\n'
        ' for bbox, t, conf in filtered:\n'
        ' tmp.append((bbox, t, maybe_conf_floor_for_protected(t, conf, floor=0.40)))\n'
        ' filtered = tmp\n'
        ' # Rescue names/short tokens dropped by strict filters\n'
        ' rescued = rescue_name_and_short_tokens(raw, min_conf=0.20)\n'
        ' filtered = merge_rescued_items(filtered, rescued, iou_threshold=0.55)\n'
    )
    return text.replace(anchor, injected)
def main():
    """Read manga-translator.py, run every patch pass over it, write it back."""
    if not TARGET.exists():
        raise FileNotFoundError(f"Not found: {TARGET}")
    text = TARGET.read_text(encoding="utf-8")
    passes = (
        cut_after_first_entrypoint,
        replace_bad_vars,
        ensure_autofix_chain,
        ensure_split_commit,
        ensure_rescue_pipeline,
    )
    for apply_pass in passes:
        text = apply_pass(text)
    TARGET.write_text(text, encoding="utf-8")
    print("✅ Patched manga-translator.py")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Regenerate debug_clusters.png with the new split bubbles.json
"""
import json
import cv2
import numpy as np
def quad_bbox(quad):
    """Return the axis-aligned bounds (x1, y1, x2, y2) of a quad's points."""
    x_vals = [point[0] for point in quad]
    y_vals = [point[1] for point in quad]
    return (min(x_vals), min(y_vals), max(x_vals), max(y_vals))
def save_debug_clusters_from_json(
    image_path="004.png",
    bubbles_path="bubbles.json",
    out_path="debug_clusters.png"
):
    """Redraw the cluster-debug image from a bubbles.json layout.

    Every detected text quad is filled white (erasing the original text) and
    outlined, then each bubble's padded bounding box is drawn with its id.

    Args:
        image_path: original page image to annotate.
        bubbles_path: bubble layout JSON ({id: {x, y, w, h, quads, ...}}).
        out_path: where the annotated image is written.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ Cannot load image: {image_path}")
        return
    # Load bubbles.json
    with open(bubbles_path, "r", encoding="utf-8") as f:
        bubbles_data = json.load(f)
    # Draw all quad polygons in white (erasing original text)
    for bid_str, bubble_info in bubbles_data.items():
        for quad in bubble_info.get("quads", []):
            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(img, [pts], (255, 255, 255))
            cv2.polylines(img, [pts], True, (180, 180, 180), 1)  # faint outline
    # Draw bounding boxes with labels
    for bid_str, bubble_info in bubbles_data.items():
        bid = int(bid_str)
        x = bubble_info["x"]
        y = bubble_info["y"]
        w = bubble_info["w"]
        h = bubble_info["h"]
        x2 = x + w
        y2 = y + h
        cv2.rectangle(img, (x, y), (x2, y2), (0, 220, 0), 2)
        # Clamp the label so it stays visible when a box touches the top edge.
        cv2.putText(img, f"BOX#{bid}", (x + 2, max(15, y + 16)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
    cv2.imwrite(out_path, img)
    print(f"✅ Saved: {out_path}")


if __name__ == "__main__":
    save_debug_clusters_from_json()

183
older-code/split_bubbles.py Normal file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Helper script to split bubbles with multiple separate text regions.
Run this to manually split Box 2 and Box 7 from debug_clusters.png
"""
import json
import numpy as np
from typing import List, Tuple, Dict
def quad_bbox(quad):
    """Collapse a 4-point quad into its bounding box (min_x, min_y, max_x, max_y)."""
    return (
        min(p[0] for p in quad),
        min(p[1] for p in quad),
        max(p[0] for p in quad),
        max(p[1] for p in quad),
    )
def boxes_union_xyxy(boxes):
    """Smallest xyxy box covering every non-None box; None when nothing remains."""
    valid = [b for b in boxes if b is not None]
    if not valid:
        return None
    x1s, y1s, x2s, y2s = zip(*valid)
    return (int(min(x1s)), int(min(y1s)), int(max(x2s)), int(max(y2s)))
def xyxy_to_xywh(bbox):
    """Convert an (x1, y1, x2, y2) box into an {x, y, w, h} dict; None passes through."""
    if bbox is None:
        return None
    left, top, right, bottom = bbox
    return {"x": int(left), "y": int(top), "w": int(right - left), "h": int(bottom - top)}
def bbox_area_xyxy(b):
    """Area of an xyxy bounding box; a missing (None) box has zero area."""
    if b is None:
        return 0
    width = b[2] - b[0]
    height = b[3] - b[1]
    return width * height
def split_bubble_by_vertical_gap(bubble_id: int, bubble_data: Dict, filtered_indices_map: Dict, min_gap_px: float = 80):
    """
    Attempt to split a bubble at the widest horizontal gap between its quads'
    x-centers (two side-by-side text columns merged into one bubble).

    Args:
        bubble_id: id of the bubble (kept for interface compatibility; unused).
        bubble_data: bubble dict; only the 'quads' entry is read.
        filtered_indices_map: kept for interface compatibility; unused.
        min_gap_px: minimum center-to-center gap (pixels) that justifies a
            split. Previously a hard-coded 80; now a backward-compatible
            keyword parameter.

    Returns:
        (left_indices, right_indices, gap_size) — index positions into
        bubble_data['quads'] — or None when there is no wide-enough gap.
    """
    quads = bubble_data['quads']
    if len(quads) < 2:
        return None
    # x-center of each quad, tagged with its original index.
    centers = []
    for i, quad in enumerate(quads):
        xs = [p[0] for p in quad]
        centers.append((i, (min(xs) + max(xs)) / 2.0))
    centers.sort(key=lambda item: item[1])
    # Find the largest gap between horizontally adjacent centers.
    max_gap = 0
    split_pos = -1
    for i in range(len(centers) - 1):
        gap = centers[i + 1][1] - centers[i][1]
        if gap > max_gap:
            max_gap = gap
            split_pos = i
    # If gap is large enough, split at it.
    if split_pos != -1 and max_gap > min_gap_px:
        left_indices = [centers[j][0] for j in range(split_pos + 1)]
        right_indices = [centers[j][0] for j in range(split_pos + 1, len(centers))]
        return (left_indices, right_indices, max_gap)
    return None
def split_bubbles_in_json(input_file="bubbles.json", output_file="bubbles_split.json", bubble_ids_to_split=(2, 7)):
    """Split the requested bubbles in *input_file* and save the result.

    Each listed bubble is split at its widest horizontal quad gap (if any):
    the left half keeps the original id, the right half receives a fresh id.
    Bubbles that are not listed, or that have no wide-enough gap, are copied
    through unchanged.

    Args:
        input_file: path of the bubbles JSON to read.
        output_file: path where the (possibly) split layout is written.
        bubble_ids_to_split: bubble ids to try to split. Default is an
            immutable tuple (was a mutable list default — a classic pitfall).
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    def build_side(bubble_data, indices, fallback_order):
        """Build one output bubble from the quads selected by *indices*.

        Returns (raw_bbox, padded_bbox, entry) so the caller can log both.
        """
        quads = [bubble_data['quads'][i] for i in indices]
        qboxes = [bubble_data['quad_bboxes'][i] for i in indices]
        bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
        # Pad by 3 px, clamped at the image origin.
        padded = (max(0, bbox[0] - 3), max(0, bbox[1] - 3), bbox[2] + 3, bbox[3] + 3)
        entry = {
            "x": padded[0],
            "y": padded[1],
            "w": padded[2] - padded[0],
            "h": padded[3] - padded[1],
            "reading_order": bubble_data.get("reading_order", fallback_order),
            "quad_bboxes": qboxes,
            "quads": quads,
            "text_bbox": xyxy_to_xywh(bbox),
            "line_bboxes": [],
            "line_union_bbox": xyxy_to_xywh(bbox),
            "line_union_area": int(bbox_area_xyxy(bbox)),
        }
        return bbox, padded, entry

    new_data = {}
    next_bid = max(int(k) for k in data.keys()) + 1
    for bid_str, bubble_data in data.items():
        bid = int(bid_str)
        if bid not in bubble_ids_to_split:
            # Keep original
            new_data[bid_str] = bubble_data
            continue
        # Try to split
        split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})
        if not split_result:
            # No split needed
            new_data[bid_str] = bubble_data
            continue
        left_indices, right_indices, gap_size = split_result
        print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
        print(f" Left indices: {left_indices}")
        print(f" Right indices: {right_indices}")
        # Left part keeps the original bubble id.
        left_bbox, left_padded, left_entry = build_side(bubble_data, left_indices, bid)
        print(f" Left bbox: {left_bbox} -> padded: {left_padded}")
        new_data[str(bid)] = left_entry
        # Right part gets a brand-new id.
        right_bbox, right_padded, right_entry = build_side(bubble_data, right_indices, next_bid)
        print(f" Right bbox: {right_bbox} -> padded: {right_padded}")
        new_data[str(next_bid)] = right_entry
        next_bid += 1

    # Write output
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)
    print(f"\n✅ Saved to {output_file}")


if __name__ == "__main__":
    split_bubbles_in_json(
        input_file="bubbles_original.json",  # Always read from original
        output_file="bubbles_split.json",
        bubble_ids_to_split=(2, 7),
    )

154
older-code/split_final.py Normal file
View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON
"""
import json
import copy
def quad_bbox(quad):
    """Bounding box (x1, y1, x2, y2) spanned by a quad's corner points."""
    xs = tuple(pt[0] for pt in quad)
    ys = tuple(pt[1] for pt in quad)
    return (min(xs), min(ys), max(xs), max(ys))
def boxes_union_xyxy(boxes):
    """Union of the given xyxy boxes, skipping None entries; None if all missing."""
    kept = [box for box in boxes if box is not None]
    if not kept:
        return None
    left = min(box[0] for box in kept)
    top = min(box[1] for box in kept)
    right = max(box[2] for box in kept)
    bottom = max(box[3] for box in kept)
    return (int(left), int(top), int(right), int(bottom))
def xyxy_to_xywh(bbox):
    """Translate an (x1, y1, x2, y2) box into {x, y, w, h}; None is forwarded."""
    if bbox is None:
        return None
    return dict(
        x=int(bbox[0]),
        y=int(bbox[1]),
        w=int(bbox[2] - bbox[0]),
        h=int(bbox[3] - bbox[1]),
    )
def bbox_area_xyxy(b):
    """Area of an xyxy box; a None box counts as zero."""
    return 0 if b is None else (b[2] - b[0]) * (b[3] - b[1])
# ── Load the detector's original (pre-split) bubble layout ───────────────────
with open("bubbles_original.json", "r", encoding="utf-8") as f:
    original = json.load(f)


def build_split_entry(box_data, indices, default_order):
    """Build one output bubble from the quads of *box_data* selected by *indices*.

    The bubble bbox is the union of the selected quads, padded by 3 px and
    clamped at 0; reading_order falls back to *default_order* when the source
    bubble carries none. Quad points are converted to plain lists for JSON.
    """
    quads = [box_data['quads'][i] for i in indices]
    quad_bboxes = [box_data['quad_bboxes'][i] for i in indices]
    bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    padded = (max(0, bbox[0] - 3), max(0, bbox[1] - 3), bbox[2] + 3, bbox[3] + 3)
    return {
        "x": padded[0],
        "y": padded[1],
        "w": padded[2] - padded[0],
        "h": padded[3] - padded[1],
        "reading_order": box_data.get("reading_order", default_order),
        "quad_bboxes": quad_bboxes,
        "quads": [[list(p) for p in quad] for quad in quads],  # explicit list conversion
        "text_bbox": xyxy_to_xywh(bbox),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(bbox),
        "line_union_area": int(bbox_area_xyxy(bbox)),
    }


def report_side(side, entry):
    """Print the one-line summary emitted after each bubble half is built."""
    print(f" {side}: y={entry['y']}, h={entry['h']}, quads={len(entry['quads'])}")


new_data = {}
# Copy all non-split bubbles through untouched.
for bid_str, bubble_data in original.items():
    if int(bid_str) not in (2, 7):
        new_data[bid_str] = copy.deepcopy(bubble_data)

# Box 2: left half keeps id 2, right half becomes new id 8.
# NOTE(review): the index lists were picked by hand from a previous debug run.
print("🔀 Splitting Box 2...")
box2_data = original["2"]
new_data["2"] = build_split_entry(box2_data, [10, 1, 2, 4, 8, 0, 3, 6, 11, 12], 2)
report_side("Left", new_data["2"])
new_data["8"] = build_split_entry(box2_data, [5, 7, 9], 8)
report_side("Right", new_data["8"])

# Box 7: left half keeps id 7, right half becomes new id 9.
print("\n🔀 Splitting Box 7...")
box7_data = original["7"]
new_data["7"] = build_split_entry(box7_data, [8, 13, 4, 11, 2, 6], 7)
report_side("Left", new_data["7"])
new_data["9"] = build_split_entry(box7_data, [0, 5, 1, 3, 7, 10, 12, 9], 9)
report_side("Right", new_data["9"])

# Sort by ID for output
new_data_sorted = {str(bid): new_data[str(bid)] for bid in sorted(int(k) for k in new_data)}
with open("bubbles.json", "w", encoding="utf-8") as f:
    json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)
print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Debug harness: run split_panel_box() on bubble 7 of 004.png and report
whether the proposed vertical split leaves tokens on both sides."""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import numpy as np
import importlib.util

spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Full filtering (confidence, noise/SFX/title, plus the top-band rule).
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)
    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    if qb[1] < int(image.shape[0] * mt.TOP_BAND_RATIO):
        if conf < 0.70 and len(t) >= 5:
            continue
    filtered.append((bbox, t, conf))

# Get grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=mt.auto_gap(image_path), bbox_padding=3
)

print("=== TESTING PANEL SPLIT ON BUBBLE 7 ===\n")
bid = 7
box = bubble_boxes[bid]
print(f"Bubble {bid} box: {box}")
print(f"Bubble {bid} quads: {len(bubble_quads[bid])}")
print(f"Bubble {bid} indices: {len(bubble_indices[bid])}")

# Test split_panel_box
split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
if split_result:
    box_left, box_right, split_x = split_result
    print(f"\n✓ Panel split detected!")
    print(f" Split X: {split_x}")
    print(f" Left box: {box_left}")
    print(f" Right box: {box_right}")
    # Simulate index split: assign each token to a side by its center x.
    left_idxs, right_idxs = [], []
    for idx in bubble_indices[bid]:
        cx, cy = mt.quad_center(filtered[idx][0])
        if cx < split_x:
            left_idxs.append(idx)
        else:
            right_idxs.append(idx)
    print(f"\n Left indices ({len(left_idxs)}): {left_idxs}")
    print(f" Right indices ({len(right_idxs)}): {right_idxs}")
    if left_idxs and right_idxs:
        print(f"\n✓ Split is valid (both sides have content)")
    else:
        print(f"\n✗ Split is invalid (one side is empty)")
else:
    # Echo the guard conditions the splitter presumably checks, for triage.
    print(f"\n✗ No panel split detected")
    print(f" Threshold would be: quads >= 10? {len(bubble_quads[bid]) >= 10}")
    print(f" Width >= 50? {box[2] - box[0] >= 50}")
    print(f" Height >= 50? {box[3] - box[1] >= 50}")

159
pipeline-render.py Normal file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
pipeline_render.py
───────────────────────────────────────────────────────────────
Standalone Rendering Pipeline
Usage:
python pipeline-render.py /path/to/chapter/folder
"""
import os
import sys
import argparse
import zipfile
import importlib.util
from pathlib import Path
import cv2 # ✅ Added OpenCV to load the image
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
DEFAULT_FONT_PATH = "fonts/ComicNeue-Regular.ttf"
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Dynamically import the file at *filepath* under the module name *name*.

    Raises FileNotFoundError when no import spec/loader can be built for it.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    loader = getattr(spec, "loader", None)
    if loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """Return the image pages of *chapter_dir* in reading order.

    Stems that are pure digits sort numerically ("2" before "10"), fixing the
    lexicographic misordering of unpadded page numbers; zero-padded names keep
    their previous order. Non-numeric stems follow, sorted alphabetically.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]

    def page_key(p):
        # (group, numeric value, text): numeric stems first, as numbers.
        stem = p.stem
        return (0, int(stem), "") if stem.isdigit() else (1, 0, stem)

    return sorted(pages, key=page_key)
def pack_rendered_cbz(chapter_dir, output_cbz, rendered_files):
    """Bundle the rendered page images into an uncompressed CBZ archive.

    *chapter_dir* is accepted for interface compatibility but is not used
    here; each file in *rendered_files* is stored under its bare filename.
    Prints a warning and creates nothing when there are no rendered pages.
    """
    if not rendered_files:
        print("⚠️ No rendered pages found — CBZ not created.")
        return
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as archive:
        for page in rendered_files:
            archive.write(page, page.name)
    print(f"\n✅ Rendered CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(rendered_files)} translated pages ready to read.")
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_render(page_path, workdir, renderer_module, font_path):
    """Render one translated page.

    Reads output.txt + bubbles.json from *workdir* (produced by the OCR
    pipeline), asks *renderer_module* to draw the translations onto the page
    image, and writes the result into *workdir* under the original filename.

    Returns the output image path, or None when inputs are missing or the
    renderer raises (failures are reported, not fatal).
    """
    # NOTE(review): '' * 70 renders an empty separator — a divider character
    # was probably lost from this literal; confirm against the original file.
    print(f"\n{'' * 70}")
    print(f"🎨 RENDERING: {page_path.name}")
    print(f"{'' * 70}")
    txt_path = workdir / "output.txt"
    json_path = workdir / "bubbles.json"
    out_img = workdir / page_path.name
    if not txt_path.exists() or not json_path.exists():
        print(" ⚠️ Missing output.txt or bubbles.json. Did you run the OCR pipeline first?")
        return None
    # ✅ FIX: Load the image into memory (as a NumPy array) before passing it
    img_array = cv2.imread(str(page_path.resolve()))
    if img_array is None:
        print(f" ❌ Failed to load image: {page_path.name}")
        return None
    orig_dir = os.getcwd()
    try:
        # Run inside the page's workdir so renderer-relative paths land there;
        # all explicit paths are resolved to absolute before the chdir.
        os.chdir(workdir)
        # Pass the loaded image array instead of the string path
        renderer_module.render_translations(
            img_array,                # 1st arg: Image Data (NumPy array)
            str(out_img.resolve()),   # 2nd arg: Output image path
            str(txt_path.resolve()),  # 3rd arg: Translations text
            str(json_path.resolve()), # 4th arg: Bubbles JSON
            font_path                 # 5th arg: Font Path
        )
        print(" ✅ Render complete")
        return out_img
    except Exception as e:
        # Best-effort: report and skip this page rather than abort the run.
        print(f" ❌ Failed: {e}")
        return None
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """CLI entry point: render every page of a chapter that already has OCR +
    translation artifacts, then pack the results into <chapter>_rendered.cbz."""
    parser = argparse.ArgumentParser(description="Manga Rendering Pipeline")
    parser.add_argument("chapter_dir", help="Path to the folder containing original manga pages")
    args = parser.parse_args()
    chapter_dir = Path(args.chapter_dir).resolve()
    output_cbz = chapter_dir.parent / f"{chapter_dir.name}_rendered.cbz"
    script_dir = Path(__file__).parent
    # Font path must be absolute: process_render() chdir()s into each workdir.
    absolute_font_path = str((script_dir / DEFAULT_FONT_PATH).resolve())
    print("Loading renderer module...")
    try:
        renderer = load_module("manga_renderer", str(script_dir / "manga-renderer.py"))
    except Exception as e:
        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)
    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)
    print(f"\n📖 Chapter : {chapter_dir}")
    print(f" Pages : {len(pages)}\n")
    succeeded, failed = [], []
    rendered_files = []
    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] Checking data for {page_path.name}...")
        # Per-page artifacts live in <chapter>/translated/<page-stem>/.
        workdir = Path(chapter_dir) / "translated" / page_path.stem
        out_file = process_render(page_path, workdir, renderer, absolute_font_path)
        if out_file:
            succeeded.append(page_path.name)
            rendered_files.append(out_file)
        else:
            failed.append(page_path.name)
    # NOTE(review): the separator f-strings multiply an empty literal
    # ('' * 70) and print nothing — a divider char was likely lost; confirm.
    print(f"\n{'' * 70}")
    print("RENDER PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) rendered successfully")
    if failed:
        print(f"{len(failed)} page(s) skipped or failed:")
        for f in failed:
            print(f"{f}")
    print(f"{'' * 70}\n")
    print("Packing final CBZ...")
    pack_rendered_cbz(chapter_dir, output_cbz, rendered_files)


if __name__ == "__main__":
    main()

282
pipeline-translator.py Normal file
View File

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
"""
pipeline-translator.py
───────────────────────────────────────────────────────────────
Translation OCR pipeline (Batch Processing Only)
Usage:
python pipeline-translator.py /path/to/chapter/folder
"""
import os
import sys
import argparse
import importlib.util
from pathlib import Path
# ─────────────────────────────────────────────────────────────
# PIPELINE CONFIGURATION
# Maps to the process_manga_page() signature in manga-translator.py
# ─────────────────────────────────────────────────────────────
# Keyword arguments forwarded to process_manga_page(); verify_translator_api()
# checks each key against that function's signature before processing.
PIPELINE_CONFIG = dict(
    source_lang = "en",  # language code of the source pages' text
    target_lang = "ca",  # language code to translate into
)
# ─────────────────────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────────────────────
def load_module(name: str, filepath: str):
    """Dynamically import the Python file at *filepath* as a module named *name*.

    Raises FileNotFoundError when no import spec (or loader) can be built
    for the given path.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    loader = getattr(spec, "loader", None)
    if loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
def sorted_pages(chapter_dir: Path):
    """Return the chapter's page images, ordered by filename stem.

    Only regular files with a recognised image extension (case-insensitive)
    are included; subdirectories such as ``translated/`` are skipped.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".webp")
    candidates = (entry for entry in chapter_dir.iterdir() if entry.is_file())
    pages = [entry for entry in candidates if entry.suffix.lower() in image_exts]
    pages.sort(key=lambda entry: entry.stem)
    return pages
def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
    """Ensure ``<chapter_dir>/translated/<page_stem>`` exists and return it.

    Idempotent: an already-existing directory is returned unchanged.
    """
    page_dir = chapter_dir.joinpath("translated", page_stem)
    page_dir.mkdir(parents=True, exist_ok=True)
    return page_dir
def verify_translator_api(module) -> bool:
    """
    Checks that the loaded module exposes process_manga_page()
    and that it accepts all keys defined in PIPELINE_CONFIG.

    A ``**kwargs`` catch-all parameter is treated as accepting every
    config key, since the keyword call in process_page() would succeed
    regardless of the explicit parameter names.

    Prints a warning for any missing parameter so mismatches are
    caught immediately rather than silently falling back to defaults.

    Returns True when the API is compatible, False otherwise.
    """
    import inspect
    fn = getattr(module, "process_manga_page", None)
    if fn is None:
        print("❌ manga-translator.py does not expose process_manga_page()")
        return False
    sig = inspect.signature(fn)
    params = sig.parameters
    # Fix: a VAR_KEYWORD (**kwargs) parameter accepts any keyword argument,
    # so a pure named-parameter lookup would give a false negative here.
    accepts_kwargs = any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
    )
    ok = True
    for key in PIPELINE_CONFIG:
        if key not in params and not accepts_kwargs:
            print(
                f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
                f"process_manga_page() — update pipeline or translator."
            )
            ok = False
    return ok
# ─────────────────────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────────────────────
def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
    """Run OCR + translation for one page and write its artifacts into *workdir*.

    Side effects: temporarily chdir()s into *workdir* (restored in ``finally``),
    writes ``bubbles.json`` / ``output.txt`` via the translator, and optionally
    ``debug_clusters.png`` when cv2 is available.

    Returns True when the translator produced at least one result, else False.
    """
    print(f"\n{'' * 70}")
    print(f" PAGE : {page_path.name}")
    print(f"{'' * 70}")
    orig_dir = os.getcwd()
    try:
        # Run inside the page's own workdir so debug images and
        # output files land there automatically.
        os.chdir(workdir)
        # Absolute paths so the output location is independent of the chdir above.
        output_json = str(workdir / "bubbles.json")
        output_txt = str(workdir / "output.txt")
        debug_path = str(workdir / "debug_clusters.png")
        print(" ⏳ Extracting text and translating...")
        results = translator_module.process_manga_page(
            image_path = str(page_path.resolve()),
            output_json = output_json,
            output_txt = output_txt,
            **PIPELINE_CONFIG,
        )
        # ── Optional debug visualisation ─────────────────────
        # Best-effort only: any failure here (cv2 missing, unreadable image,
        # schema drift in `results`) is reported but never fails the page.
        if results:
            try:
                import cv2
                image_bgr = cv2.imread(str(page_path.resolve()))
                if image_bgr is not None:
                    # Reconstruct vis_boxes / vis_lines from results dict
                    vis_boxes = {}
                    vis_lines = {}
                    vis_indices = {}
                    for bid_str, data in results.items():
                        bid = int(bid_str)
                        xywh = data["box"]
                        # Convert x/y/w/h to the (x1, y1, x2, y2) corners the
                        # renderer's debug drawer expects.
                        vis_boxes[bid] = (
                            xywh["x"],
                            xywh["y"],
                            xywh["x"] + xywh["w"],
                            xywh["y"] + xywh["h"],
                        )
                        vis_lines[bid] = data.get("lines", [])
                        vis_indices[bid] = []
                    translator_module.draw_debug_clusters(
                        image_bgr = image_bgr,
                        out_boxes = vis_boxes,
                        out_lines = vis_lines,
                        out_indices = vis_indices,
                        ocr = [],
                        save_path = debug_path,
                    )
            except Exception as e:
                print(f" ⚠️ Debug visualisation failed (non-fatal): {e}")
        # ── Sanity-check outputs ──────────────────────────────
        # Warn (don't fail) when expected artifacts are absent or empty.
        for fname in ("output.txt", "bubbles.json"):
            fpath = workdir / fname
            if not fpath.exists() or fpath.stat().st_size == 0:
                print(f" ⚠️ {fname} is missing or empty after processing.")
        if not results:
            print(" ⚠️ process_manga_page() returned no results.")
            return False
        print(f" ✅ Done — {len(results)} box(es) processed.")
        return True
    except Exception as e:
        # Broad catch by design: one bad page must not abort the batch.
        import traceback
        print(f" ❌ Failed: {e}")
        traceback.print_exc()
        return False
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────────────────────
def main():
    """CLI entry point: parse arguments, load the translator, process the pages.

    Exits with status 1 on any unrecoverable setup problem (bad directory,
    missing/unloadable translator module, API mismatch, empty page range).
    """
    parser = argparse.ArgumentParser(
        description="Manga Translation OCR Batch Pipeline"
    )
    parser.add_argument(
        "chapter_dir",
        help="Path to the folder containing manga page images"
    )
    parser.add_argument(
        "--start", type=int, default=1,
        help="Start from this page number (1-based, default: 1)"
    )
    parser.add_argument(
        "--end", type=int, default=None,
        help="Stop after this page number inclusive (default: all)"
    )
    parser.add_argument(
        "--source", "-s", default=None,
        help=f"Override source language (default: {PIPELINE_CONFIG['source_lang']})"
    )
    parser.add_argument(
        "--target", "-t", default=None,
        help=f"Override target language (default: {PIPELINE_CONFIG['target_lang']})"
    )
    args = parser.parse_args()
    # Allow CLI overrides of source/target without touching PIPELINE_CONFIG
    config = dict(PIPELINE_CONFIG)
    if args.source:
        config["source_lang"] = args.source
    if args.target:
        config["target_lang"] = args.target
    # Patch PIPELINE_CONFIG in-place so process_page() picks up overrides
    PIPELINE_CONFIG.update(config)
    chapter_dir = Path(args.chapter_dir).resolve()
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)
    # ── Load translator module ────────────────────────────────
    # The translator is expected to sit next to this script.
    script_dir = Path(__file__).parent
    module_path = script_dir / "manga-translator.py"
    if not module_path.exists():
        print(f"❌ manga-translator.py not found in {script_dir}")
        sys.exit(1)
    print(f"📦 Loading translator from: {module_path}")
    try:
        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)
    # ── API compatibility check ───────────────────────────────
    # Fail fast if PIPELINE_CONFIG keys don't match process_manga_page().
    if not verify_translator_api(translator):
        print("❌ Aborting — fix the parameter mismatch above first.")
        sys.exit(1)
    # ── Discover pages ────────────────────────────────────────
    all_pages = sorted_pages(chapter_dir)
    if not all_pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)
    # Apply --start / --end slice (1-based, inclusive)
    start_idx = max(0, args.start - 1)
    # NOTE(review): when --end exceeds the page count the slice is safe, but the
    # header below reports the raw end_idx rather than the clamped count.
    end_idx = args.end if args.end is not None else len(all_pages)
    pages = all_pages[start_idx:end_idx]
    if not pages:
        print(f"❌ No pages in range [{args.start}, {args.end}]")
        sys.exit(1)
    # ── Summary header ────────────────────────────────────────
    print(f"\n{'' * 70}")
    print(f" 📖 Chapter : {chapter_dir.name}")
    print(f" 📄 Pages : {len(pages)} "
    f"(of {len(all_pages)} total, "
    f"range {args.start}{end_idx})")
    print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']}"
    f"{PIPELINE_CONFIG['target_lang']}")
    print(f"{'' * 70}\n")
    succeeded, failed = [], []
    # Process each page independently; a failure is recorded, not fatal.
    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(chapter_dir, page_path.stem)
        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)
    # ── Final report ──────────────────────────────────────────
    print(f"\n{'' * 70}")
    print(" PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) succeeded")
    if failed:
        print(f"{len(failed)} page(s) failed:")
        for name in failed:
            print(f"{name}")
    print(f"{'' * 70}\n")
# Entry point guard: run the batch pipeline only when executed directly.
if __name__ == "__main__":
    main()

View File

@@ -1,255 +0,0 @@
#!/usr/bin/env python3
"""
pipeline.py
───────────────────────────────────────────────────────────────
Translation + render pipeline
Flow per page:
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
2) render_translations() -> page_translated.png
3) Pack CBZ with originals + rendered pages + text outputs
Folder structure:
<CHAPTER_DIR>/
├── 000.png
├── 001.png
└── translated/
├── 000/
│ ├── output.txt
│ ├── bubbles.json
│ ├── page_translated.png
│ └── debug_clusters.png (optional)
├── 001/
│ └── ...
└── ...
CBZ:
- pages/<original pages>
- rendered/<page_stem>_translated.png
- translations/<page_stem>_output.txt
"""
import os
import sys
import zipfile
import importlib.util
from pathlib import Path
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Chapter folder containing the page images to process.
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
# Destination of the packed .cbz archive.
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
# OCR source language and translation target language, forwarded to
# translate_manga_text() in process_page().
SOURCE_LANG = "en"
TARGET_LANG = "ca"
# translator (NEW signature-compatible)
CONFIDENCE_THRESHOLD = 0.10  # passed as confidence_threshold
MIN_TEXT_LENGTH = 1  # passed as min_text_length
GAP_PX = "auto" # was cluster/proximity in old version; passed as gap_px
FILTER_SFX = True  # passed as filter_sound_effects
QUALITY_THRESHOLD = 0.50  # passed as quality_threshold
READING_MODE = "ltr"  # passed as reading_mode
DEBUG = True  # passed as debug; presumably enables debug_clusters.png — confirm in translator
# renderer
RENDER_ENABLED = True  # set False to skip the render step in process_page()
RENDER_OUTPUT_NAME = "page_translated.png"  # per-page rendered filename, also matched by pack_cbz()
# optional custom font list for renderer
FONT_CANDIDATES = [
    "fonts/ComicNeue-Regular.ttf",
    "fonts/ComicRelief-Regular.ttf"
]
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Import the file at *filepath* as a standalone module registered as *name*.

    Raises FileNotFoundError if no usable import spec can be created.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    if not (spec and spec.loader):
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    loaded = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(loaded)
    return loaded
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """List the chapter's page image files, sorted by filename stem."""
    image_exts = (".jpg", ".jpeg", ".png", ".webp")
    entries = Path(chapter_dir).iterdir()
    pages = [e for e in entries if e.is_file() and e.suffix.lower() in image_exts]
    pages.sort(key=lambda e: e.stem)
    return pages
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return ``<chapter_dir>/translated/<page_stem>``."""
    page_dir = Path(chapter_dir, "translated", page_stem)
    page_dir.mkdir(parents=True, exist_ok=True)
    return page_dir
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Pack originals, rendered pages and text outputs into one CBZ archive.

    Archive layout: pages/<original>, rendered/<stem>_translated.png,
    translations/<stem>_output.txt. Nothing is written when the chapter
    folder holds no original images.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".webp")
    originals = sorted(
        (entry for entry in Path(chapter_dir).iterdir()
         if entry.is_file() and entry.suffix.lower() in image_exts),
        key=lambda entry: entry.stem,
    )
    # Per-page artifacts are keyed by their parent folder name (the page stem).
    text_outputs = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name,
    )
    rendered_pages = sorted(
        translated_dir.rglob(RENDER_OUTPUT_NAME),
        key=lambda p: p.parent.name,
    )
    if not originals:
        print("⚠️ No original pages found — CBZ not created.")
        return
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as archive:
        # original pages
        for img in originals:
            arcname = f"pages/{img.name}"
            archive.write(img, arcname)
            print(f" 🖼 {arcname}")
        # rendered pages
        for rp in rendered_pages:
            arcname = f"rendered/{rp.parent.name}_translated.png"
            archive.write(rp, arcname)
            print(f" 🎨 {arcname}")
        # text outputs
        for txt in text_outputs:
            arcname = f"translations/{txt.parent.name}_output.txt"
            archive.write(txt, arcname)
            print(f" 📄 {arcname}")
    print(
        f"\n✅ CBZ saved → {output_cbz} "
        f"({len(originals)} original, {len(rendered_pages)} rendered, {len(text_outputs)} text)"
    )
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir, translator_module, renderer_module):
    """
    Runs translator + renderer for one page.
    All generated files are written inside workdir.

    Side effect: chdir()s into workdir for the duration of the call
    (restored in ``finally``) so the translator/renderer's relative
    output filenames land there. Returns True on success, False on
    any failure.
    """
    print(f"\n{'' * 70}")
    print(f"PAGE: {page_path.name}")
    print(f"{'' * 70}")
    orig_dir = os.getcwd()
    try:
        os.chdir(workdir)
        # 1) translate
        # Relative "output.txt" / "bubbles.json" resolve inside workdir
        # because of the chdir above; thresholds come from module constants.
        translator_module.translate_manga_text(
            image_path= str(page_path.resolve()),
            source_lang=SOURCE_LANG,
            target_lang=TARGET_LANG,
            confidence_threshold=CONFIDENCE_THRESHOLD,
            min_text_length=MIN_TEXT_LENGTH,
            gap_px=GAP_PX,
            filter_sound_effects=FILTER_SFX,
            quality_threshold=QUALITY_THRESHOLD,
            export_to_file="output.txt",
            export_bubbles_to="bubbles.json",
            reading_mode=READING_MODE,
            debug=DEBUG
        )
        print(" ✅ translator done")
        # 2) render
        # Consumes the translator's two output files from the current dir.
        if RENDER_ENABLED:
            renderer_module.render_translations(
                input_image=str(page_path.resolve()),
                output_image=RENDER_OUTPUT_NAME,
                translations_file="output.txt",
                bubbles_file="bubbles.json",
                font_candidates=FONT_CANDIDATES
            )
            print(" ✅ renderer done")
        return True
    except Exception as e:
        # Broad catch by design: one bad page must not abort the batch.
        print(f" ❌ Failed: {e}")
        return False
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """Run translator + renderer over every page in CHAPTER_DIR, then pack a CBZ.

    Exits with status 1 when a required module fails to load or no page
    images are found. Per-page failures are collected and reported but do
    not abort the run.
    """
    print("Loading modules...")
    # NOTE(review): relative paths here assume the process is started from
    # the directory containing manga-translator.py / manga-renderer.py.
    try:
        translator = load_module("manga_translator", "manga-translator.py")
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)
    try:
        renderer = load_module("manga_renderer", "manga-renderer.py")
    except Exception as e:
        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)
    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
        sys.exit(1)
    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
    print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
    failed = []
    # Each page gets its own workdir under <chapter>/translated/<stem>.
    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
        ok = process_page(page_path, workdir, translator, renderer)
        if ok:
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)
    print(f"\n{'' * 70}")
    print("PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) succeeded")
    if failed:
        print(f"{len(failed)} page(s) failed:")
        for f in failed:
            print(f"{f}")
    print(f"{'' * 70}\n")
    # Pack everything (originals + rendered + text) regardless of failures.
    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
# Entry point guard: run the pipeline only when executed directly.
if __name__ == "__main__":
    main()

View File

@@ -1,19 +0,0 @@
# ─────────────────────────────────────────────
# manga-translator + manga-renderer
# Python >= 3.9 recommended
# ─────────────────────────────────────────────
# Computer vision + image processing
opencv-python>=4.8.0
numpy>=1.24.0
Pillow>=10.0.0
# OCR engine (manga-translator)
manga-ocr>=0.1.8
# Translation (manga-translator)
deep-translator>=1.11.0
# HTTP / file handling used internally by manga-ocr
requests>=2.31.0