Files
manga-translator/manga-renderer.py
Guillem Hernandez Sola ead32cef24 Ellipses
2026-04-12 18:47:30 +02:00

715 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
manga-renderer.py
─────────────────────────────────────────────────────────────────
Pipeline:
1. Detect panel boundaries using border-line detection
2. Split wide panels that contain internal vertical borders
3. For each bubble:
a. Detect real bubble ellipse via flood-fill + contour
b. Assign bubble to its panel (max overlap)
c. Clip + nudge ellipse to stay inside panel bounds
d. White-fill the clipped rotated ellipse
e. Fit + centre translated text inside safe area
"""
import os
import math
import json
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
# ─────────────────────────────────────────────
# CONSTANTS
# ─────────────────────────────────────────────
DEFAULT_FONT_PATH = "fonts/ComicRelief-Regular.ttf"
DEFAULT_FONT_COLOR = (0, 0, 0)      # RGB black (PIL side)
WHITE = (255, 255, 255)             # fill color for bubble interiors
MAX_FONT_SIZE = 22                  # largest size tried by best_fit_font
MIN_FONT_SIZE = 6                   # smallest size before giving up
FONT_SIZE_STEP = 1                  # decrement per fitting attempt
TEXT_RATIO = 0.82                   # padding factor applied to the inscribed rectangle
FLOOD_TOLERANCE = 30                # loDiff/upDiff passed to cv2.floodFill
BORDER_SHRINK_PX = 4                # margin kept between fill ellipse and panel border
MIN_PANEL_AREA_RATIO = 0.02         # blobs below 2% of the page area are not panels
# How far the center can be nudged as a fraction
# of the semi-axis before we resort to shrinking
MAX_NUDGE_RATIO = 0.30
# Debug colors (BGR)
DBG_COLOR_PANEL = (200, 200, 0)     # panel rectangles
DBG_COLOR_DETECTED = (0, 200, 0)    # detected (pre-clip) ellipse
DBG_COLOR_FILL = (0, 0, 255)        # final fill ellipse
DBG_COLOR_SAFE = (255, 120, 0)      # text safe rectangle
DBG_COLOR_CENTER = (255, 255, 0)    # (possibly nudged) ellipse center
DBG_COLOR_SEED = (255, 0, 255)      # original flood-fill seed
DBG_COLOR_LABEL = (80, 80, 200)     # bubble id labels
DBG_THICKNESS = 2                   # overlay line thickness
DBG_CENTER_R = 5                    # center marker radius (px)
# ─────────────────────────────────────────────
# PARSERS
# ─────────────────────────────────────────────
def parse_translations(translations_file):
    """Read the pipe-delimited translations file.

    Useful lines look like ``#<id>|<original>|<translation>``. Lines
    not starting with ``#``, lines with fewer than three fields, lines
    whose translation starts with ``[`` (placeholders), and lines with
    a non-numeric id are all skipped.

    Returns a dict mapping bubble id (int) to translated text (str).
    """
    result = {}
    with open(translations_file, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if not entry.startswith("#"):
                continue
            fields = entry.split("|")
            if len(fields) < 3:
                continue
            text = fields[2].strip()
            if text.startswith("["):
                continue
            try:
                result[int(fields[0].lstrip("#"))] = text
            except ValueError:
                continue
    return result
def parse_bubbles(bubbles_file):
    """Load bubbles.json and coerce its string keys to int bubble ids."""
    with open(bubbles_file, "r", encoding="utf-8") as handle:
        data = json.load(handle)
    return {int(key): value for key, value in data.items()}
# ─────────────────────────────────────────────
# FONT HELPERS
# ─────────────────────────────────────────────
def load_font(font_path, size):
    """Return a truetype font at *size*; fall back to PIL's built-in
    default when the path is missing or the font fails to load."""
    if not (font_path and os.path.exists(font_path)):
        return ImageFont.load_default()
    try:
        return ImageFont.truetype(font_path, size)
    except Exception:
        return ImageFont.load_default()
def measure_text(draw, text, font):
    """Pixel width/height of *text* rendered with *font*, per textbbox."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top
def wrap_text(draw, text, font, max_width):
    """Greedy word-wrap of *text* into lines at most *max_width* px wide.

    A word wider than *max_width* still gets its own line (never split).
    The inter-line gap is derived from the first line's height (min 2 px).

    Returns (lines, total_height, widest_line_width); empty text yields
    ([""], 0, 0).
    """
    lines = []
    current = ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        candidate_w, _ = measure_text(draw, candidate, font)
        if candidate_w <= max_width or not current:
            current = candidate
        else:
            lines.append(current)
            current = word
    if current:
        lines.append(current)
    if not lines:
        return [""], 0, 0
    sizes = [measure_text(draw, line, font) for line in lines]
    widths = [w for w, _ in sizes]
    heights = [h for _, h in sizes]
    gap = max(heights[0] // 5, 2)
    total = sum(heights) + gap * (len(lines) - 1)
    return lines, total, max(widths)
def best_fit_font(draw, text, font_path, safe_w, safe_h):
    """Find the largest font size whose wrapped text fits safe_w×safe_h.

    Sizes are tried from MAX_FONT_SIZE down to MIN_FONT_SIZE; if none
    fits, MIN_FONT_SIZE is returned anyway (text may overflow).

    Returns (font, lines, total_height).
    """
    for size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -FONT_SIZE_STEP):
        candidate = load_font(font_path, size)
        lines, height, widest = wrap_text(draw, text, candidate, safe_w)
        if height <= safe_h and widest <= safe_w:
            return candidate, lines, height
    fallback = load_font(font_path, MIN_FONT_SIZE)
    lines, height, _ = wrap_text(draw, text, fallback, safe_w)
    return fallback, lines, height
# ─────────────────────────────────────────────
# PANEL DETECTION HELPERS
# ─────────────────────────────────────────────
def merge_nested_panels(panels):
    """Drop panels that are more than 80% covered by a larger panel.

    Panels are examined largest-first; a panel survives only if no
    already-kept panel overlaps more than 80% of its own area.
    """
    if len(panels) <= 1:
        return panels

    def box_area(box):
        x1, y1, x2, y2 = box
        return (x2 - x1) * (y2 - y1)

    survivors = []
    for box in sorted(panels, key=box_area, reverse=True):
        bx1, by1, bx2, by2 = box
        own_area = box_area(box)
        covered = False
        for kx1, ky1, kx2, ky2 in survivors:
            ox1, oy1 = max(bx1, kx1), max(by1, ky1)
            ox2, oy2 = min(bx2, kx2), min(by2, ky2)
            if ox2 > ox1 and oy2 > oy1:
                if (ox2 - ox1) * (oy2 - oy1) / own_area > 0.80:
                    covered = True
                    break
        if not covered:
            survivors.append(box)
    return survivors
def split_panels_on_internal_borders(panels, v_lines,
                                     img_w, img_h):
    """Split wide panels crossed by a strong internal vertical border.

    Only panels at least 30% of the image width are considered. The
    outer 20% on each side is excluded so the panel's own border is not
    mistaken for a split. A column counts as a border when its vertical
    line mass covers >=40% of the panel height; the split is placed at
    the median qualifying column, and only applied when both halves end
    up wider than 10% of the image.
    """
    out = []
    for (px1, py1, px2, py2) in panels:
        width = px2 - px1
        if width < img_w * 0.30:
            out.append((px1, py1, px2, py2))
            continue
        margin = int(width * 0.20)
        x_lo = px1 + margin
        x_hi = px2 - margin
        column_mass = v_lines[py1:py2, x_lo:x_hi].sum(axis=0)
        needed = (py2 - py1) * 255 * 0.40
        candidates = np.where(column_mass > needed)[0]
        if candidates.size == 0:
            out.append((px1, py1, px2, py2))
            continue
        split_x = int(np.median(candidates)) + x_lo
        if (split_x - px1) > img_w * 0.10 and (px2 - split_x) > img_w * 0.10:
            out.append((px1, py1, split_x, py2))
            out.append((split_x, py1, px2, py2))
            print(f" ✂️ Split ({px1},{py1})→({px2},{py2}) "
                  f"at x={split_x}")
        else:
            out.append((px1, py1, px2, py2))
    return out
# ─────────────────────────────────────────────
# PANEL DETECTION (v2 — border-line based)
# ─────────────────────────────────────────────
def detect_panels(img_bgr):
    """Detect manga panel bounding boxes via border-line morphology.

    Dark pixels are thresholded, long horizontal/vertical runs are kept
    with directional opening kernels, and the dilated union forms a
    closed border skeleton. Its inverse yields one connected blob per
    panel interior; blobs are filtered by area, aspect ratio and minimum
    size, then nested panels are merged and wide panels are split on
    internal vertical borders.

    Returns a list of (x1, y1, x2, y2) tuples sorted top-to-bottom in
    100-px bands, then left-to-right; falls back to the full image when
    nothing qualifies.
    """
    img_h, img_w = img_bgr.shape[:2]
    total_area = img_h * img_w
    min_area = total_area * MIN_PANEL_AREA_RATIO

    # Dark pixels are border candidates.
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(
        gray, 80, 255, cv2.THRESH_BINARY_INV)

    # Keep only long horizontal / vertical runs of dark pixels.
    h_len = max(40, img_w // 25)
    h_lines = cv2.morphologyEx(
        dark_mask, cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (h_len, 1)))
    v_len = max(40, img_h // 25)
    v_lines = cv2.morphologyEx(
        dark_mask, cv2.MORPH_OPEN,
        cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_len)))

    # Union + dilation closes small gaps in the border skeleton; its
    # inverse leaves one connected blob per panel interior.
    borders = cv2.dilate(cv2.bitwise_or(h_lines, v_lines),
                         np.ones((5, 5), np.uint8), iterations=2)
    interior = cv2.bitwise_not(borders)
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(
        interior, connectivity=8)

    panels = []
    for label_id in range(1, num_labels):  # label 0 is the background
        area = stats[label_id, cv2.CC_STAT_AREA]
        if area < min_area:
            continue
        x = stats[label_id, cv2.CC_STAT_LEFT]
        y = stats[label_id, cv2.CC_STAT_TOP]
        w = stats[label_id, cv2.CC_STAT_WIDTH]
        h = stats[label_id, cv2.CC_STAT_HEIGHT]
        if w * h > total_area * 0.90:
            continue  # whole-page blob, not a panel
        if max(w, h) / max(min(w, h), 1) > 15:
            continue  # degenerate sliver
        # Skip panels too narrow/short to be real panels
        if w < img_w * 0.05 or h < img_h * 0.05:
            continue
        panels.append((x, y, x + w, y + h))

    panels = merge_nested_panels(panels)
    panels = split_panels_on_internal_borders(
        panels, v_lines, img_w, img_h)
    panels.sort(key=lambda p: (p[1] // 100, p[0]))

    if not panels:
        print(" ⚠️ No panels detected — using full image as panel")
        panels = [(0, 0, img_w, img_h)]
    print(f" 📐 {len(panels)} panel(s) detected:")
    for i, (x1, y1, x2, y2) in enumerate(panels, 1):
        pct = (x2 - x1) * (y2 - y1) / total_area * 100
        print(f" Panel {i}: ({x1},{y1})→({x2},{y2}) "
              f"{x2-x1}×{y2-y1}px area={pct:.1f}%")
    return panels
# ─────────────────────────────────────────────
# BUBBLE → PANEL ASSIGNMENT
# ─────────────────────────────────────────────
def assign_panel(bubble_data, panels, img_w, img_h):
    """Pick the panel a bubble belongs to.

    Primary rule: the panel with the largest rectangle overlap against
    the bubble's bounding box. If nothing overlaps, fall back to the
    panel containing the bubble center; if that fails too, use the whole
    image as the panel.
    """
    bx, by = bubble_data["x"], bubble_data["y"]
    bw, bh = bubble_data["w"], bubble_data["h"]
    center_x = bx + bw / 2.0
    center_y = by + bh / 2.0

    winner = None
    winner_overlap = 0
    for px1, py1, px2, py2 in panels:
        ox = min(bx + bw, px2) - max(bx, px1)
        oy = min(by + bh, py2) - max(by, py1)
        if ox > 0 and oy > 0 and ox * oy > winner_overlap:
            winner_overlap = ox * oy
            winner = (px1, py1, px2, py2)
    if winner is not None:
        return winner

    for px1, py1, px2, py2 in panels:
        if px1 <= center_x <= px2 and py1 <= center_y <= py2:
            return (px1, py1, px2, py2)
    return (0, 0, img_w, img_h)
# ─────────────────────────────────────────────
# BUBBLE ELLIPSE DETECTION (flood-fill)
# ─────────────────────────────────────────────
def detect_bubble_ellipse(img_bgr, bubble_data, panel):
    """Recover the real bubble outline around a detected bounding box.

    Flood-fills bright pixels starting at (or near) the bbox center,
    restricted to *panel* so the fill cannot escape, then fits an
    ellipse to the largest filled contour.

    Returns (cx, cy, semi_a, semi_b, angle_deg) or None when no usable
    bright seed, contour, or large-enough region is found.
    """
    x, y = bubble_data["x"], bubble_data["y"]
    w, h = bubble_data["w"], bubble_data["h"]
    img_h, img_w = img_bgr.shape[:2]
    px1, py1, px2, py2 = panel

    seed_x = max(1, min(img_w - 2, int(x + w / 2.0)))
    seed_y = max(1, min(img_h - 2, int(y + h / 2.0)))

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

    # Zero everything outside the panel so the fill stays inside it.
    panel_mask = np.zeros_like(binary)
    panel_mask[py1:py2, px1:px2] = binary[py1:py2, px1:px2]

    # The center may land on a dark text stroke; search outward in
    # growing squares for a bright pixel to seed the flood fill from.
    if gray[seed_y, seed_x] < 150:
        relocated = None
        for radius in range(1, min(w, h) // 3):
            for dy in range(-radius, radius + 1):
                for dx in range(-radius, radius + 1):
                    nx, ny = seed_x + dx, seed_y + dy
                    if (px1 <= nx < px2 and py1 <= ny < py2
                            and gray[ny, nx] >= 200):
                        relocated = (nx, ny)
                        break
                if relocated:
                    break
            if relocated:
                break
        if relocated is None:
            return None
        seed_x, seed_y = relocated

    # floodFill marks visited pixels in a mask 2px larger than the image.
    flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8)
    cv2.floodFill(panel_mask.copy(), flood_mask,
                  (seed_x, seed_y), 255,
                  loDiff=FLOOD_TOLERANCE, upDiff=FLOOD_TOLERANCE,
                  flags=cv2.FLOODFILL_FIXED_RANGE)
    filled = flood_mask[1:-1, 1:-1] * 255

    contours, _ = cv2.findContours(
        filled, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    outline = max(contours, key=cv2.contourArea)
    # fitEllipse needs at least 5 points; tiny regions are noise.
    if len(outline) < 5:
        return None
    if cv2.contourArea(outline) < 100:
        return None
    (ecx, ecy), (ew, eh), angle = cv2.fitEllipse(outline)
    return float(ecx), float(ecy), float(ew / 2), float(eh / 2), float(angle)
# ─────────────────────────────────────────────
# CLIP + NUDGE ELLIPSE TO PANEL
# ─────────────────────────────────────────────
def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel,
                          shrink=BORDER_SHRINK_PX):
    """Keep a rotated ellipse inside *panel*.

    1. Shrink both semi-axes by the border margin.
    2. Nudge the center inward (capped at MAX_NUDGE_RATIO per axis).
    3. Shrink the axes by whatever overflow remains after the nudge.

    Up to three passes are made. Returns (cx, cy, semi_a, semi_b);
    the center may have been moved.
    """
    px1, py1, px2, py2 = panel
    lo_x, lo_y = px1 + shrink, py1 + shrink
    hi_x, hi_y = px2 - shrink, py2 - shrink
    axis_a = max(sa - shrink, 1.0)
    axis_b = max(sb - shrink, 1.0)

    def overflow():
        # Per-side overflow of the rotated ellipse's axis-aligned
        # bounding box against the shrunken panel rectangle.
        rad = math.radians(angle)
        half_w = math.sqrt((axis_a * math.cos(rad)) ** 2 +
                           (axis_b * math.sin(rad)) ** 2)
        half_h = math.sqrt((axis_a * math.sin(rad)) ** 2 +
                           (axis_b * math.cos(rad)) ** 2)
        return (max(0, lo_x - (cx - half_w)),
                max(0, (cx + half_w) - hi_x),
                max(0, lo_y - (cy - half_h)),
                max(0, (cy + half_h) - hi_y))

    for _ in range(3):
        left, right, top, bottom = overflow()
        if max(left, right, top, bottom) == 0:
            break
        # Step 1: nudge center inward, capped per axis.
        cap_x = axis_a * MAX_NUDGE_RATIO
        cap_y = axis_b * MAX_NUDGE_RATIO
        cx += min(left, cap_x) - min(right, cap_x)
        cy += min(top, cap_y) - min(bottom, cap_y)
        # Step 2: shrink axes by the overflow still left after nudging.
        left, right, top, bottom = overflow()
        residual = max(left, right, top, bottom)
        if residual > 0:
            axis_a = max(axis_a - residual, 1.0)
            axis_b = max(axis_b - residual, 1.0)
    return cx, cy, axis_a, axis_b
# ─────────────────────────────────────────────
# GET FINAL RENDER ELLIPSE PARAMS
# ─────────────────────────────────────────────
def get_render_ellipse(img_bgr, bubble_data, panel):
    """Compute the ellipse used for the white fill and text placement.

    Tries contour detection first; otherwise falls back to treating the
    bubble's bounding box as an axis-aligned ellipse. Either way the
    ellipse is clipped/nudged into the panel.

    Returns (cx, cy, fill_sa, fill_sb, angle, raw_sa, raw_sb,
    safe_w, safe_h, method) where method is "detected" or "fallback".
    """
    detected = detect_bubble_ellipse(img_bgr, bubble_data, panel)
    if detected is not None:
        cx, cy, sa, sb, angle = detected
        method = "detected"
    else:
        x, y = bubble_data["x"], bubble_data["y"]
        w, h = bubble_data["w"], bubble_data["h"]
        cx, cy = x + w / 2.0, y + h / 2.0
        sa, sb = w / 2.0, h / 2.0
        angle = 0.0
        method = "fallback"
    cx, cy, sa_fill, sb_fill = clip_ellipse_to_panel(
        cx, cy, sa, sb, angle, panel)
    # The largest axis-aligned rectangle inside an ellipse has sides
    # a·√2 and b·√2; TEXT_RATIO pads it further.
    safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO
    safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO
    return (cx, cy, sa_fill, sb_fill, angle,
            sa, sb, safe_w, safe_h, method)
# ─────────────────────────────────────────────
# DRAW ONE BUBBLE
# ─────────────────────────────────────────────
def draw_bubble(pil_img, img_bgr, bubble_data,
                translated_text, font_path,
                font_color, panel):
    """White-fill one bubble's ellipse and draw its translated text.

    Mutates *pil_img* in place. *img_bgr* is only read (for ellipse
    detection). Returns the detection method tag ("detected" or
    "fallback") for logging.
    """
    (cx, cy, sa_fill, sb_fill, angle,
     sa_det, sb_det,
     safe_w, safe_h, method) = get_render_ellipse(
        img_bgr, bubble_data, panel)
    cx_i = int(round(cx))
    cy_i = int(round(cy))

    # Paint the (possibly rotated) fill ellipse white using an OpenCV
    # mask, since PIL cannot draw rotated ellipses directly.
    img_h, img_w = img_bgr.shape[:2]
    mask = np.zeros((img_h, img_w), dtype=np.uint8)
    cv2.ellipse(mask, (cx_i, cy_i),
                (int(math.ceil(sa_fill)),
                 int(math.ceil(sb_fill))),
                angle, 0, 360, 255, -1)
    img_np = np.array(pil_img)
    img_np[mask == 255] = [255, 255, 255]
    pil_img.paste(Image.fromarray(img_np))

    if not translated_text:
        return method

    sw = max(int(safe_w), 1)
    sh = max(int(safe_h), 1)
    draw = ImageDraw.Draw(pil_img)
    font, lines, total_h = best_fit_font(
        draw, translated_text, font_path, sw, sh)
    if not lines:
        return method

    # BUGFIX: use a single inter-line gap derived from the first line's
    # height — the same formula wrap_text used when computing total_h —
    # so the drawn block's height matches the measured height and the
    # text stays vertically centred. The old code recomputed the gap
    # from each line's own height, which drifts when line heights vary
    # (ascenders/descenders), pushing the block off-centre.
    first_h = measure_text(draw, lines[0], font)[1]
    line_gap = max(first_h // 5, 2)
    y_cursor = cy_i - total_h // 2
    for line in lines:
        lw, lh = measure_text(draw, line, font)
        draw.text((cx_i - lw // 2, y_cursor),
                  line, font=font, fill=font_color)
        y_cursor += lh + line_gap
    return method
# ─────────────────────────────────────────────
# DEBUG OVERLAY
# ─────────────────────────────────────────────
def save_debug_ellipses(input_image_path, bubbles,
                        translations, panels, output_path):
    """Write a diagnostic overlay image showing panels and, for every
    translated bubble, the detected ellipse, the clipped fill ellipse,
    the text safe rectangle, the (possibly nudged) center and the
    original seed point. Prints a color legend after saving.
    """
    img = cv2.imread(input_image_path)
    if img is None:
        print(f" ⚠️ Debug: cannot load {input_image_path}")
        return
    overlay = img.copy()
    img_h, img_w = img.shape[:2]
    # Panel rectangles with P1, P2, ... labels.
    for i, (px1, py1, px2, py2) in enumerate(panels, 1):
        cv2.rectangle(overlay, (px1, py1), (px2, py2),
                      DBG_COLOR_PANEL, 3)
        cv2.putText(overlay, f"P{i}",
                    (px1 + 4, py1 + 22),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.65, DBG_COLOR_PANEL, 2)
    # One set of markers per bubble that has a translation.
    for bubble_id in sorted(translations.keys()):
        if bubble_id not in bubbles:
            continue
        bubble_data = bubbles[bubble_id]
        panel = assign_panel(bubble_data, panels, img_w, img_h)
        x = bubble_data["x"]; w = bubble_data["w"]
        y = bubble_data["y"]; h = bubble_data["h"]
        # Same geometry computation as the real renderer, so the debug
        # overlay reflects exactly what draw_bubble would do.
        (cx, cy, sa_fill, sb_fill, angle,
         sa_det, sb_det,
         safe_w, safe_h, method) = get_render_ellipse(
            img, bubble_data, panel)
        cx_i = int(round(cx)); cy_i = int(round(cy))
        sa_d_i = int(math.ceil(sa_det))
        sb_d_i = int(math.ceil(sb_det))
        sa_f_i = int(math.ceil(sa_fill))
        sb_f_i = int(math.ceil(sb_fill))
        sw_i = int(safe_w); sh_i = int(safe_h)
        # Detected (pre-clip) ellipse.
        cv2.ellipse(overlay, (cx_i, cy_i),
                    (sa_d_i, sb_d_i), angle, 0, 360,
                    DBG_COLOR_DETECTED, DBG_THICKNESS)
        # Final fill ellipse after nudging/clipping.
        cv2.ellipse(overlay, (cx_i, cy_i),
                    (sa_f_i, sb_f_i), angle, 0, 360,
                    DBG_COLOR_FILL, DBG_THICKNESS)
        # Text safe rectangle, centred on the (nudged) center.
        cv2.rectangle(overlay,
                      (cx_i - sw_i//2, cy_i - sh_i//2),
                      (cx_i + sw_i//2, cy_i + sh_i//2),
                      DBG_COLOR_SAFE, DBG_THICKNESS)
        # Nudged center vs original bbox center (flood seed).
        cv2.circle(overlay, (cx_i, cy_i),
                   DBG_CENTER_R, DBG_COLOR_CENTER, -1)
        cv2.circle(overlay,
                   (int(x + w/2), int(y + h/2)),
                   DBG_CENTER_R - 1, DBG_COLOR_SEED, -1)
        # D = contour detected, F = bbox fallback.
        tag = "D" if method == "detected" else "F"
        cv2.putText(overlay, f"#{bubble_id}({tag})",
                    (cx_i - sa_d_i, cy_i - sb_d_i - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.50, DBG_COLOR_LABEL, 2)
    # Blend the overlay onto the original for readability.
    debug_img = cv2.addWeighted(overlay, 0.85, img, 0.15, 0)
    cv2.imwrite(output_path, debug_img)
    print(f" 🐛 Debug saved → {output_path}")
    print()
    print(" Legend:")
    print(" 🟡 YELLOW → Panel boundary")
    print(" 🟢 GREEN → Detected bubble ellipse")
    print(" 🔴 RED → Fill ellipse (nudged + clipped)")
    print(" 🔵 BLUE → Text safe rectangle")
    print(" 🔵 CYAN → Ellipse center (may be nudged)")
    print(" 🟣 MAGENTA → Original flood seed point")
    print(" (D) = contour detected | (F) = bbox fallback")
# ─────────────────────────────────────────────
# MAIN RENDER FUNCTION
# ─────────────────────────────────────────────
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_path = DEFAULT_FONT_PATH,
    font_color = DEFAULT_FONT_COLOR,
    debug = False,
    debug_path = "debug_ellipses.png",
):
    """Run the full rendering pipeline for one page.

    Loads the page image, parses translations and bubble geometry,
    detects panels, then white-fills each translated bubble and draws
    the translated text. Optionally writes a debug overlay first.
    Raises FileNotFoundError when the input image cannot be loaded.
    """
    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(
            f"Cannot load image: {input_image}")
    img_h, img_w = img_bgr.shape[:2]
    # PIL copy for text rendering; OpenCV copy for geometry detection.
    img_pil = Image.fromarray(
        cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    translations = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)
    print("\n📐 Detecting panels...")
    panels = detect_panels(img_bgr)
    print(f"\n🎨 Rendering {len(translations)} bubble(s)...")
    print(f" Font : {font_path}")
    print(f" Border shrink : -{BORDER_SHRINK_PX}px")
    print(f" Max nudge ratio : {MAX_NUDGE_RATIO}")
    print(f" Flood tolerance : {FLOOD_TOLERANCE}")
    print(f" Text ratio : {TEXT_RATIO}")
    if debug:
        print(f" Debug mode : ON → {debug_path}")
        save_debug_ellipses(input_image, bubbles,
                            translations, panels, debug_path)
    rendered = 0; skipped = 0
    n_detect = 0; n_fallbk = 0
    for bubble_id, translated_text in sorted(translations.items()):
        # A translation without matching bubble geometry cannot be drawn.
        if bubble_id not in bubbles:
            print(f" ⚠️ #{bubble_id}: not in bubbles.json — skipped")
            skipped += 1
            continue
        bubble_data = bubbles[bubble_id]
        panel = assign_panel(bubble_data, panels, img_w, img_h)
        method = draw_bubble(
            img_pil, img_bgr, bubble_data,
            translated_text, font_path, font_color, panel)
        tag = "🔍 detected" if method == "detected" else "📦 fallback"
        if method == "detected": n_detect += 1
        else: n_fallbk += 1
        px1, py1, px2, py2 = panel
        print(f" ✅ #{bubble_id} [{tag}] "
              f"panel=({px1},{py1})→({px2},{py2}) "
              f"\"{translated_text[:35]}\"")
        rendered += 1
    # Back to BGR for OpenCV's imwrite.
    result_cv = cv2.cvtColor(np.array(img_pil),
                             cv2.COLOR_RGB2BGR)
    cv2.imwrite(output_image, result_cv)
    print(f"\n✅ Done — {rendered} rendered "
          f"({n_detect} detected, {n_fallbk} fallback), "
          f"{skipped} skipped.")
    print(f"📄 Output → {output_image}")
# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Example invocation with hard-coded paths; adjust as needed.
    render_translations(
        input_image = "002-page.jpg",
        output_image = "page_translated.png",
        translations_file = "output.txt",
        bubbles_file = "bubbles.json",
        font_path = DEFAULT_FONT_PATH,
        font_color = DEFAULT_FONT_COLOR,
        debug = True,
        debug_path = "debug_ellipses.png",
    )