844 lines
27 KiB
Python
844 lines
27 KiB
Python
"""
manga-renderer.py
─────────────────────────────────────────────────────────────────
Pipeline:
  1. Detect panel boundaries
  2. Assign bubble -> panel
  3. Detect/fallback bubble ellipse
  4. Clean original text region:
       - OCR union mask (default)
       - Hybrid mask fallback
       - Ellipse mode optional
  5. Render translated text with ellipse-aware wrapping
"""
|
|
|
|
import os
|
|
import math
|
|
import json
|
|
import re
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# CONSTANTS
|
|
# ─────────────────────────────────────────────
|
|
# Font files tried in order; the first one that exists and loads is used.
DEFAULT_FONT_CANDIDATES = [
    "fonts/AnimeAce2_reg.ttf",
    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
    "fonts/NotoSans-Regular.ttf",
]
DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)

# Font-size search range used by best_fit_font (largest size tried first).
MAX_FONT_SIZE = 20
MIN_FONT_SIZE = 6
FONT_SIZE_STEP = 1
# Scale applied to the ellipse-derived text box in get_render_ellipse.
TEXT_RATIO = 0.76

# Bubble / panel geometry tuning.
FLOOD_TOLERANCE = 30          # loDiff/upDiff passed to cv2.floodFill
BORDER_SHRINK_PX = 4          # default inset used by clip_ellipse_to_panel
MIN_PANEL_AREA_RATIO = 0.02   # components smaller than this share of the page are ignored
MAX_NUDGE_RATIO = 0.30        # per-pass centre shift cap, as a fraction of the semi-axis

# Cleaning mode:
#   "ocr_union" -> precise cleanup from OCR quad boxes (recommended)
#   "hybrid"    -> rounded-rect + inner ellipse
#   "ellipse"   -> legacy large ellipse fill
CLEAN_MODE = "ocr_union"

# OCR-union cleaning tuning
OCR_CLEAN_PAD_X = 12
OCR_CLEAN_PAD_Y = 10
OCR_CLEAN_MIN_W = 24
OCR_CLEAN_MIN_H = 24
OCR_CLEAN_CLOSE_KERNEL = 5
OCR_CLEAN_DILATE = 1

# Hybrid cleanup mask tuning
CLEAN_MASK_RECT_SCALE_W = 1.08
CLEAN_MASK_RECT_SCALE_H = 1.20
CLEAN_MASK_ELLIPSE_SCALE = 0.84
CLEAN_MASK_BLUR = 0
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# PARSERS
|
|
# ─────────────────────────────────────────────
|
|
def parse_translations(translations_file):
    """Parse the pipe-delimited translations file.

    Only lines starting with ``#`` are considered. The expected layout is
    ``#ID|ORDER|ORIGINAL|TRANSLATED|FLAGS``; shorter legacy variants are
    still accepted:

    * 4+ fields: original = field 2, translated = field 3
      (flags = field 4 when present, otherwise ``"-"``)
    * 3 fields:  translated = field 2, no original, flags ``"-"``

    Lines whose ID is not an integer (for example the header row), lines
    with fewer than 3 fields, and entries whose translation starts with
    ``[`` are skipped.

    Args:
        translations_file: Path to the UTF-8 translations file.

    Returns:
        Tuple ``(translations, originals, flags_map)``; each is a dict
        keyed by the integer bubble ID.
    """
    translations = {}
    originals = {}
    flags_map = {}

    with open(translations_file, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue

            parts = line.split("|")
            try:
                bubble_id = int(parts[0].lstrip("#"))
            except ValueError:
                # Non-numeric ID (header row, free-form comment): not a record.
                continue

            field_count = len(parts)
            if field_count >= 4:
                original = parts[2].strip()
                translated = parts[3].strip()
                flags = parts[4].strip() if field_count >= 5 else "-"
            elif field_count == 3:
                original = ""
                translated = parts[2].strip()
                flags = "-"
            else:
                continue

            # Entries whose translation starts with "[" are never rendered.
            if translated.startswith("["):
                continue

            translations[bubble_id] = translated
            originals[bubble_id] = original
            flags_map[bubble_id] = flags

    return translations, originals, flags_map
|
|
|
|
|
|
def parse_bubbles(bubbles_file):
    """Load the bubbles JSON file and return its entries keyed by integer ID.

    The file's top level must be a JSON object; each key is converted with
    ``int`` and each value is passed through unchanged.
    """
    with open(bubbles_file, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    bubbles = {}
    for key, entry in payload.items():
        bubbles[int(key)] = entry
    return bubbles
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# HELPERS
|
|
# ─────────────────────────────────────────────
|
|
def normalize_text(s):
    """Return *s* upper-cased and stripped, with every run of non-word characters removed."""
    shouted = s.upper().strip()
    return re.sub(r"[^\w]+", "", shouted)
|
|
|
|
|
|
def is_sfx_like(text):
    """Return True when the normalized *text* is a short sound effect.

    The normalized text must be at most 8 characters long and consist
    entirely of one of the known patterns (SHA+, BIP+, BEEP+, HN+, AH+, OH+).
    """
    token = normalize_text(text)
    if len(token) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", token) is not None
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# FONT HELPERS
|
|
# ─────────────────────────────────────────────
|
|
def load_font_from_candidates(candidates, size):
    """Load the first usable TrueType font from *candidates* at *size*.

    Candidates that are empty, missing on disk, or fail to load are
    skipped. If none loads, PIL's default font is returned instead.

    Returns:
        Tuple ``(font, source)`` where *source* is the path that loaded,
        or ``"PIL_DEFAULT"`` for the fallback.
    """
    # Lazy filter so existence checks happen in candidate order, on demand.
    existing = (p for p in candidates if p and os.path.exists(p))
    for font_path in existing:
        try:
            loaded = ImageFont.truetype(font_path, size)
        except Exception:
            # Best effort: an unreadable font file just moves on to the next one.
            continue
        return loaded, font_path
    return ImageFont.load_default(), "PIL_DEFAULT"
|
|
|
|
|
|
def measure_text(draw, text, font):
    """Return ``(width, height)`` of *text* from ``draw.textbbox`` anchored at (0, 0)."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top
|
|
|
|
|
|
def ellipse_line_max_width(y_offset, a, b):
    """Return the chord width of an axis-aligned ellipse at a vertical offset.

    Args:
        y_offset: Vertical distance from the ellipse centre.
        a: Horizontal semi-axis.
        b: Vertical semi-axis.

    Returns:
        ``2 * a * sqrt(1 - y_offset**2 / b**2)``, with the radicand clamped
        to zero outside the ellipse; ``0`` when *b* is not positive.
    """
    if b <= 0:
        return 0
    radicand = max(0.0, 1.0 - (y_offset * y_offset) / (b * b))
    return 2.0 * a * math.sqrt(radicand)
|
|
|
|
|
|
def wrap_text(draw, text, font, max_width):
    """Greedily wrap *text* into lines no wider than *max_width*.

    A word that is too wide on its own still gets a line to itself.

    Returns:
        Tuple ``(lines, total_height, max_line_width)``. Text with no
        words yields ``([""], 0, 0)``. The total height includes a
        uniform gap between lines derived from the first line's height
        (one fifth of it, at least 2).
    """
    wrapped = []
    pending = ""
    for token in text.split():
        attempt = f"{pending} {token}".strip()
        attempt_w, _ = measure_text(draw, attempt, font)
        if pending and attempt_w > max_width:
            # Adding the word overflows: close the line and start a new one.
            wrapped.append(pending)
            pending = token
        else:
            pending = attempt
    if pending:
        wrapped.append(pending)

    if not wrapped:
        return [""], 0, 0

    sizes = [measure_text(draw, row, font) for row in wrapped]
    row_heights = [height for _, height in sizes]
    gap = max(row_heights[0] // 5, 2)
    block_h = sum(row_heights) + gap * (len(wrapped) - 1)
    return wrapped, block_h, max(width for width, _ in sizes)
|
|
|
|
|
|
def wrap_text_ellipse_aware(draw, text, font, safe_w, safe_h, tall_bubble=False):
    """Wrap *text* so each line fits the ellipse chord at its vertical position.

    A first rectangular wrap (85% of *safe_w* for tall bubbles) fixes the
    block height. If that block already exceeds *safe_h* it is returned
    as-is. Otherwise the words are re-flowed line by line against 95% of
    the chord of an ellipse with semi-axes (target width / 2, safe_h / 2),
    never allowing less than 20 px per line.

    Returns:
        Tuple ``(lines, total_height)``.
    """
    width_budget = safe_w * (0.85 if tall_bubble else 1.0)
    first_pass, first_pass_h, _ = wrap_text(draw, text, font, width_budget)
    if not first_pass:
        return first_pass, first_pass_h

    extra_gap = 1 if tall_bubble else 0
    row_heights = [measure_text(draw, row, font)[1] for row in first_pass]
    gap = max(row_heights[0] // 5, 2) + extra_gap
    first_block_h = sum(row_heights) + gap * (len(first_pass) - 1)
    if first_block_h > safe_h:
        # Too tall at this font size; the caller will retry with a smaller one.
        return first_pass, first_block_h

    semi_w = width_budget / 2.0
    semi_h = safe_h / 2.0
    last_row = len(row_heights) - 1

    rows = []
    pending = ""
    row_index = 0
    row_top = -first_block_h / 2.0  # y relative to the ellipse centre
    for token in text.split():
        # Heights come from the first pass; reuse the last one if we now have more rows.
        row_h = row_heights[min(row_index, last_row)]
        chord = ellipse_line_max_width(row_top + row_h / 2.0, semi_w, semi_h) * 0.95
        limit = max(20, chord)

        attempt = f"{pending} {token}".strip()
        attempt_w, _ = measure_text(draw, attempt, font)
        if pending and attempt_w > limit:
            rows.append(pending)
            row_top += row_h + gap
            row_index += 1
            pending = token
        else:
            pending = attempt
    if pending:
        rows.append(pending)

    if not rows:
        return rows, 0

    final_heights = [measure_text(draw, row, font)[1] for row in rows]
    final_gap = max(final_heights[0] // 5, 2) + extra_gap
    return rows, sum(final_heights) + final_gap * (len(rows) - 1)
|
|
|
|
|
|
def best_fit_font(draw, text, font_candidates, safe_w, safe_h, tall_bubble=False):
    """Pick the largest font size whose wrapped text fits the safe box.

    Sizes are tried from MAX_FONT_SIZE down to MIN_FONT_SIZE in steps of
    FONT_SIZE_STEP. If nothing fits, the MIN_FONT_SIZE layout is returned
    even though it overflows.

    Returns:
        Tuple ``(font, lines, total_height, font_source)``.
    """
    def _layout(point_size):
        font, source = load_font_from_candidates(font_candidates, point_size)
        rows, rows_h = wrap_text_ellipse_aware(
            draw, text, font, safe_w, safe_h, tall_bubble=tall_bubble
        )
        return font, rows, rows_h, source

    for point_size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -FONT_SIZE_STEP):
        font, rows, rows_h, source = _layout(point_size)
        widest = max([0] + [measure_text(draw, row, font)[0] for row in rows])
        if rows_h <= safe_h and widest <= safe_w:
            return font, rows, rows_h, source

    # Nothing fits: fall back to the smallest size.
    return _layout(MIN_FONT_SIZE)
|
|
|
|
|
|
def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw *text* at *pos* with an outline built from offset copies.

    The outline is 2 px wide when the measured text height is 11 px or
    less, otherwise 1 px. Every offset copy is drawn in *stroke_fill*,
    then the text itself is drawn on top in *fill*.
    """
    origin_x, origin_y = pos
    _, text_h = measure_text(draw, text, font)
    reach = 2 if text_h <= 11 else 1

    span = range(-reach, reach + 1)
    offsets = [(dx, dy) for dx in span for dy in span if (dx, dy) != (0, 0)]
    for dx, dy in offsets:
        draw.text((origin_x + dx, origin_y + dy), text, font=font, fill=stroke_fill)

    draw.text((origin_x, origin_y), text, font=font, fill=fill)
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# CLEAN MASK BUILDERS
|
|
# ─────────────────────────────────────────────
|
|
def draw_rounded_rect_mask(mask, x1, y1, x2, y2, radius, color=255):
    """Fill a rounded rectangle into *mask* in place and return *mask*.

    Coordinates are truncated to ints. A degenerate rectangle (zero or
    negative width/height) leaves the mask untouched. The corner radius
    is clamped to at least 1 and at most half of the shorter side.
    """
    left, top, right, bottom = (int(v) for v in (x1, y1, x2, y2))
    if right <= left or bottom <= top:
        return mask

    r = int(max(1, min(radius, (right - left) // 2, (bottom - top) // 2)))

    # Two overlapping bars form a plus-shaped body ...
    cv2.rectangle(mask, (left + r, top), (right - r, bottom), color, -1)
    cv2.rectangle(mask, (left, top + r), (right, bottom - r), color, -1)

    # ... and four filled circles round off the corners.
    corner_centres = (
        (left + r, top + r),
        (right - r, top + r),
        (left + r, bottom - r),
        (right - r, bottom - r),
    )
    for centre in corner_centres:
        cv2.circle(mask, centre, r, color, -1)
    return mask
|
|
|
|
|
|
def build_hybrid_clean_mask(img_h, img_w, cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, panel):
    """Build a cleanup mask as the union of a rounded rectangle and an ellipse.

    The rectangle is the safe text box scaled by CLEAN_MASK_RECT_SCALE_W/H
    (at least 8 px per side) and the ellipse is the fill ellipse scaled by
    CLEAN_MASK_ELLIPSE_SCALE (semi-axes at least 3 px), both centred on
    (cx, cy). The union is morphologically closed, clipped to *panel*,
    and optionally blurred and re-thresholded when CLEAN_MASK_BLUR > 0.

    Returns:
        ``uint8`` array of shape ``(img_h, img_w)`` with 255 inside the mask.
    """
    px1, py1, px2, py2 = panel
    mask = np.zeros((img_h, img_w), dtype=np.uint8)

    rect_w = max(8, int(safe_w * CLEAN_MASK_RECT_SCALE_W))
    rect_h = max(8, int(safe_h * CLEAN_MASK_RECT_SCALE_H))
    half_w, half_h = rect_w / 2, rect_h / 2
    corner_radius = int(min(rect_w, rect_h) * 0.22)
    draw_rounded_rect_mask(
        mask,
        int(cx - half_w),
        int(cy - half_h),
        int(cx + half_w),
        int(cy + half_h),
        corner_radius,
        color=255,
    )

    centre = (int(round(cx)), int(round(cy)))
    axes = (
        max(3, int(sa_fill * CLEAN_MASK_ELLIPSE_SCALE)),
        max(3, int(sb_fill * CLEAN_MASK_ELLIPSE_SCALE)),
    )
    cv2.ellipse(mask, centre, axes, angle, 0, 360, 255, -1)

    # Close pinholes where the two shapes meet.
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)

    # Never clean outside the owning panel.
    panel_clip = np.zeros_like(mask)
    panel_clip[py1:py2, px1:px2] = 255
    mask = cv2.bitwise_and(mask, panel_clip)

    if CLEAN_MASK_BLUR > 0:
        mask = cv2.GaussianBlur(mask, (0, 0), CLEAN_MASK_BLUR)
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

    return mask
|
|
|
|
|
|
def build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel):
    """
    Build precise text cleanup mask from OCR quad bounding boxes.

    Each entry of ``bubble_data["quad_bboxes"]`` (dicts with x/y/w/h) is
    grown towards OCR_CLEAN_MIN_W/H, padded by OCR_CLEAN_PAD_X/Y, clamped
    to *panel* and filled. The filled boxes are then merged with a
    morphological close, optionally dilated, and clipped to the panel.

    Returns:
        ``uint8`` array of shape ``(img_h, img_w)``; all zeros when the
        bubble has no quad boxes.
    """
    px1, py1, px2, py2 = panel
    boxes = bubble_data.get("quad_bboxes", [])
    mask = np.zeros((img_h, img_w), dtype=np.uint8)
    if not boxes:
        return mask

    def _grow(start, size, minimum):
        # Grow a span symmetrically towards `minimum` (integer halves).
        if size >= minimum:
            return start, size
        pad = (minimum - size) // 2
        return start - pad, size + 2 * pad

    for box in boxes:
        bx = int(box.get("x", 0))
        by = int(box.get("y", 0))
        bw = int(box.get("w", 0))
        bh = int(box.get("h", 0))
        if bw <= 0 or bh <= 0:
            continue

        bx, bw = _grow(bx, bw, OCR_CLEAN_MIN_W)
        by, bh = _grow(by, bh, OCR_CLEAN_MIN_H)

        left = max(px1, bx - OCR_CLEAN_PAD_X)
        top = max(py1, by - OCR_CLEAN_PAD_Y)
        right = min(px2, bx + bw + OCR_CLEAN_PAD_X)
        bottom = min(py2, by + bh + OCR_CLEAN_PAD_Y)
        if right > left and bottom > top:
            cv2.rectangle(mask, (left, top), (right, bottom), 255, -1)

    # Merge nearby fragments (kernel size forced odd and >= 3).
    close_size = max(3, int(OCR_CLEAN_CLOSE_KERNEL) | 1)
    mask = cv2.morphologyEx(
        mask, cv2.MORPH_CLOSE, np.ones((close_size, close_size), np.uint8), iterations=1
    )

    if OCR_CLEAN_DILATE > 0:
        mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=OCR_CLEAN_DILATE)

    # Clip to panel bounds
    panel_clip = np.zeros_like(mask)
    panel_clip[py1:py2, px1:px2] = 255
    return cv2.bitwise_and(mask, panel_clip)
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# PANEL DETECTION
|
|
# ─────────────────────────────────────────────
|
|
def merge_nested_panels(panels):
    """Drop panels that are mostly contained in a larger, already-kept panel.

    Panels are visited from largest to smallest area. A panel is discarded
    when more than 80% of its own area overlaps any panel kept so far.
    Inputs of length 0 or 1 are returned unchanged.

    Args:
        panels: Sequence of ``(x1, y1, x2, y2)`` boxes.

    Returns:
        List of surviving boxes, largest first.
    """
    if len(panels) <= 1:
        return panels

    def _area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    def _mostly_inside(inner, outer):
        overlap_w = min(inner[2], outer[2]) - max(inner[0], outer[0])
        overlap_h = min(inner[3], outer[3]) - max(inner[1], outer[1])
        if overlap_w <= 0 or overlap_h <= 0:
            return False
        return (overlap_w * overlap_h) / max(1, _area(inner)) > 0.80

    survivors = []
    for candidate in sorted(panels, key=_area, reverse=True):
        if not any(_mostly_inside(candidate, bigger) for bigger in survivors):
            survivors.append(candidate)
    return survivors
|
|
|
|
|
|
def split_panels_on_internal_borders(panels, v_lines, img_w, img_h):
    """Split wide panels in two where a strong vertical line runs through them.

    Only panels at least 30% of the page width are examined, and only the
    central 60% of each such panel is searched. A column counts as a
    border when its summed *v_lines* response exceeds 40% of a fully lit
    column; the split is placed at the median of those columns. The split
    is applied only if both halves are wider than 10% of the page.

    Args:
        panels: Iterable of ``(x1, y1, x2, y2)`` boxes.
        v_lines: 2-D array of vertical-line responses (0/255 expected).
        img_w: Page width in pixels.
        img_h: Page height in pixels (not used by the current logic).

    Returns:
        New list of boxes in input order, split panels yielding left then right.
    """
    min_half_w = img_w * 0.10

    def _pieces(left, top, right, bottom):
        whole = [(left, top, right, bottom)]
        width = right - left
        if width < img_w * 0.30:
            return whole

        inset = int(width * 0.20)
        lo, hi = left + inset, right - inset
        if hi <= lo:
            return whole

        column_ink = v_lines[top:bottom, lo:hi].sum(axis=0)
        strong = np.where(column_ink > (bottom - top) * 255 * 0.40)[0]
        if len(strong) == 0:
            return whole

        cut = int(np.median(strong)) + lo
        if cut - left > min_half_w and right - cut > min_half_w:
            return [(left, top, cut, bottom), (cut, top, right, bottom)]
        return whole

    output = []
    for (px1, py1, px2, py2) in panels:
        output.extend(_pieces(px1, py1, px2, py2))
    return output
|
|
|
|
|
|
def detect_panels(img_bgr):
    """Detect panel rectangles on a page image.

    Long dark horizontal and vertical runs are taken as panel borders.
    The connected regions between them become candidate panels, which are
    filtered by area, aspect ratio and minimum size, de-nested, split on
    internal vertical borders, and sorted by 100-px row band then x.

    Args:
        img_bgr: Page image as a BGR array.

    Returns:
        List of ``(x1, y1, x2, y2)`` boxes; the whole page when no panel
        is found.
    """
    img_h, img_w = img_bgr.shape[:2]
    page_area = img_h * img_w
    smallest_area = page_area * MIN_PANEL_AREA_RATIO

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    dark = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY_INV)[1]

    def _long_runs(kernel_size):
        # Morphological opening keeps only dark runs at least as long as the kernel.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        return cv2.morphologyEx(dark, cv2.MORPH_OPEN, kernel)

    h_lines = _long_runs((max(40, img_w // 25), 1))
    v_lines = _long_runs((1, max(40, img_h // 25)))

    borders = cv2.dilate(
        cv2.bitwise_or(h_lines, v_lines), np.ones((5, 5), np.uint8), iterations=2
    )
    interior = cv2.bitwise_not(borders)
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(interior, connectivity=8)

    panels = []
    for label_id in range(1, num_labels):  # label 0 is skipped
        if stats[label_id, cv2.CC_STAT_AREA] < smallest_area:
            continue

        x = stats[label_id, cv2.CC_STAT_LEFT]
        y = stats[label_id, cv2.CC_STAT_TOP]
        w = stats[label_id, cv2.CC_STAT_WIDTH]
        h = stats[label_id, cv2.CC_STAT_HEIGHT]

        covers_page = w * h > page_area * 0.90
        sliver = max(w, h) / max(min(w, h), 1) > 15
        tiny = w < img_w * 0.05 or h < img_h * 0.05
        if covers_page or sliver or tiny:
            continue

        panels.append((x, y, x + w, y + h))

    panels = merge_nested_panels(panels)
    panels = split_panels_on_internal_borders(panels, v_lines, img_w, img_h)
    panels.sort(key=lambda box: (box[1] // 100, box[0]))

    return panels if panels else [(0, 0, img_w, img_h)]
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# BUBBLE GEOMETRY
|
|
# ─────────────────────────────────────────────
|
|
def assign_panel(bubble_data, panels, img_w, img_h):
    """Choose the panel a bubble belongs to.

    The panel with the largest overlap with the bubble's bounding box wins
    (the first one on ties). If no panel overlaps, the first panel that
    contains the bubble centre is used; failing that, the whole image.

    Args:
        bubble_data: Dict with numeric ``x``, ``y``, ``w``, ``h``.
        panels: Iterable of ``(x1, y1, x2, y2)`` boxes.
        img_w: Image width, used for the whole-image fallback.
        img_h: Image height, used for the whole-image fallback.

    Returns:
        ``(x1, y1, x2, y2)`` of the chosen panel.
    """
    left, width = bubble_data["x"], bubble_data["w"]
    top, height = bubble_data["y"], bubble_data["h"]
    right, bottom = left + width, top + height

    winner, winner_area = None, 0
    for (px1, py1, px2, py2) in panels:
        ix1, iy1 = max(left, px1), max(top, py1)
        ix2, iy2 = min(right, px2), min(bottom, py2)
        if ix2 <= ix1 or iy2 <= iy1:
            continue
        shared = (ix2 - ix1) * (iy2 - iy1)
        if shared > winner_area:
            winner, winner_area = (px1, py1, px2, py2), shared

    if winner is not None:
        return winner

    mid_x, mid_y = left + width / 2.0, top + height / 2.0
    for (px1, py1, px2, py2) in panels:
        if px1 <= mid_x <= px2 and py1 <= mid_y <= py2:
            return (px1, py1, px2, py2)
    return (0, 0, img_w, img_h)
|
|
|
|
|
|
def detect_bubble_ellipse(img_bgr, bubble_data, panel):
    """Fit an ellipse to the bright region around a bubble's centre.

    The page is thresholded at gray level 200 and restricted to *panel*.
    The region is flood-filled from the bubble centre; if that pixel is
    dark (< 150), the nearest bright pixel (>= 200) inside the panel is
    searched for in growing square windows and used as the seed instead.
    An ellipse is then fitted to the largest contour of the filled region.

    Returns:
        ``(cx, cy, semi_axis_a, semi_axis_b, angle_degrees)`` as floats,
        or ``None`` when no bright seed is found or the region is too
        small (fewer than 5 contour points or area below 100 px).
    """
    x, w = bubble_data["x"], bubble_data["w"]
    y, h = bubble_data["y"], bubble_data["h"]

    img_h, img_w = img_bgr.shape[:2]
    px1, py1, px2, py2 = panel

    # Keep the seed one pixel away from the image border.
    seed_x = max(1, min(img_w - 2, int(x + w / 2.0)))
    seed_y = max(1, min(img_h - 2, int(y + h / 2.0)))

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]

    panel_mask = np.zeros_like(binary)
    panel_mask[py1:py2, px1:px2] = binary[py1:py2, px1:px2]

    # NOTE(review): the seed is relocated only when gray < 150, but the flood
    # runs on a mask thresholded at 200; seeds in between stay on a zero pixel
    # of the mask. Confirm whether that is intended.
    if gray[seed_y, seed_x] < 150:
        start_x, start_y = seed_x, seed_y
        ring_limit = max(2, min(w, h) // 3)
        window_pixels = (
            (start_x + dx, start_y + dy)
            for r in range(1, ring_limit)
            for dy in range(-r, r + 1)
            for dx in range(-r, r + 1)
        )
        bright = next(
            (
                (nx, ny)
                for nx, ny in window_pixels
                if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200
            ),
            None,
        )
        if bright is None:
            return None
        seed_x, seed_y = bright

    # floodFill requires a mask 2 px larger than the image in each dimension.
    flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8)
    scratch = panel_mask.copy()
    cv2.floodFill(
        scratch,
        flood_mask,
        (seed_x, seed_y),
        255,
        loDiff=FLOOD_TOLERANCE,
        upDiff=FLOOD_TOLERANCE,
        flags=cv2.FLOODFILL_FIXED_RANGE,
    )

    region = flood_mask[1:-1, 1:-1] * 255
    region = cv2.morphologyEx(region, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)

    contours, _ = cv2.findContours(region, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    largest = max(contours, key=cv2.contourArea)
    # The ellipse fit is skipped for contours with fewer than 5 points.
    if len(largest) < 5 or cv2.contourArea(largest) < 100:
        return None

    (ecx, ecy), (full_w, full_h), angle = cv2.fitEllipse(largest)
    return float(ecx), float(ecy), float(full_w / 2), float(full_h / 2), float(angle)
|
|
|
|
|
|
def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, shrink=BORDER_SHRINK_PX):
    """Nudge and, if needed, shrink an ellipse so it fits inside a panel.

    Both semi-axes are first reduced by *shrink* (never below 1.0) and the
    panel is inset by the same amount. Up to three passes then move the
    centre away from any side the rotated ellipse's bounding box crosses,
    each move capped at MAX_NUDGE_RATIO of the matching semi-axis. Any
    overflow left afterwards is subtracted from both semi-axes.

    Returns:
        ``(cx, cy, semi_axis_a, semi_axis_b)`` after adjustment.
    """
    px1, py1, px2, py2 = panel
    min_x, min_y = px1 + shrink, py1 + shrink
    max_x, max_y = px2 - shrink, py2 - shrink

    semi_a = max(sa - shrink, 1.0)
    semi_b = max(sb - shrink, 1.0)

    theta = math.radians(angle)
    cos_t, sin_t = math.cos(theta), math.sin(theta)

    def _overflow(center_x, center_y):
        # Half-extents of the rotated ellipse's axis-aligned bounding box.
        half_w = math.sqrt((semi_a * cos_t) ** 2 + (semi_b * sin_t) ** 2)
        half_h = math.sqrt((semi_a * sin_t) ** 2 + (semi_b * cos_t) ** 2)
        return (
            max(0, min_x - (center_x - half_w)),
            max(0, (center_x + half_w) - max_x),
            max(0, min_y - (center_y - half_h)),
            max(0, (center_y + half_h) - max_y),
        )

    for _ in range(3):
        over_l, over_r, over_t, over_b = _overflow(cx, cy)
        if max(over_l, over_r, over_t, over_b) == 0:
            break
        cap_x = semi_a * MAX_NUDGE_RATIO
        cap_y = semi_b * MAX_NUDGE_RATIO
        cx += min(over_l, cap_x) - min(over_r, cap_x)
        cy += min(over_t, cap_y) - min(over_b, cap_y)

    worst = max(_overflow(cx, cy))
    if worst > 0:
        # Nudging was not enough: shrink both axes by the worst overflow.
        semi_a = max(semi_a - worst, 1.0)
        semi_b = max(semi_b - worst, 1.0)

    return cx, cy, semi_a, semi_b
|
|
|
|
|
|
def get_render_ellipse(img_bgr, bubble_data, panel):
    """Return the ellipse and safe text box to use for one bubble.

    The ellipse detected in the image is used when available; otherwise
    an axis-aligned ellipse derived from the bubble's x/y/w/h box is used.
    Either way it is clipped to *panel*, and the safe text box is
    ``semi_axis * sqrt(2) * TEXT_RATIO`` in each direction.

    Returns:
        ``(cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method)`` with
        *method* being ``"detected"`` or ``"fallback"``.
    """
    x, w = bubble_data["x"], bubble_data["w"]
    y, h = bubble_data["y"], bubble_data["h"]

    detected = detect_bubble_ellipse(img_bgr, bubble_data, panel)
    if detected is None:
        centre_x, centre_y = x + w / 2.0, y + h / 2.0
        semi_a, semi_b = w / 2.0, h / 2.0
        angle = 0.0
        method = "fallback"
    else:
        centre_x, centre_y, semi_a, semi_b, angle = detected
        method = "detected"

    centre_x, centre_y, sa_fill, sb_fill = clip_ellipse_to_panel(
        centre_x, centre_y, semi_a, semi_b, angle, panel
    )
    safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO
    safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO
    return (centre_x, centre_y, sa_fill, sb_fill, angle, safe_w, safe_h, method)
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# DRAW ONE BUBBLE
|
|
# ─────────────────────────────────────────────
|
|
def draw_bubble(
    pil_img,
    img_bgr,
    bubble_data,
    original_text,
    translated_text,
    flags,
    font_candidates,
    font_color,
    stroke_color,
    panel
):
    """Erase a bubble's original text and draw its translation.

    Steps: skip SFX whose translation is unchanged; compute the render
    ellipse; build a cleaning mask according to CLEAN_MODE (OCR union
    falls back to the hybrid mask when empty); paint the mask white on
    *pil_img* in place; then fit and draw the translated text centred on
    the mask's bounding box.

    *flags* is accepted but not used by this function.

    Returns:
        ``(method, font_used)``. *method* is ``"skip_sfx"``,
        ``"detected"`` or ``"fallback"``; *font_used* is the font source,
        or ``"NO_FONT"`` when nothing was drawn.
    """
    # skip unchanged SFX
    unchanged_sfx = (
        original_text
        and translated_text
        and normalize_text(original_text) == normalize_text(translated_text)
        and is_sfx_like(original_text)
    )
    if unchanged_sfx:
        return "skip_sfx", "NO_FONT"

    (cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method) = get_render_ellipse(
        img_bgr, bubble_data, panel
    )
    cx_i, cy_i = int(round(cx)), int(round(cy))
    img_h, img_w = img_bgr.shape[:2]

    def _hybrid_mask():
        return build_hybrid_clean_mask(
            img_h=img_h, img_w=img_w,
            cx=cx, cy=cy,
            sa_fill=sa_fill, sb_fill=sb_fill, angle=angle,
            safe_w=safe_w, safe_h=safe_h,
            panel=panel
        )

    # choose cleaning mask
    if CLEAN_MODE == "ocr_union":
        mask = build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel)
        # robust fallback when OCR gave no usable boxes
        if mask is None or int(mask.sum()) == 0:
            mask = _hybrid_mask()
    elif CLEAN_MODE == "hybrid":
        mask = _hybrid_mask()
    else:  # ellipse
        mask = np.zeros((img_h, img_w), dtype=np.uint8)
        axes = (int(math.ceil(sa_fill)), int(math.ceil(sb_fill)))
        cv2.ellipse(mask, (cx_i, cy_i), axes, angle, 0, 360, 255, -1)

    # paint white over mask
    pixels = np.array(pil_img)
    pixels[mask == 255] = [255, 255, 255]
    pil_img.paste(Image.fromarray(pixels))

    if not translated_text:
        return method, "NO_FONT"

    draw = ImageDraw.Draw(pil_img)

    # Center text in the cleaned region bbox (red-box style target)
    ys, xs = np.where(mask > 0)
    if len(xs) > 0 and len(ys) > 0:
        mx1, my1, mx2, my2 = xs.min(), ys.min(), xs.max(), ys.max()
        text_cx = int((mx1 + mx2) / 2)
        text_cy = int((my1 + my2) / 2)
        box_w = max(20, int((mx2 - mx1) * 0.92))
        box_h = max(20, int((my2 - my1) * 0.92))
    else:
        text_cx, text_cy = cx_i, cy_i
        box_w, box_h = max(int(safe_w), 1), max(int(safe_h), 1)

    bubble_w = max(1, bubble_data.get("w", 1))
    bubble_h = max(1, bubble_data.get("h", 1))
    tall_bubble = (bubble_h / bubble_w) > 1.25

    font, lines, total_h, font_used = best_fit_font(
        draw, translated_text, font_candidates, box_w, box_h, tall_bubble=tall_bubble
    )
    if not lines:
        return method, font_used

    extra_gap = 1 if tall_bubble else 0
    # The block starts 2% of the box height above exact vertical centring.
    y_cursor = int(round(text_cy - total_h / 2.0 - 0.02 * box_h))
    for line in lines:
        line_w, line_h = measure_text(draw, line, font)
        line_x = text_cx - line_w // 2
        draw_text_with_stroke(
            draw, (line_x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color
        )
        y_cursor += line_h + max(line_h // 5, 2) + extra_gap

    return method, font_used
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# MAIN RENDER FUNCTION
|
|
# ─────────────────────────────────────────────
|
|
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_candidates=DEFAULT_FONT_CANDIDATES,
    font_color=DEFAULT_FONT_COLOR,
    stroke_color=DEFAULT_STROKE_COLOR,
):
    """Render all translations onto a page image and save the result.

    Bubbles are processed in ``reading_order`` (falling back to the bubble
    ID). Translations with no matching bubble entry, and unchanged SFX,
    are counted as skipped.

    Args:
        input_image: Path of the source page image.
        output_image: Path the rendered page is written to.
        translations_file: Path of the pipe-delimited translations file.
        bubbles_file: Path of the bubbles JSON file.
        font_candidates: Font paths tried in order (not modified).
        font_color: RGB fill colour for the text.
        stroke_color: RGB colour for the text outline.

    Raises:
        FileNotFoundError: If *input_image* cannot be loaded.
        OSError: If the output image cannot be written.
    """
    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot load image: {input_image}")

    img_h, img_w = img_bgr.shape[:2]
    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    translations, originals, flags_map = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)
    panels = detect_panels(img_bgr)

    rendered = 0
    skipped = 0

    def sort_key(item):
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))

    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        if bubble_id not in bubbles:
            skipped += 1
            continue

        bubble_data = bubbles[bubble_id]
        panel = assign_panel(bubble_data, panels, img_w, img_h)

        method, _font_used = draw_bubble(
            pil_img=img_pil,
            img_bgr=img_bgr,
            bubble_data=bubble_data,
            original_text=originals.get(bubble_id, ""),
            translated_text=translated_text,
            flags=flags_map.get(bubble_id, "-"),
            font_candidates=font_candidates,
            font_color=font_color,
            stroke_color=stroke_color,
            panel=panel
        )

        if method == "skip_sfx":
            skipped += 1
        else:
            rendered += 1

    result_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its return value; without this
    # check a failed write would still print the success summary below.
    if not cv2.imwrite(output_image, result_cv):
        raise OSError(f"Cannot write image: {output_image}")

    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
    print(f"📄 Output → {output_image}")
    print(f"🧼 Clean mode: {CLEAN_MODE}")
|
|
|
|
|
|
# ─────────────────────────────────────────────
|
|
# ENTRY POINT
|
|
# ─────────────────────────────────────────────
|
|
if __name__ == "__main__":
    # Default single-page run with hard-coded file names.
    run_options = {
        "input_image": "001-page.png",
        "output_image": "page_translated.png",
        "translations_file": "output.txt",
        "bubbles_file": "bubbles.json",
        "font_candidates": DEFAULT_FONT_CANDIDATES,
        "font_color": DEFAULT_FONT_COLOR,
        "stroke_color": DEFAULT_STROKE_COLOR,
    }
    render_translations(**run_options)
|