"""
manga-renderer.py
─────────────────────────────────────────────────────────────────
Pipeline:
  1. Detect panel boundaries
  2. Assign bubble -> panel
  3. Detect/fallback bubble ellipse
  4. Clean original text region:
       - OCR union mask (default)
       - Hybrid mask fallback
       - Ellipse mode optional
  5. Render translated text with ellipse-aware wrapping
"""

import json
import math
import os
import re

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ─────────────────────────────────────────────
# CONSTANTS
# ─────────────────────────────────────────────
DEFAULT_FONT_CANDIDATES = [
    "fonts/AnimeAce2_reg.ttf",
    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
    "fonts/NotoSans-Regular.ttf",
]
DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)

MAX_FONT_SIZE = 20
MIN_FONT_SIZE = 6
FONT_SIZE_STEP = 1
TEXT_RATIO = 0.76

FLOOD_TOLERANCE = 30
BORDER_SHRINK_PX = 4
MIN_PANEL_AREA_RATIO = 0.02
MAX_NUDGE_RATIO = 0.30

# Cleaning mode:
#   "ocr_union" -> precise cleanup from OCR quad boxes (recommended)
#   "hybrid"    -> rounded-rect + inner ellipse
#   "ellipse"   -> legacy large ellipse fill
CLEAN_MODE = "ocr_union"

# OCR-union cleaning tuning
OCR_CLEAN_PAD_X = 12
OCR_CLEAN_PAD_Y = 10
OCR_CLEAN_MIN_W = 24
OCR_CLEAN_MIN_H = 24
OCR_CLEAN_CLOSE_KERNEL = 5
OCR_CLEAN_DILATE = 1

# Hybrid cleanup mask tuning
CLEAN_MASK_RECT_SCALE_W = 1.08
CLEAN_MASK_RECT_SCALE_H = 1.20
CLEAN_MASK_ELLIPSE_SCALE = 0.84
CLEAN_MASK_BLUR = 0


# ─────────────────────────────────────────────
# PARSERS
# ─────────────────────────────────────────────
def parse_translations(translations_file):
    """Parse the pipe-separated translations file.

    Only lines starting with ``#`` are considered. Accepted layouts:

        #ID|ORDER|ORIGINAL|TRANSLATED|FLAGS
        #ID|ORDER|ORIGINAL|TRANSLATED        (flags default to "-")
        #ID|ORDER|TRANSLATED                 (original defaults to "")

    Lines whose ID is not an integer, that have fewer than three fields,
    or whose translated text starts with ``[`` are skipped.

    Returns:
        A tuple ``(translations, originals, flags_map)`` of dicts keyed by
        the integer bubble id.
    """
    translations = {}
    originals = {}
    flags_map = {}

    with open(translations_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line.startswith("#"):
                continue

            parts = line.split("|")
            try:
                bubble_id = int(parts[0].lstrip("#"))
            except ValueError:
                # Header/comment rows such as "#ID|ORDER|..." are not data.
                continue

            if len(parts) >= 4:
                original = parts[2].strip()
                translated = parts[3].strip()
                flags = parts[4].strip() if len(parts) >= 5 else "-"
            elif len(parts) == 3:
                original = ""
                translated = parts[2].strip()
                flags = "-"
            else:
                continue

            if translated.startswith("["):
                continue

            translations[bubble_id] = translated
            originals[bubble_id] = original
            flags_map[bubble_id] = flags

    return translations, originals, flags_map


def parse_bubbles(bubbles_file):
    """Load the bubbles JSON file and return it with integer keys."""
    with open(bubbles_file, "r", encoding="utf-8") as f:
        raw = json.load(f)
    return {int(k): v for k, v in raw.items()}


# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def normalize_text(s):
    """Upper-case ``s`` and remove every run of non-word characters."""
    t = s.upper().strip()
    t = re.sub(r"[^\w]+", "", t)
    return t


def is_sfx_like(text):
    """Return True when the normalized text is a short known SFX pattern."""
    t = normalize_text(text)
    return (
        len(t) <= 8
        and re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", t) is not None
    )


# ─────────────────────────────────────────────
# FONT HELPERS
# ─────────────────────────────────────────────
def load_font_from_candidates(candidates, size):
    """Return ``(font, path)`` for the first candidate that loads.

    Candidates that do not exist or fail to load are skipped. When none
    loads, PIL's default font is returned with the path "PIL_DEFAULT".
    """
    for path in candidates:
        if path and os.path.exists(path):
            try:
                return ImageFont.truetype(path, size), path
            except (OSError, ValueError, ImportError):
                # Unreadable/invalid font file: try the next candidate.
                continue
    return ImageFont.load_default(), "PIL_DEFAULT"


def measure_text(draw, text, font):
    """Return ``(width, height)`` of ``text`` from ``draw.textbbox``."""
    bbox = draw.textbbox((0, 0), text, font=font)
    return bbox[2] - bbox[0], bbox[3] - bbox[1]


def ellipse_line_max_width(y_offset, a, b):
    """Return the chord width of an axis-aligned ellipse at ``y_offset``.

    ``a`` and ``b`` are the horizontal and vertical semi-axes; ``y_offset``
    is measured from the ellipse centre. Returns 0 when ``b <= 0`` or when
    the offset lies outside the ellipse.
    """
    if b <= 0:
        return 0
    t = 1.0 - (y_offset * y_offset) / (b * b)
    t = max(0.0, t)
    return 2.0 * a * math.sqrt(t)


def wrap_text(draw, text, font, max_width):
    """Greedy word-wrap of ``text`` to ``max_width`` pixels.

    A single word wider than ``max_width`` is kept on its own line.

    Returns:
        ``(lines, total_height, max_line_width)``; ``([""], 0, 0)`` when
        the text contains no words.
    """
    words = text.split()
    lines = []
    current = ""

    for word in words:
        test = (current + " " + word).strip()
        w, _ = measure_text(draw, test, font)
        if w <= max_width or not current:
            current = test
        else:
            lines.append(current)
            current = word

    if current:
        lines.append(current)

    if not lines:
        return [""], 0, 0

    widths, heights = [], []
    for ln in lines:
        w, h = measure_text(draw, ln, font)
        widths.append(w)
        heights.append(h)

    line_gap = max(heights[0] // 5, 2) if heights else 2
    total_h = sum(heights) + line_gap * (len(lines) - 1)
    return lines, total_h, max(widths) if widths else 0


def wrap_text_ellipse_aware(draw, text, font, safe_w, safe_h, tall_bubble=False):
    """Wrap ``text`` so each row fits the ellipse chord at its height.

    A plain rectangular wrap is done first (at 85% width for tall bubbles).
    If that block is already taller than ``safe_h`` it is returned as-is so
    the caller can try a smaller font; otherwise the words are re-flowed
    row by row against 95% of the ellipse chord (minimum 20 px).

    Returns:
        ``(lines, total_height)``.
    """
    target_w = safe_w * (0.85 if tall_bubble else 1.0)
    lines, total_h, _ = wrap_text(draw, text, font, target_w)
    if not lines:
        return lines, total_h

    heights = [measure_text(draw, ln, font)[1] for ln in lines]

    line_gap = max(heights[0] // 5, 2)
    if tall_bubble:
        line_gap += 1

    block_h = sum(heights) + line_gap * (len(lines) - 1)
    if block_h > safe_h:
        return lines, block_h

    a = target_w / 2.0
    b = safe_h / 2.0

    words = text.split()
    refined = []
    # Rows are laid out top-down from the top edge of the centred block.
    cursor_y = -block_h / 2.0
    current = ""
    idx_h = 0

    for word in words:
        h_line = heights[min(idx_h, len(heights) - 1)]
        y_mid = cursor_y + h_line / 2.0
        row_max = ellipse_line_max_width(y_mid, a, b) * 0.95
        row_max = max(20, row_max)

        candidate = (current + " " + word).strip()
        w, _ = measure_text(draw, candidate, font)

        if (w <= row_max) or (not current):
            current = candidate
        else:
            refined.append(current)
            cursor_y += h_line + line_gap
            idx_h += 1
            current = word

    if current:
        refined.append(current)

    hs = [measure_text(draw, ln, font)[1] for ln in refined]
    if not hs:
        return refined, 0

    gap = max(hs[0] // 5, 2) + (1 if tall_bubble else 0)
    return refined, sum(hs) + gap * (len(refined) - 1)


def best_fit_font(draw, text, font_candidates, safe_w, safe_h, tall_bubble=False):
    """Pick the largest font size whose wrapped text fits the safe box.

    Sizes are tried from ``MAX_FONT_SIZE`` down to ``MIN_FONT_SIZE``. If no
    size fits, the layout at ``MIN_FONT_SIZE`` is returned anyway.

    Returns:
        ``(font, lines, total_height, font_path_used)``.
    """
    for size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -FONT_SIZE_STEP):
        font, path_used = load_font_from_candidates(font_candidates, size)
        lines, total_h = wrap_text_ellipse_aware(
            draw, text, font, safe_w, safe_h, tall_bubble=tall_bubble
        )

        max_lw = 0
        for ln in lines:
            lw, _ = measure_text(draw, ln, font)
            max_lw = max(max_lw, lw)

        if total_h <= safe_h and max_lw <= safe_w:
            return font, lines, total_h, path_used

    font, path_used = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
    lines, total_h = wrap_text_ellipse_aware(
        draw, text, font, safe_w, safe_h, tall_bubble=tall_bubble
    )
    return font, lines, total_h, path_used


def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw ``text`` at ``pos`` with an outline made of offset copies.

    The outline is 2 px wide when the measured text height is <= 11 px and
    1 px otherwise.
    """
    x, y = pos
    _, h = measure_text(draw, text, font)
    stroke_width = 2 if h <= 11 else 1

    for dx in range(-stroke_width, stroke_width + 1):
        for dy in range(-stroke_width, stroke_width + 1):
            if dx == 0 and dy == 0:
                continue
            draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill)

    draw.text((x, y), text, font=font, fill=fill)


# ─────────────────────────────────────────────
# CLEAN MASK BUILDERS
# ─────────────────────────────────────────────
def draw_rounded_rect_mask(mask, x1, y1, x2, y2, radius, color=255):
    """Fill a rounded rectangle into ``mask`` in place and return it.

    Degenerate rectangles (``x2 <= x1`` or ``y2 <= y1``) leave the mask
    untouched. The radius is clamped to at least 1 and at most half of the
    shorter side.
    """
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    if x2 <= x1 or y2 <= y1:
        return mask

    r = int(max(1, min(radius, (x2 - x1) // 2, (y2 - y1) // 2)))

    # Two overlapping rectangles form a plus shape; circles fill the corners.
    cv2.rectangle(mask, (x1 + r, y1), (x2 - r, y2), color, -1)
    cv2.rectangle(mask, (x1, y1 + r), (x2, y2 - r), color, -1)

    cv2.circle(mask, (x1 + r, y1 + r), r, color, -1)
    cv2.circle(mask, (x2 - r, y1 + r), r, color, -1)
    cv2.circle(mask, (x1 + r, y2 - r), r, color, -1)
    cv2.circle(mask, (x2 - r, y2 - r), r, color, -1)
    return mask


def build_hybrid_clean_mask(img_h, img_w, cx, cy, sa_fill, sb_fill, angle,
                            safe_w, safe_h, panel):
    """Build a cleanup mask from a rounded rectangle plus an inner ellipse.

    The rectangle is the safe text box scaled by the ``CLEAN_MASK_RECT_*``
    factors; the ellipse is the fill ellipse scaled by
    ``CLEAN_MASK_ELLIPSE_SCALE``. The union is closed with a 3x3 kernel,
    clipped to ``panel`` and optionally blurred and re-thresholded.

    Returns:
        A ``uint8`` mask of shape ``(img_h, img_w)`` with values 0/255.
    """
    px1, py1, px2, py2 = panel
    mask = np.zeros((img_h, img_w), dtype=np.uint8)

    rw = max(8, int(safe_w * CLEAN_MASK_RECT_SCALE_W))
    rh = max(8, int(safe_h * CLEAN_MASK_RECT_SCALE_H))

    x1 = int(cx - rw / 2)
    y1 = int(cy - rh / 2)
    x2 = int(cx + rw / 2)
    y2 = int(cy + rh / 2)

    rr = int(min(rw, rh) * 0.22)
    draw_rounded_rect_mask(mask, x1, y1, x2, y2, rr, color=255)

    e_sa = max(3, int(sa_fill * CLEAN_MASK_ELLIPSE_SCALE))
    e_sb = max(3, int(sb_fill * CLEAN_MASK_ELLIPSE_SCALE))
    cv2.ellipse(mask, (int(round(cx)), int(round(cy))), (e_sa, e_sb),
                angle, 0, 360, 255, -1)

    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=1)

    clip = np.zeros_like(mask)
    clip[py1:py2, px1:px2] = 255
    mask = cv2.bitwise_and(mask, clip)

    if CLEAN_MASK_BLUR > 0:
        mask = cv2.GaussianBlur(mask, (0, 0), CLEAN_MASK_BLUR)
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

    return mask


def build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel):
    """
    Build precise text cleanup mask from OCR quad bounding boxes.

    Each entry of ``bubble_data["quad_bboxes"]`` is read via its ``x``,
    ``y``, ``w`` and ``h`` keys. Boxes smaller than the ``OCR_CLEAN_MIN_*``
    size are grown symmetrically, padded by ``OCR_CLEAN_PAD_*`` and clamped
    to ``panel``. The rectangles are merged with a morphological close,
    optionally dilated, and clipped to the panel.

    Returns:
        A ``uint8`` mask of shape ``(img_h, img_w)``; all zeros when the
        bubble has no quad boxes.
    """
    px1, py1, px2, py2 = panel
    quad_boxes = bubble_data.get("quad_bboxes", [])
    mask = np.zeros((img_h, img_w), dtype=np.uint8)

    if not quad_boxes:
        return mask

    for qb in quad_boxes:
        x = int(qb.get("x", 0))
        y = int(qb.get("y", 0))
        w = int(qb.get("w", 0))
        h = int(qb.get("h", 0))
        if w <= 0 or h <= 0:
            continue

        if w < OCR_CLEAN_MIN_W:
            extra = (OCR_CLEAN_MIN_W - w) // 2
            x -= extra
            w += 2 * extra
        if h < OCR_CLEAN_MIN_H:
            extra = (OCR_CLEAN_MIN_H - h) // 2
            y -= extra
            h += 2 * extra

        # Panel bounds may be numpy scalars; cv2 drawing wants plain ints.
        x1 = int(max(px1, x - OCR_CLEAN_PAD_X))
        y1 = int(max(py1, y - OCR_CLEAN_PAD_Y))
        x2 = int(min(px2, x + w + OCR_CLEAN_PAD_X))
        y2 = int(min(py2, y + h + OCR_CLEAN_PAD_Y))

        if x2 > x1 and y2 > y1:
            cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)

    # Merge nearby fragments
    ksize = max(3, int(OCR_CLEAN_CLOSE_KERNEL) | 1)  # ensure odd and >=3
    k = np.ones((ksize, ksize), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, k, iterations=1)

    if OCR_CLEAN_DILATE > 0:
        mask = cv2.dilate(mask, np.ones((3, 3), np.uint8),
                          iterations=OCR_CLEAN_DILATE)

    # Clip to panel bounds
    clip = np.zeros_like(mask)
    clip[py1:py2, px1:px2] = 255
    mask = cv2.bitwise_and(mask, clip)

    return mask


# ─────────────────────────────────────────────
# PANEL DETECTION
# ─────────────────────────────────────────────
def merge_nested_panels(panels):
    """Drop panels that lie mostly inside a larger, already-kept panel.

    Panels are visited from largest to smallest area; a panel is discarded
    when more than 80% of its own area overlaps a kept panel.
    """
    if len(panels) <= 1:
        return panels

    panels_sorted = sorted(
        panels, key=lambda p: (p[2] - p[0]) * (p[3] - p[1]), reverse=True
    )
    keep = []

    for panel in panels_sorted:
        px1, py1, px2, py2 = panel
        p_area = (px2 - px1) * (py2 - py1)
        dominated = False

        for kept in keep:
            kx1, ky1, kx2, ky2 = kept
            ix1 = max(px1, kx1)
            iy1 = max(py1, ky1)
            ix2 = min(px2, kx2)
            iy2 = min(py2, ky2)

            if ix2 > ix1 and iy2 > iy1:
                inter = (ix2 - ix1) * (iy2 - iy1)
                if inter / max(1, p_area) > 0.80:
                    dominated = True
                    break

        if not dominated:
            keep.append(panel)

    return keep


def split_panels_on_internal_borders(panels, v_lines, img_w, img_h):
    """Split wide panels in two along a strong internal vertical line.

    Only panels at least 30% of the image width are examined, and only the
    central 60% of each panel is searched. A column counts as a border when
    its ``v_lines`` sum exceeds 40% of a fully-set column; the split is
    placed at the median of those columns and is accepted only when both
    halves are wider than 10% of the image width. ``img_h`` is accepted
    but not used.
    """
    result = []

    for (px1, py1, px2, py2) in panels:
        pw = px2 - px1
        if pw < img_w * 0.30:
            result.append((px1, py1, px2, py2))
            continue

        margin = int(pw * 0.20)
        search_x1 = px1 + margin
        search_x2 = px2 - margin
        if search_x2 <= search_x1:
            result.append((px1, py1, px2, py2))
            continue

        panel_vlines = v_lines[py1:py2, search_x1:search_x2]
        col_sums = panel_vlines.sum(axis=0)
        panel_h = py2 - py1
        threshold = panel_h * 255 * 0.40

        split_cols = np.where(col_sums > threshold)[0]
        if len(split_cols) == 0:
            result.append((px1, py1, px2, py2))
            continue

        split_x = int(np.median(split_cols)) + search_x1
        left_w = split_x - px1
        right_w = px2 - split_x

        if left_w > img_w * 0.10 and right_w > img_w * 0.10:
            result.append((px1, py1, split_x, py2))
            result.append((split_x, py1, px2, py2))
        else:
            result.append((px1, py1, px2, py2))

    return result


def detect_panels(img_bgr):
    """Detect panel rectangles on a page image.

    Long dark horizontal and vertical runs are extracted as borders; the
    connected regions between them become panel candidates. Candidates are
    filtered by area, page coverage, aspect ratio and minimum side length,
    then de-nested, split on internal vertical borders and sorted by row
    band (100 px) and x position.

    Returns:
        A list of ``(x1, y1, x2, y2)`` tuples of plain ints. When nothing
        qualifies, the whole image is returned as a single panel.
    """
    img_h, img_w = img_bgr.shape[:2]
    total_area = img_h * img_w
    min_area = total_area * MIN_PANEL_AREA_RATIO

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, dark_mask = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY_INV)

    h_len = max(40, img_w // 25)
    h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_len, 1))
    h_lines = cv2.morphologyEx(dark_mask, cv2.MORPH_OPEN, h_kernel)

    v_len = max(40, img_h // 25)
    v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_len))
    v_lines = cv2.morphologyEx(dark_mask, cv2.MORPH_OPEN, v_kernel)

    borders = cv2.bitwise_or(h_lines, v_lines)
    borders = cv2.dilate(borders, np.ones((5, 5), np.uint8), iterations=2)

    panel_interior = cv2.bitwise_not(borders)
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(
        panel_interior, connectivity=8
    )

    panels = []
    for label_id in range(1, num_labels):
        area = stats[label_id, cv2.CC_STAT_AREA]
        if area < min_area:
            continue

        # Convert numpy scalars so panels hold plain Python ints.
        x = int(stats[label_id, cv2.CC_STAT_LEFT])
        y = int(stats[label_id, cv2.CC_STAT_TOP])
        w = int(stats[label_id, cv2.CC_STAT_WIDTH])
        h = int(stats[label_id, cv2.CC_STAT_HEIGHT])
        x2 = x + w
        y2 = y + h

        if w * h > total_area * 0.90:
            continue

        aspect = max(w, h) / max(min(w, h), 1)
        if aspect > 15:
            continue

        if w < img_w * 0.05 or h < img_h * 0.05:
            continue

        panels.append((x, y, x2, y2))

    panels = merge_nested_panels(panels)
    panels = split_panels_on_internal_borders(panels, v_lines, img_w, img_h)
    panels.sort(key=lambda p: (p[1] // 100, p[0]))

    if not panels:
        panels = [(0, 0, img_w, img_h)]

    return panels


# ─────────────────────────────────────────────
# BUBBLE GEOMETRY
# ─────────────────────────────────────────────
def assign_panel(bubble_data, panels, img_w, img_h):
    """Return the panel that overlaps the bubble's bounding box the most.

    When no panel overlaps, the first panel containing the bubble centre
    is used; failing that, the whole image ``(0, 0, img_w, img_h)``.
    """
    bx, bw = bubble_data["x"], bubble_data["w"]
    by, bh = bubble_data["y"], bubble_data["h"]
    bcx, bcy = bx + bw / 2.0, by + bh / 2.0

    best_panel, best_overlap = None, 0
    for (px1, py1, px2, py2) in panels:
        ix1 = max(bx, px1)
        iy1 = max(by, py1)
        ix2 = min(bx + bw, px2)
        iy2 = min(by + bh, py2)
        if ix2 > ix1 and iy2 > iy1:
            overlap = (ix2 - ix1) * (iy2 - iy1)
            if overlap > best_overlap:
                best_overlap = overlap
                best_panel = (px1, py1, px2, py2)

    if best_panel is None:
        for (px1, py1, px2, py2) in panels:
            if px1 <= bcx <= px2 and py1 <= bcy <= py2:
                return (px1, py1, px2, py2)
        return (0, 0, img_w, img_h)

    return best_panel


def _find_light_seed(gray, seed_x, seed_y, max_radius, panel):
    """Search growing squares around the seed for a pixel with gray >= 200.

    Radii ``1 .. max_radius - 1`` are scanned row by row; only pixels inside
    ``panel`` are considered. Returns ``(x, y)`` or ``None``.
    """
    px1, py1, px2, py2 = panel
    for r in range(1, max_radius):
        for dy in range(-r, r + 1):
            for dx in range(-r, r + 1):
                nx, ny = seed_x + dx, seed_y + dy
                if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200:
                    return nx, ny
    return None


def detect_bubble_ellipse(img_bgr, bubble_data, panel):
    """Fit an ellipse to the light region around the bubble centre.

    The page is thresholded at 200, restricted to ``panel`` and flood-filled
    from the bubble's centre (or from a nearby light pixel when the centre
    is darker than 150). The largest contour of the filled region is fitted
    with ``cv2.fitEllipse``.

    Returns:
        ``(cx, cy, semi_axis_a, semi_axis_b, angle)`` as floats, or ``None``
        when no light seed or no usable contour (>= 5 points, area >= 100)
        is found.
    """
    x, w = bubble_data["x"], bubble_data["w"]
    y, h = bubble_data["y"], bubble_data["h"]

    img_h, img_w = img_bgr.shape[:2]
    px1, py1, px2, py2 = panel

    seed_x = max(1, min(img_w - 2, int(x + w / 2.0)))
    seed_y = max(1, min(img_h - 2, int(y + h / 2.0)))

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

    panel_mask = np.zeros_like(binary)
    panel_mask[py1:py2, px1:px2] = binary[py1:py2, px1:px2]

    if gray[seed_y, seed_x] < 150:
        # int() keeps range() valid when the bubble JSON stores float sizes.
        max_radius = max(2, int(min(w, h)) // 3)
        seed = _find_light_seed(gray, seed_x, seed_y, max_radius, panel)
        if seed is None:
            return None
        seed_x, seed_y = seed

    # floodFill requires a mask two pixels larger than the image.
    flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8)
    flood_fill_img = panel_mask.copy()

    cv2.floodFill(
        flood_fill_img, flood_mask, (seed_x, seed_y), 255,
        loDiff=FLOOD_TOLERANCE, upDiff=FLOOD_TOLERANCE,
        flags=cv2.FLOODFILL_FIXED_RANGE
    )

    filled_region = flood_mask[1:-1, 1:-1] * 255
    filled_region = cv2.morphologyEx(
        filled_region, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1
    )

    contours, _ = cv2.findContours(
        filled_region, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return None

    bubble_contour = max(contours, key=cv2.contourArea)
    # fitEllipse needs at least five points.
    if len(bubble_contour) < 5:
        return None
    if cv2.contourArea(bubble_contour) < 100:
        return None

    (ecx, ecy), (ew, eh), angle = cv2.fitEllipse(bubble_contour)
    return float(ecx), float(ecy), float(ew / 2), float(eh / 2), float(angle)


def _ellipse_overflow(cx, cy, sa, sb, angle, bounds):
    """Return how far the ellipse's extents exceed ``bounds`` on each side.

    The half-extents are derived from the semi-axes and ``angle`` (degrees).
    Returns ``(left, right, top, bottom)`` overflows, each >= 0.
    """
    x1, y1, x2, y2 = bounds
    rad = math.radians(angle)
    hw = math.sqrt((sa * math.cos(rad))**2 + (sb * math.sin(rad))**2)
    hh = math.sqrt((sa * math.sin(rad))**2 + (sb * math.cos(rad))**2)
    return (
        max(0, x1 - (cx - hw)),
        max(0, (cx + hw) - x2),
        max(0, y1 - (cy - hh)),
        max(0, (cy + hh) - y2),
    )


def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, shrink=BORDER_SHRINK_PX):
    """Nudge and shrink an ellipse so it fits inside the shrunken panel.

    Both semi-axes are first reduced by ``shrink`` (minimum 1.0). Then, for
    up to three passes, the centre is moved away from any overflowing side
    by at most ``MAX_NUDGE_RATIO`` of the matching semi-axis, and any
    overflow that remains is subtracted from both semi-axes.

    Returns:
        ``(cx, cy, semi_axis_a, semi_axis_b)`` after adjustment.
    """
    px1, py1, px2, py2 = panel
    inner = (px1 + shrink, py1 + shrink, px2 - shrink, py2 - shrink)

    sa_s = max(sa - shrink, 1.0)
    sb_s = max(sb - shrink, 1.0)

    for _ in range(3):
        ovf_l, ovf_r, ovf_t, ovf_b = _ellipse_overflow(
            cx, cy, sa_s, sb_s, angle, inner
        )
        if max(ovf_l, ovf_r, ovf_t, ovf_b) == 0:
            break

        max_nx = sa_s * MAX_NUDGE_RATIO
        max_ny = sb_s * MAX_NUDGE_RATIO
        cx += min(ovf_l, max_nx) - min(ovf_r, max_nx)
        cy += min(ovf_t, max_ny) - min(ovf_b, max_ny)

        # Re-measure after the nudge; shrink by whatever still sticks out.
        max_ovf = max(_ellipse_overflow(cx, cy, sa_s, sb_s, angle, inner))
        if max_ovf > 0:
            sa_s = max(sa_s - max_ovf, 1.0)
            sb_s = max(sb_s - max_ovf, 1.0)

    return cx, cy, sa_s, sb_s


def get_render_ellipse(img_bgr, bubble_data, panel):
    """Return the fill ellipse and safe text box for a bubble.

    Uses the detected ellipse when available; otherwise falls back to an
    axis-aligned ellipse inscribed in the bubble's bounding box. The ellipse
    is clipped to the panel and the safe box is the semi-axes scaled by
    ``sqrt(2) * TEXT_RATIO``.

    Returns:
        ``(cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method)`` where
        ``method`` is "detected" or "fallback".
    """
    x, w = bubble_data["x"], bubble_data["w"]
    y, h = bubble_data["y"], bubble_data["h"]

    detected = detect_bubble_ellipse(img_bgr, bubble_data, panel)
    if detected is not None:
        cx, cy, sa, sb, angle = detected
        method = "detected"
    else:
        cx, cy = x + w / 2.0, y + h / 2.0
        sa, sb = w / 2.0, h / 2.0
        angle = 0.0
        method = "fallback"

    cx, cy, sa_fill, sb_fill = clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel)
    safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO
    safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO
    return (cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method)


# ─────────────────────────────────────────────
# DRAW ONE BUBBLE
# ─────────────────────────────────────────────
def draw_bubble(
    pil_img,
    img_bgr,
    bubble_data,
    original_text,
    translated_text,
    flags,
    font_candidates,
    font_color,
    stroke_color,
    panel
):
    """Clean one bubble on ``pil_img`` and draw its translated text.

    The original text region is painted white using the mask selected by
    ``CLEAN_MODE`` (the OCR-union mode falls back to the hybrid mask when
    its mask is empty). The text is then centred on the bounding box of the
    cleaned region. ``flags`` is accepted but not used here.

    Returns:
        ``(method, font_used)``. ``method`` is "skip_sfx" when the text is
        an unchanged SFX (nothing is drawn), otherwise "detected" or
        "fallback". ``font_used`` is "NO_FONT" when no text was rendered.
    """
    # skip unchanged SFX
    if original_text and translated_text:
        if (normalize_text(original_text) == normalize_text(translated_text)
                and is_sfx_like(original_text)):
            return "skip_sfx", "NO_FONT"

    (cx, cy, sa_fill, sb_fill, angle,
     safe_w, safe_h, method) = get_render_ellipse(img_bgr, bubble_data, panel)

    cx_i, cy_i = int(round(cx)), int(round(cy))
    img_h, img_w = img_bgr.shape[:2]

    # choose cleaning mask
    if CLEAN_MODE == "ocr_union":
        mask = build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel)
        # robust fallback
        if mask is None or not mask.any():
            mask = build_hybrid_clean_mask(
                img_h=img_h, img_w=img_w,
                cx=cx, cy=cy,
                sa_fill=sa_fill, sb_fill=sb_fill,
                angle=angle,
                safe_w=safe_w, safe_h=safe_h,
                panel=panel
            )
    elif CLEAN_MODE == "hybrid":
        mask = build_hybrid_clean_mask(
            img_h=img_h, img_w=img_w,
            cx=cx, cy=cy,
            sa_fill=sa_fill, sb_fill=sb_fill,
            angle=angle,
            safe_w=safe_w, safe_h=safe_h,
            panel=panel
        )
    else:
        # ellipse
        mask = np.zeros((img_h, img_w), dtype=np.uint8)
        cv2.ellipse(
            mask, (cx_i, cy_i),
            (int(math.ceil(sa_fill)), int(math.ceil(sb_fill))),
            angle, 0, 360, 255, -1
        )

    # paint white over mask
    img_np = np.array(pil_img)
    img_np[mask == 255] = [255, 255, 255]
    pil_img.paste(Image.fromarray(img_np))

    if not translated_text:
        return method, "NO_FONT"

    draw = ImageDraw.Draw(pil_img)

    # Center text in the cleaned region bbox (red-box style target)
    ys, xs = np.where(mask > 0)
    if len(xs) > 0 and len(ys) > 0:
        mx1, my1, mx2, my2 = xs.min(), ys.min(), xs.max(), ys.max()
        text_cx = int((mx1 + mx2) / 2)
        text_cy = int((my1 + my2) / 2)
        sw = max(20, int((mx2 - mx1) * 0.92))
        sh = max(20, int((my2 - my1) * 0.92))
    else:
        text_cx, text_cy = cx_i, cy_i
        sw, sh = max(int(safe_w), 1), max(int(safe_h), 1)

    bw = max(1, bubble_data.get("w", 1))
    bh = max(1, bubble_data.get("h", 1))
    tall_bubble = (bh / bw) > 1.25

    font, lines, total_h, font_used = best_fit_font(
        draw, translated_text, font_candidates, sw, sh, tall_bubble=tall_bubble
    )

    if not lines:
        return method, font_used

    y_cursor = int(round(text_cy - total_h / 2.0 - 0.02 * sh))

    for line in lines:
        lw, lh = measure_text(draw, line, font)
        x = text_cx - lw // 2
        draw_text_with_stroke(
            draw, (x, y_cursor), line, font,
            fill=font_color, stroke_fill=stroke_color
        )
        y_cursor += lh + max(lh // 5, 2) + (1 if tall_bubble else 0)

    return method, font_used


# ─────────────────────────────────────────────
# MAIN RENDER FUNCTION
# ─────────────────────────────────────────────
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_candidates=DEFAULT_FONT_CANDIDATES,
    font_color=DEFAULT_FONT_COLOR,
    stroke_color=DEFAULT_STROKE_COLOR,
):
    """Render every translated bubble onto the page and save the result.

    Bubbles are processed in ``reading_order`` (falling back to the bubble
    id). Translations whose id is missing from the bubbles file and
    unchanged SFX are counted as skipped. A summary is printed at the end.

    Raises:
        FileNotFoundError: if ``input_image`` cannot be loaded.
        OSError: if ``output_image`` cannot be written.
    """
    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot load image: {input_image}")

    img_h, img_w = img_bgr.shape[:2]
    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    translations, originals, flags_map = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)
    panels = detect_panels(img_bgr)

    rendered = 0
    skipped = 0

    def sort_key(item):
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))

    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        if bubble_id not in bubbles:
            skipped += 1
            continue

        bubble_data = bubbles[bubble_id]
        panel = assign_panel(bubble_data, panels, img_w, img_h)

        original_text = originals.get(bubble_id, "")
        flags = flags_map.get(bubble_id, "-")

        method, font_used = draw_bubble(
            pil_img=img_pil,
            img_bgr=img_bgr,
            bubble_data=bubble_data,
            original_text=original_text,
            translated_text=translated_text,
            flags=flags,
            font_candidates=font_candidates,
            font_color=font_color,
            stroke_color=stroke_color,
            panel=panel
        )

        if method == "skip_sfx":
            skipped += 1
        else:
            rendered += 1

    result_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    # imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(output_image, result_cv):
        raise OSError(f"Cannot write image: {output_image}")

    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
    print(f"📄 Output → {output_image}")
    print(f"🧼 Clean mode: {CLEAN_MODE}")


# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    render_translations(
        input_image="001-page.png",
        output_image="page_translated.png",
        translations_file="output.txt",
        bubbles_file="bubbles.json",
        font_candidates=DEFAULT_FONT_CANDIDATES,
        font_color=DEFAULT_FONT_COLOR,
        stroke_color=DEFAULT_STROKE_COLOR,
    )