"""Wipe original text from comic speech bubbles and render translations.

The pipeline reads a translations file and a bubbles JSON file, whitens the
detected text-line boxes (the "yellow squares") of every bubble, optionally
restores the bubble outline, and draws the wrapped translated text centred
inside the cleaned area.
"""

import json
import os
import re

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
DEFAULT_FONT_CANDIDATES = [
    "fonts/AnimeAce2_reg.ttf",
    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
    "fonts/NotoSans-Regular.ttf",
]

DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)

MAX_FONT_SIZE = 20
MIN_FONT_SIZE = 6

# Guarantee full wipe of yellow squares
YELLOW_BOX_PAD_X = 1
YELLOW_BOX_PAD_Y = 1
YELLOW_UNION_PAD_X = 4
YELLOW_UNION_PAD_Y = 4

# Optional extra cleanup expansion
ENABLE_EXTRA_CLEAN = True
EXTRA_DILATE_ITERS = 1
EXTRA_CLOSE_ITERS = 1

# Bubble detection (for optional extra mask / border preservation)
FLOOD_TOL = 30

# Border restoration: keep very conservative
ENABLE_EDGE_RESTORE = True
EDGE_RESTORE_DILATE = 1

# Text layout inside yellow-union
TEXT_INSET = 0.92


# ─────────────────────────────────────────────
# PARSERS
# ─────────────────────────────────────────────
def parse_translations(translations_file):
    """Parse the pipe-separated translations file.

    Only lines starting with ``#`` are considered. The first field is the
    bubble id (``#<int>``). The remaining fields are read positionally:
    with four or more fields, field 2 is the original text and field 3 the
    translation (field 4, when present, holds the flags); with exactly three
    fields, field 2 is the translation. Entries whose translation starts
    with ``[`` are skipped.

    Returns:
        A tuple ``(translations, originals, flags_map)`` of dicts keyed by
        the integer bubble id.
    """
    translations = {}
    originals = {}
    flags_map = {}

    with open(translations_file, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue

            parts = line.split("|")
            try:
                bubble_id = int(parts[0].lstrip("#"))
            except ValueError:
                # Not a "#<number>|..." record (e.g. a plain comment line).
                continue

            if len(parts) >= 5:
                original = parts[2].strip()
                translated = parts[3].strip()
                flags = parts[4].strip()
            elif len(parts) >= 4:
                original = parts[2].strip()
                translated = parts[3].strip()
                flags = "-"
            elif len(parts) >= 3:
                original = ""
                translated = parts[2].strip()
                flags = "-"
            else:
                continue

            if translated.startswith("["):
                continue

            translations[bubble_id] = translated
            originals[bubble_id] = original
            flags_map[bubble_id] = flags

    return translations, originals, flags_map


def parse_bubbles(bubbles_file):
    """Load the bubbles JSON object and convert its keys to integers."""
    with open(bubbles_file, "r", encoding="utf-8") as f:
        raw = json.load(f)
    return {int(k): v for k, v in raw.items()}


# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def clamp(v, lo, hi):
    """Limit ``v`` to the inclusive range ``[lo, hi]``."""
    return max(lo, min(hi, v))


def xywh_to_xyxy(box):
    """Convert an ``{"x", "y", "w", "h"}`` dict to ``(x1, y1, x2, y2)``.

    Returns ``None`` for a missing or empty box. Missing keys default to 0.
    """
    if not box:
        return None
    x = int(box.get("x", 0))
    y = int(box.get("y", 0))
    w = int(box.get("w", 0))
    h = int(box.get("h", 0))
    return (x, y, x + w, y + h)


def union_xyxy(boxes):
    """Return the bounding box of all non-``None`` boxes, or ``None``.

    ``None`` is also returned when the union has no positive area.
    """
    boxes = [b for b in boxes if b is not None]
    if not boxes:
        return None
    x1 = min(b[0] for b in boxes)
    y1 = min(b[1] for b in boxes)
    x2 = max(b[2] for b in boxes)
    y2 = max(b[3] for b in boxes)
    if x2 <= x1 or y2 <= y1:
        return None
    return (x1, y1, x2, y2)


def bbox_from_mask(mask):
    """Return the half-open ``(x1, y1, x2, y2)`` box of non-zero pixels."""
    ys, xs = np.where(mask > 0)
    if len(xs) == 0:
        return None
    return (int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1)


def normalize_text(s):
    """Upper-case ``s`` and drop every run of non-word characters."""
    t = s.upper().strip()
    t = re.sub(r"[^\w]+", "", t)
    return t


def is_sfx_like(text):
    """Tell whether ``text`` normalizes to one of the known short SFX words."""
    t = normalize_text(text)
    return bool(len(t) <= 8 and re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", t))


# ─────────────────────────────────────────────
# FONT
# ─────────────────────────────────────────────
def load_font_from_candidates(candidates, size):
    """Load the first usable TrueType font from ``candidates``.

    Returns ``(font, path)``. When no candidate exists or loads, PIL's
    built-in default font is returned with the path ``"PIL_DEFAULT"``.
    """
    for path in candidates:
        if path and os.path.exists(path):
            try:
                return ImageFont.truetype(path, size), path
            except Exception:
                # Best effort: an unreadable font file just means
                # "try the next candidate".
                continue
    return ImageFont.load_default(), "PIL_DEFAULT"


def measure_text(draw, text, font):
    """Return ``(width, height)`` of ``text`` from ``draw.textbbox``."""
    bb = draw.textbbox((0, 0), text, font=font)
    return bb[2] - bb[0], bb[3] - bb[1]


def _line_gap(first_line_height):
    """Vertical gap between wrapped lines, derived from the first line."""
    return max(2, first_line_height // 5)


def wrap_text(draw, text, font, max_width):
    """Greedily wrap ``text`` on whitespace so lines fit ``max_width``.

    A single word wider than ``max_width`` is kept on its own line.

    Returns:
        ``(lines, total_height, max_line_width)``; ``([""], 0, 0)`` when the
        text contains no words.
    """
    lines = []
    cur = ""
    for word in text.split():
        candidate = (cur + " " + word).strip()
        width, _ = measure_text(draw, candidate, font)
        if width <= max_width or not cur:
            cur = candidate
        else:
            lines.append(cur)
            cur = word
    if cur:
        lines.append(cur)

    if not lines:
        return [""], 0, 0

    sizes = [measure_text(draw, ln, font) for ln in lines]
    gap = _line_gap(sizes[0][1])
    total_h = sum(h for _, h in sizes) + gap * (len(lines) - 1)
    return lines, total_h, max(w for w, _ in sizes)


def fit_font(draw, text, font_candidates, safe_w, safe_h):
    """Pick the largest font size whose wrapped text fits the safe area.

    Sizes are tried from ``MAX_FONT_SIZE`` down to ``MIN_FONT_SIZE``. If
    nothing fits, the minimum size is used even though it overflows.

    Returns:
        ``(font, lines, total_height)``.
    """
    for size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -1):
        font, _ = load_font_from_candidates(font_candidates, size)
        lines, total_h, max_w = wrap_text(draw, text, font, safe_w)
        if total_h <= safe_h and max_w <= safe_w:
            return font, lines, total_h

    font, _ = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
    lines, total_h, _ = wrap_text(draw, text, font, safe_w)
    return font, lines, total_h


def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw ``text`` at ``pos`` with an outline made of offset copies.

    The outline radius is 2 px when the measured text height is at most
    11 px, otherwise 1 px.
    """
    x, y = pos
    _, h = measure_text(draw, text, font)
    sw = 2 if h <= 11 else 1

    for dx in range(-sw, sw + 1):
        for dy in range(-sw, sw + 1):
            if dx == 0 and dy == 0:
                continue
            draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill)

    draw.text((x, y), text, font=font, fill=fill)


# ─────────────────────────────────────────────
# MASK BUILDERS
# ─────────────────────────────────────────────
def _fill_padded_box(mask, box, pad_x, pad_y):
    """Paint ``box`` (xyxy), grown by the padding and clamped, into ``mask``."""
    if not box:
        return
    img_h, img_w = mask.shape[:2]
    x1, y1, x2, y2 = box
    x1 = clamp(x1 - pad_x, 0, img_w - 1)
    y1 = clamp(y1 - pad_y, 0, img_h - 1)
    x2 = clamp(x2 + pad_x, 1, img_w)
    y2 = clamp(y2 + pad_y, 1, img_h)
    if x2 > x1 and y2 > y1:
        cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)


def build_yellow_mask(bubble_data, img_h, img_w):
    """
    HARD GUARANTEE:
    Returned mask always covers all yellow squares (line_bboxes).

    Falls back to ``line_union_bbox`` and then ``text_bbox`` (both with the
    larger union padding) while the mask is still empty.
    """
    mask = np.zeros((img_h, img_w), dtype=np.uint8)

    # Preferred: exact line boxes
    for lb in bubble_data.get("line_bboxes") or []:
        _fill_padded_box(mask, xywh_to_xyxy(lb), YELLOW_BOX_PAD_X, YELLOW_BOX_PAD_Y)

    # Fallbacks, tried in order only while nothing has been painted yet
    for key in ("line_union_bbox", "text_bbox"):
        if np.count_nonzero(mask) != 0:
            break
        _fill_padded_box(
            mask,
            xywh_to_xyxy(bubble_data.get(key)),
            YELLOW_UNION_PAD_X,
            YELLOW_UNION_PAD_Y,
        )

    return mask


def _nearest_white(panel_bin, sx, sy, radius):
    """Find the closest white pixel of ``panel_bin`` around ``(sx, sy)``.

    Distance is the Chebyshev distance (square rings); ties are broken in
    row-major order. Returns ``(x, y)`` or ``None`` if no white pixel lies
    within ``radius``.
    """
    h, w = panel_bin.shape[:2]
    x0, x1 = max(0, sx - radius), min(w, sx + radius + 1)
    y0, y1 = max(0, sy - radius), min(h, sy + radius + 1)

    ys, xs = np.nonzero(panel_bin[y0:y1, x0:x1])
    if len(xs) == 0:
        return None

    ring = np.maximum(np.abs(ys + y0 - sy), np.abs(xs + x0 - sx))
    # np.nonzero yields row-major order, so argmin's "first minimum"
    # gives the top-most, then left-most pixel of the nearest ring.
    best = int(np.argmin(ring))
    return int(xs[best] + x0), int(ys[best] + y0)


def bubble_interior_mask(img_bgr, bubble_data):
    """
    Optional helper to expand clean region safely; never used to shrink yellow coverage.

    Flood-fills the white area (gray > 200) connected to the bubble's seed
    point, restricted to ``panel_bbox``. If the seed is not on a white
    pixel, the nearest white pixel within a small radius is used instead;
    if there is none, a filled ellipse over the bubble box is returned.
    """
    h, w = img_bgr.shape[:2]

    panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
    if panel is None:
        panel = (0, 0, w, h)
    # Clamp so that negative / oversized panel boxes cannot wrap when slicing.
    px1 = clamp(panel[0], 0, w)
    py1 = clamp(panel[1], 0, h)
    px2 = clamp(panel[2], 0, w)
    py2 = clamp(panel[3], 0, h)

    seed = bubble_data.get("seed_point") or {}
    sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
    sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
    sx = clamp(sx, 1, w - 2)
    sy = clamp(sy, 1, h - 2)

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

    panel_bin = np.zeros_like(binary)
    panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]

    # The flood fill runs on panel_bin, so the seed must be white *there*;
    # otherwise the fill would spread over the dark (or out-of-panel) region.
    if panel_bin[sy, sx] == 0:
        bw = int(bubble_data.get("w", 20))
        bh = int(bubble_data.get("h", 20))
        search_r = max(2, min(bw, bh) // 3)
        hit = _nearest_white(panel_bin, sx, sy, search_r)
        if hit is None:
            # No usable seed: approximate the bubble with an ellipse.
            m = np.zeros((h, w), dtype=np.uint8)
            bx = int(bubble_data.get("x", 0))
            by = int(bubble_data.get("y", 0))
            cv2.ellipse(
                m,
                (bx + bw // 2, by + bh // 2),
                (max(4, bw // 2), max(4, bh // 2)),
                0, 0, 360, 255, -1,
            )
            return m
        sx, sy = hit

    # floodFill requires a mask 2 px larger than the image in each dimension.
    ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
    flood = panel_bin.copy()
    cv2.floodFill(
        flood, ff_mask, (sx, sy), 255,
        loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
        flags=cv2.FLOODFILL_FIXED_RANGE,
    )

    # Filled pixels are marked with 1 in the mask; scale them to 255.
    m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
    return m


def build_clean_mask(img_bgr, bubble_data, bubble_mask=None):
    """
    FINAL RULE:
    clean_mask MUST cover yellow_mask completely.

    Args:
        img_bgr: Source image as a BGR array.
        bubble_data: Bubble description dict.
        bubble_mask: Optional precomputed result of ``bubble_interior_mask``
            for this bubble; computed here when omitted and needed.

    Returns:
        ``(clean_mask, yellow_mask)``.
    """
    h, w = img_bgr.shape[:2]
    yellow = build_yellow_mask(bubble_data, h, w)

    # start with guaranteed yellow
    clean = yellow.copy()

    if ENABLE_EXTRA_CLEAN:
        if bubble_mask is None:
            bubble_mask = bubble_interior_mask(img_bgr, bubble_data)
        kernel = np.ones((3, 3), np.uint8)
        extra = cv2.dilate(yellow, kernel, iterations=EXTRA_DILATE_ITERS)
        extra = cv2.morphologyEx(extra, cv2.MORPH_CLOSE, kernel, iterations=EXTRA_CLOSE_ITERS)
        extra = cv2.bitwise_and(extra, bubble_mask)

        # IMPORTANT: union with yellow (never subtract yellow)
        clean = cv2.bitwise_or(yellow, extra)

    # final guarantee (defensive)
    clean = cv2.bitwise_or(clean, yellow)
    return clean, yellow


# ─────────────────────────────────────────────
# DRAW BUBBLE
# ─────────────────────────────────────────────
def draw_bubble(
    pil_img,
    img_bgr_ref,
    bubble_data,
    original_text,
    translated_text,
    font_candidates,
    font_color,
    stroke_color
):
    """Clean one bubble on ``pil_img`` (in place) and draw its translation.

    ``img_bgr_ref`` is the untouched source image; it is used for mask
    detection and for restoring the bubble outline.

    Returns:
        One of ``"skip_sfx"``, ``"skip_no_area"``, ``"clean_only"`` or
        ``"rendered"``.
    """
    if original_text and translated_text:
        # Untranslated sound effects are left exactly as drawn.
        if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text):
            return "skip_sfx"

    rgb = np.array(pil_img)

    # The interior mask is needed by both the extra clean-up and the edge
    # restore, so compute it once.
    bubble_m = None
    if ENABLE_EXTRA_CLEAN or ENABLE_EDGE_RESTORE:
        bubble_m = bubble_interior_mask(img_bgr_ref, bubble_data)

    clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data, bubble_mask=bubble_m)
    if np.count_nonzero(clean_mask) == 0:
        return "skip_no_area"

    # 1) FORCE white fill on clean mask (includes full yellow by guarantee)
    rgb[clean_mask == 255] = [255, 255, 255]

    # 2) Optional edge restore, but NEVER overwrite yellow coverage
    if ENABLE_EDGE_RESTORE:
        kernel = np.ones((3, 3), np.uint8)
        edge = cv2.morphologyEx(bubble_m, cv2.MORPH_GRADIENT, kernel)
        edge = cv2.dilate(edge, kernel, iterations=EDGE_RESTORE_DILATE)

        # Don't restore where yellow exists (hard guarantee)
        edge[yellow_mask == 255] = 0

        orig_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
        restore = edge == 255
        rgb[restore] = orig_rgb[restore]

    pil_img.paste(Image.fromarray(rgb))

    if not translated_text:
        return "clean_only"

    # text region based on yellow area (exact requirement)
    text_bbox = bbox_from_mask(yellow_mask)
    if text_bbox is None:
        text_bbox = bbox_from_mask(clean_mask)
    if text_bbox is None:
        return "skip_no_area"

    x1, y1, x2, y2 = text_bbox

    draw = ImageDraw.Draw(pil_img)
    text_cx = int((x1 + x2) / 2)
    text_cy = int((y1 + y2) / 2)
    safe_w = max(16, int((x2 - x1) * TEXT_INSET))
    safe_h = max(16, int((y2 - y1) * TEXT_INSET))

    font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)

    # Use the same inter-line gap that wrap_text used for total_h, so the
    # drawn block really is total_h tall and stays vertically centred.
    sizes = [measure_text(draw, ln, font) for ln in lines]
    gap = _line_gap(sizes[0][1])

    y_cursor = int(round(text_cy - total_h / 2.0))
    for line, (lw, lh) in zip(lines, sizes):
        x = text_cx - lw // 2
        draw_text_with_stroke(draw, (x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color)
        y_cursor += lh + gap

    return "rendered"


# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_candidates=None,
    font_color=DEFAULT_FONT_COLOR,
    stroke_color=DEFAULT_STROKE_COLOR
):
    """Render every parsed translation onto the page and save the result.

    Args:
        input_image: Path of the source page image.
        output_image: Path the translated page is written to.
        translations_file: Path of the pipe-separated translations file.
        bubbles_file: Path of the bubbles JSON file.
        font_candidates: Font paths tried in order; defaults to
            ``DEFAULT_FONT_CANDIDATES``.
        font_color: Text fill colour.
        stroke_color: Text outline colour.

    Raises:
        FileNotFoundError: If the input image cannot be loaded.
        OSError: If the output image cannot be written.
    """
    if font_candidates is None:
        font_candidates = DEFAULT_FONT_CANDIDATES

    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot load image: {input_image}")

    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    translations, originals, _flags_map = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)

    rendered, skipped = 0, 0

    def sort_key(item):
        # Process bubbles in reading order, falling back to the bubble id.
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))

    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        if bubble_id not in bubbles:
            skipped += 1
            continue

        status = draw_bubble(
            pil_img=img_pil,
            img_bgr_ref=img_bgr,
            bubble_data=bubbles[bubble_id],
            original_text=originals.get(bubble_id, ""),
            translated_text=translated_text,
            font_candidates=font_candidates,
            font_color=font_color,
            stroke_color=stroke_color
        )

        if status.startswith("skip"):
            skipped += 1
        else:
            rendered += 1

    out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(output_image, out_bgr):
        raise OSError(f"Cannot write image: {output_image}")

    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
    print(f"📄 Output → {output_image}")
    print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")


if __name__ == "__main__":
    render_translations(
        input_image="002-page.png",
        output_image="page_translated.png",
        translations_file="output.txt",
        bubbles_file="bubbles.json",
        font_candidates=DEFAULT_FONT_CANDIDATES,
        font_color=DEFAULT_FONT_COLOR,
        stroke_color=DEFAULT_STROKE_COLOR
    )