First beta
This commit is contained in:
@@ -1,509 +1,412 @@
|
||||
import os
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
manga-renderer.py
|
||||
|
||||
Inputs: 001.jpg + bubbles.json + output.txt
|
||||
Output: translated_page.png
|
||||
|
||||
Strategy:
|
||||
1. For every bubble, white-fill all its OCR quads (erases original text cleanly)
|
||||
2. Render the translated text centered inside the bubble bounding box
|
||||
3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import textwrap
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from typing import Dict, List, Tuple, Optional, Set
|
||||
|
||||
# ============================================================
|
||||
# CONFIG — edit these paths to match your setup
|
||||
# ============================================================
|
||||
IMAGE_PATH = "003.jpg"
|
||||
BUBBLES_PATH = "bubbles.json"
|
||||
TRANSLATIONS_PATH = "output.txt"
|
||||
OUTPUT_PATH = "translated_page_003.png"
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
DEFAULT_FONT_CANDIDATES = [
|
||||
"fonts/ComicRelief-Regular.ttf",
|
||||
"fonts/ComicNeue-Regular.ttf",
|
||||
# Font candidates — first one that loads wins
|
||||
FONT_CANDIDATES = [
|
||||
"fonts/ComicNeue-Bold.ttf",
|
||||
]
|
||||
DEFAULT_FONT_COLOR = (0, 0, 0)
|
||||
DEFAULT_STROKE_COLOR = (255, 255, 255)
|
||||
|
||||
MAX_FONT_SIZE = 20
|
||||
MIN_FONT_SIZE = 6
|
||||
FONT_SIZE = 20
|
||||
MIN_FONT_SIZE = 10
|
||||
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
|
||||
|
||||
# Guarantee full wipe of yellow squares
|
||||
YELLOW_BOX_PAD_X = 1
|
||||
YELLOW_BOX_PAD_Y = 1
|
||||
YELLOW_UNION_PAD_X = 4
|
||||
YELLOW_UNION_PAD_Y = 4
|
||||
|
||||
# Optional extra cleanup expansion
|
||||
ENABLE_EXTRA_CLEAN = True
|
||||
EXTRA_DILATE_ITERS = 1
|
||||
EXTRA_CLOSE_ITERS = 1
|
||||
|
||||
# Bubble detection (for optional extra mask / border preservation)
|
||||
FLOOD_TOL = 30
|
||||
|
||||
# Border restoration: keep very conservative
|
||||
ENABLE_EDGE_RESTORE = True
|
||||
EDGE_RESTORE_DILATE = 1
|
||||
|
||||
# Text layout inside yellow-union
|
||||
TEXT_INSET = 0.92
|
||||
# ============================================================
|
||||
# SKIP LIST
|
||||
# ── Add any bubble IDs you do NOT want rendered here.
|
||||
# ── The quads will still be erased (white-filled) but no
|
||||
# ── translated text will be drawn inside them.
|
||||
# ──
|
||||
# ── Examples of why you'd skip a bubble:
|
||||
# ── • Sound effects (BURP, BAM, POW …)
|
||||
# ── • Untranslatable single characters
|
||||
# ── • Bubbles with bad OCR you want to fix manually later
|
||||
# ── • Narrator boxes you want to leave in the source language
|
||||
# ============================================================
|
||||
SKIP_BUBBLE_IDS: Set[int] = {
|
||||
# 8, # BURP BURP — sound effect
|
||||
# 2, # example: bad OCR, fix manually
|
||||
}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PARSERS
|
||||
# ─────────────────────────────────────────────
|
||||
def parse_translations(translations_file):
|
||||
# ============================================================
|
||||
# FONT LOADER
|
||||
# ============================================================
|
||||
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a usable TrueType face from *path* at *size*, or return None.

    When *path* ends in ``.ttc`` (case-insensitive) face indices 0 through 3
    are probed in order; for any other path only index 0 is probed.  A face
    is accepted as soon as both ``ImageFont.truetype`` and a ``getbbox("A")``
    call on it succeed; any exception moves on to the next index.
    """
    face_count = 4 if path.lower().endswith(".ttc") else 1
    face_index = 0
    while face_index < face_count:
        try:
            face = ImageFont.truetype(path, size, index=face_index)
            # Probe the metrics once; a failure here rejects this face.
            face.getbbox("A")
        except Exception:
            face_index += 1
        else:
            return face
    return None
|
||||
|
||||
|
||||
def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """Pick the first entry of FONT_CANDIDATES that ``load_font`` can open.

    Returns a ``(path, font)`` pair.  When no candidate loads at FONT_SIZE,
    a warning is printed and ``("", ImageFont.load_default())`` is returned.
    """
    # Generator keeps loading lazy: later candidates are never touched once
    # an earlier one succeeds.
    attempts = ((entry, load_font(entry, FONT_SIZE)) for entry in FONT_CANDIDATES)
    for candidate, face in attempts:
        if face is None:
            continue
        print(f" ✅ Font: {candidate}")
        return candidate, face

    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# PARSE output.txt → {bid: translated_string}
|
||||
# ============================================================
|
||||
def parse_translations(filepath: str) -> Dict[int, str]:
|
||||
"""
|
||||
Reads output.txt and returns {bubble_id: translated_text}.
|
||||
Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
|
||||
"""
|
||||
translations = {}
|
||||
originals = {}
|
||||
flags_map = {}
|
||||
|
||||
with open(translations_file, "r", encoding="utf-8") as f:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line.startswith("#"):
|
||||
continue
|
||||
|
||||
parts = line.split("|")
|
||||
if len(parts) < 5:
|
||||
continue
|
||||
try:
|
||||
bubble_id = int(parts[0].lstrip("#"))
|
||||
except Exception:
|
||||
bid = int(parts[0].lstrip("#"))
|
||||
translated = parts[4].strip()
|
||||
if translated and translated != "-":
|
||||
translations[bid] = translated
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if len(parts) >= 5:
|
||||
original = parts[2].strip()
|
||||
translated = parts[3].strip()
|
||||
flags = parts[4].strip()
|
||||
elif len(parts) >= 4:
|
||||
original = parts[2].strip()
|
||||
translated = parts[3].strip()
|
||||
flags = "-"
|
||||
elif len(parts) >= 3:
|
||||
original = ""
|
||||
translated = parts[2].strip()
|
||||
flags = "-"
|
||||
else:
|
||||
continue
|
||||
|
||||
if translated.startswith("["):
|
||||
continue
|
||||
|
||||
translations[bubble_id] = translated
|
||||
originals[bubble_id] = original
|
||||
flags_map[bubble_id] = flags
|
||||
|
||||
return translations, originals, flags_map
|
||||
return translations
|
||||
|
||||
|
||||
def parse_bubbles(bubbles_file):
    """Read a JSON object from *bubbles_file* and return it keyed by int.

    The file is opened as UTF-8; every top-level key is converted with
    ``int()`` and mapped to its unchanged value.
    """
    with open(bubbles_file, "r", encoding="utf-8") as handle:
        payload = json.loads(handle.read())

    by_id = {}
    for key, entry in payload.items():
        by_id[int(key)] = entry
    return by_id
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def clamp(v, lo, hi):
    """Limit *v* to at most *hi*, then raise the result to at least *lo*."""
    capped = v if v < hi else hi
    return capped if capped > lo else lo
|
||||
|
||||
|
||||
def xywh_to_xyxy(box):
    """Convert a mapping with "x"/"y"/"w"/"h" entries to ``(x1, y1, x2, y2)``.

    A falsy *box* (None, empty mapping) yields None.  Missing keys default
    to 0 and every value is passed through ``int()`` before use.
    """
    if box:
        left, top, width, height = (int(box.get(key, 0)) for key in ("x", "y", "w", "h"))
        return (left, top, left + width, top + height)
    return None
|
||||
|
||||
|
||||
def union_xyxy(boxes):
    """Return the bounding ``(x1, y1, x2, y2)`` of all non-None *boxes*.

    None is returned when no box remains after dropping None entries, or
    when the resulting union has no positive width or height.
    """
    present = [box for box in boxes if box is not None]
    if not present:
        return None

    head, *tail = present
    left, top, right, bottom = head[0], head[1], head[2], head[3]
    for box in tail:
        if box[0] < left:
            left = box[0]
        if box[1] < top:
            top = box[1]
        if box[2] > right:
            right = box[2]
        if box[3] > bottom:
            bottom = box[3]

    # Degenerate union (zero or negative extent) is reported as "no box".
    if right <= left or bottom <= top:
        return None
    return (left, top, right, bottom)
|
||||
|
||||
|
||||
def bbox_from_mask(mask):
    """Return ``(x1, y1, x2, y2)`` around the cells of *mask* that are > 0.

    x2 and y2 are one past the largest matching column/row index.  None is
    returned when no cell is greater than zero.
    """
    rows, cols = np.nonzero(mask > 0)
    if cols.size == 0:
        return None
    x_min, x_max = int(cols.min()), int(cols.max())
    y_min, y_max = int(rows.min()), int(rows.max())
    return (x_min, y_min, x_max + 1, y_max + 1)
|
||||
|
||||
|
||||
def normalize_text(s):
    """Upper-case and strip *s*, then delete every run of non-word characters."""
    return re.sub(r"[^\w]+", "", s.upper().strip())
|
||||
|
||||
|
||||
def is_sfx_like(text):
    """Report whether *text* normalizes to one of the known short sound effects.

    The normalized form must be at most 8 characters long and fully match
    one of the patterns SHA+, BIP+, BEEP+, HN+, AH+ or OH+.
    """
    token = normalize_text(text)
    if len(token) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", token) is not None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# FONT
|
||||
# ─────────────────────────────────────────────
|
||||
def load_font_from_candidates(candidates, size):
    """Return ``(font, path)`` for the first candidate file that loads.

    Empty entries and paths that do not exist on disk are skipped, as are
    files for which ``ImageFont.truetype`` raises.  When nothing loads the
    result is ``(ImageFont.load_default(), "PIL_DEFAULT")``.
    """
    for font_file in candidates:
        if not font_file or not os.path.exists(font_file):
            continue
        try:
            face = ImageFont.truetype(font_file, size)
        except Exception:
            continue
        return face, font_file
    return ImageFont.load_default(), "PIL_DEFAULT"
|
||||
|
||||
|
||||
def measure_text(draw, text, font):
    """Return ``(width, height)`` of *text* from ``draw.textbbox`` at the origin."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top
|
||||
|
||||
|
||||
def wrap_text(draw, text, font, max_width):
    """Greedily wrap *text* on whitespace so lines fit within *max_width*.

    Returns ``(lines, total_height, widest_line)``.  A single word wider
    than *max_width* is still placed on a line of its own.  Text with no
    words yields ``([""], 0, 0)``.  The inter-line gap used for the total
    height is derived from the first line's height (at least 2).
    """
    wrapped = []
    pending = ""
    for word in text.split():
        candidate = f"{pending} {word}".strip()
        candidate_w, _ = measure_text(draw, candidate, font)
        if pending and candidate_w > max_width:
            # Candidate overflows: flush what we had and start a new line.
            wrapped.append(pending)
            pending = word
        else:
            pending = candidate
    if pending:
        wrapped.append(pending)

    if not wrapped:
        return [""], 0, 0

    extents = [measure_text(draw, entry, font) for entry in wrapped]
    line_widths = [extent[0] for extent in extents]
    line_heights = [extent[1] for extent in extents]

    spacing = max(2, line_heights[0] // 5)
    block_height = sum(line_heights) + spacing * (len(wrapped) - 1)
    return wrapped, block_height, max(line_widths)
|
||||
|
||||
|
||||
def fit_font(draw, text, font_candidates, safe_w, safe_h):
    """Find the largest font size whose wrapped text fits the safe area.

    Sizes are tried from MAX_FONT_SIZE down to MIN_FONT_SIZE.  The first
    size whose wrapped block is no taller than *safe_h* and no wider than
    *safe_w* wins.  If none fits, the text is wrapped at MIN_FONT_SIZE and
    returned anyway.  Returns ``(font, lines, total_height)``.
    """
    smallest = MIN_FONT_SIZE
    size = MAX_FONT_SIZE
    while size >= smallest:
        font, _ = load_font_from_candidates(font_candidates, size)
        lines, block_h, block_w = wrap_text(draw, text, font, safe_w)
        if block_h <= safe_h and block_w <= safe_w:
            return font, lines, block_h
        size -= 1

    # Nothing fit: fall back to the minimum size regardless of overflow.
    font, _ = load_font_from_candidates(font_candidates, smallest)
    lines, block_h, _ = wrap_text(draw, text, font, safe_w)
    return font, lines, block_h
|
||||
|
||||
|
||||
def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw *text* at *pos* with an outline made of offset copies.

    The outline radius is 2 pixels when the measured text height is 11 or
    less, otherwise 1.  Copies in *stroke_fill* are drawn at every offset
    in the square of that radius except the centre, then the text itself
    is drawn on top in *fill*.
    """
    origin_x, origin_y = pos
    _, text_h = measure_text(draw, text, font)
    radius = 2 if text_h <= 11 else 1

    span = range(-radius, radius + 1)
    ring = [(dx, dy) for dx in span for dy in span if (dx, dy) != (0, 0)]
    for dx, dy in ring:
        draw.text((origin_x + dx, origin_y + dy), text, font=font, fill=stroke_fill)

    draw.text((origin_x, origin_y), text, font=font, fill=fill)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MASK BUILDERS
|
||||
# ─────────────────────────────────────────────
|
||||
def build_yellow_mask(bubble_data, img_h, img_w):
|
||||
# ============================================================
|
||||
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
|
||||
# ============================================================
|
||||
def parse_bubbles(filepath: str):
|
||||
"""
|
||||
HARD GUARANTEE:
|
||||
Returned mask always covers all yellow squares (line_bboxes).
|
||||
Returns:
|
||||
bubble_boxes : {bid: (x1, y1, x2, y2)}
|
||||
quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
|
||||
"""
|
||||
mask = np.zeros((img_h, img_w), dtype=np.uint8)
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Preferred: exact line boxes
|
||||
line_boxes = bubble_data.get("line_bboxes", [])
|
||||
for lb in line_boxes:
|
||||
b = xywh_to_xyxy(lb)
|
||||
if not b:
|
||||
bubble_boxes = {}
|
||||
quads_per_bubble = {}
|
||||
|
||||
for key, val in data.items():
|
||||
bid = int(key)
|
||||
|
||||
x1 = val["x"]; y1 = val["y"]
|
||||
x2 = x1 + val["w"]; y2 = y1 + val["h"]
|
||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||
|
||||
quads_per_bubble[bid] = val.get("quads", [])
|
||||
|
||||
return bubble_boxes, quads_per_bubble
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ERASE — white-fill every OCR quad (with small padding)
|
||||
# ============================================================
|
||||
def erase_quads(
|
||||
image_bgr,
|
||||
quads_per_bubble: Dict[int, List],
|
||||
translations: Dict[int, str], # ← NEW: only erase what we'll render
|
||||
skip_ids: Set[int],
|
||||
pad: int = QUAD_PAD
|
||||
):
|
||||
"""
|
||||
White-fills OCR quads ONLY for bubbles that:
|
||||
- have a translation in output.txt AND
|
||||
- are NOT in SKIP_BUBBLE_IDS
|
||||
Everything else is left completely untouched.
|
||||
"""
|
||||
ih, iw = image_bgr.shape[:2]
|
||||
result = image_bgr.copy()
|
||||
|
||||
erased_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for bid, quads in quads_per_bubble.items():
|
||||
|
||||
# ignore if explicitly skipped
|
||||
if bid in skip_ids:
|
||||
skipped_count += 1
|
||||
continue
|
||||
x1, y1, x2, y2 = b
|
||||
x1 -= YELLOW_BOX_PAD_X
|
||||
y1 -= YELLOW_BOX_PAD_Y
|
||||
x2 += YELLOW_BOX_PAD_X
|
||||
y2 += YELLOW_BOX_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
|
||||
# If no line boxes available, use line_union fallback
|
||||
if np.count_nonzero(mask) == 0:
|
||||
ub = xywh_to_xyxy(bubble_data.get("line_union_bbox"))
|
||||
if ub:
|
||||
x1, y1, x2, y2 = ub
|
||||
x1 -= YELLOW_UNION_PAD_X
|
||||
y1 -= YELLOW_UNION_PAD_Y
|
||||
x2 += YELLOW_UNION_PAD_X
|
||||
y2 += YELLOW_UNION_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
# ignore if no translation exists (deleted from output.txt)
|
||||
if bid not in translations:
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Last fallback: text_bbox
|
||||
if np.count_nonzero(mask) == 0:
|
||||
tb = xywh_to_xyxy(bubble_data.get("text_bbox"))
|
||||
if tb:
|
||||
x1, y1, x2, y2 = tb
|
||||
x1 -= YELLOW_UNION_PAD_X
|
||||
y1 -= YELLOW_UNION_PAD_Y
|
||||
x2 += YELLOW_UNION_PAD_X
|
||||
y2 += YELLOW_UNION_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
for quad in quads:
|
||||
pts = np.array(quad, dtype=np.int32)
|
||||
cv2.fillPoly(result, [pts], (255, 255, 255))
|
||||
|
||||
return mask
|
||||
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
|
||||
x1 = max(0, min(xs) - pad)
|
||||
y1 = max(0, min(ys) - pad)
|
||||
x2 = min(iw - 1, max(xs) + pad)
|
||||
y2 = min(ih - 1, max(ys) + pad)
|
||||
cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
|
||||
|
||||
erased_count += 1
|
||||
|
||||
print(f" Erased : {erased_count} bubbles")
|
||||
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
|
||||
return result
|
||||
|
||||
|
||||
def bubble_interior_mask(img_bgr, bubble_data):
|
||||
# ============================================================
|
||||
# FONT SIZING + TEXT WRAP
|
||||
# ============================================================
|
||||
def fit_text(
|
||||
text: str,
|
||||
box_w: int,
|
||||
box_h: int,
|
||||
font_path: str,
|
||||
max_size: int = FONT_SIZE,
|
||||
min_size: int = MIN_FONT_SIZE
|
||||
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
|
||||
"""
|
||||
Optional helper to expand clean region safely; never used to shrink yellow coverage.
|
||||
Returns (fitted_size, font, wrapped_lines) — largest size where
|
||||
the text block fits inside box_w × box_h.
|
||||
"""
|
||||
h, w = img_bgr.shape[:2]
|
||||
for size in range(max_size, min_size - 1, -1):
|
||||
font = load_font(font_path, size) if font_path else None
|
||||
if font is None:
|
||||
return min_size, ImageFont.load_default(), [text]
|
||||
|
||||
panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
|
||||
if panel is None:
|
||||
panel = (0, 0, w, h)
|
||||
px1, py1, px2, py2 = panel
|
||||
chars_per_line = max(1, int(box_w / (size * 0.62)))
|
||||
wrapped = textwrap.fill(text, width=chars_per_line)
|
||||
lines = wrapped.split("\n")
|
||||
total_h = (size + 8) * len(lines)
|
||||
|
||||
seed = bubble_data.get("seed_point", {})
|
||||
sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
|
||||
sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
|
||||
sx = clamp(sx, 1, w - 2)
|
||||
sy = clamp(sy, 1, h - 2)
|
||||
if total_h <= box_h - 8:
|
||||
return size, font, lines
|
||||
|
||||
gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
||||
_, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
|
||||
# Nothing fit — use minimum size
|
||||
font = load_font(font_path, min_size) if font_path else None
|
||||
if font is None:
|
||||
font = ImageFont.load_default()
|
||||
chars_per_line = max(1, int(box_w / (min_size * 0.62)))
|
||||
lines = textwrap.fill(text, width=chars_per_line).split("\n")
|
||||
return min_size, font, lines
|
||||
|
||||
panel_bin = np.zeros_like(binary)
|
||||
panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]
|
||||
|
||||
# if seed on dark pixel, search nearby white
|
||||
if gray[sy, sx] < 150:
|
||||
found = False
|
||||
search_r = max(2, min(bubble_data.get("w", 20), bubble_data.get("h", 20)) // 3)
|
||||
for r in range(1, search_r + 1):
|
||||
for dy in range(-r, r + 1):
|
||||
for dx in range(-r, r + 1):
|
||||
nx, ny = sx + dx, sy + dy
|
||||
if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200:
|
||||
sx, sy = nx, ny
|
||||
found = True
|
||||
break
|
||||
if found:
|
||||
break
|
||||
if found:
|
||||
break
|
||||
|
||||
if not found:
|
||||
m = np.zeros((h, w), dtype=np.uint8)
|
||||
bx = bubble_data.get("x", 0)
|
||||
by = bubble_data.get("y", 0)
|
||||
bw = bubble_data.get("w", 20)
|
||||
bh = bubble_data.get("h", 20)
|
||||
cv2.ellipse(m, (bx + bw // 2, by + bh // 2), (max(4, bw // 2), max(4, bh // 2)), 0, 0, 360, 255, -1)
|
||||
return m
|
||||
|
||||
ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
|
||||
flood = panel_bin.copy()
|
||||
cv2.floodFill(
|
||||
flood, ff_mask, (sx, sy), 255,
|
||||
loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
|
||||
flags=cv2.FLOODFILL_FIXED_RANGE
|
||||
# ============================================================
|
||||
# COLOR HELPERS
|
||||
# ============================================================
|
||||
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """Estimate a box's background colour as ``(R, G, B)``.

    Four pixels are read, each 4 px inside a corner of the box given by
    (x1, y1)-(x2, y2) and clamped to the image bounds.  The per-channel
    median of those pixels is returned, truncated to int.  Pixels are read
    in B, G, R order from *image_bgr*.
    """
    height, width = image_bgr.shape[:2]
    inset = 4
    probes = (
        (x1 + inset, y1 + inset),
        (x2 - inset, y1 + inset),
        (x1 + inset, y2 - inset),
        (x2 - inset, y2 - inset),
    )

    reds, greens, blues = [], [], []
    for px, py in probes:
        col = max(0, min(width - 1, px))
        row = max(0, min(height - 1, py))
        blue, green, red = image_bgr[row, col]
        reds.append(int(red))
        greens.append(int(green))
        blues.append(int(blue))

    return (
        int(np.median(reds)),
        int(np.median(greens)),
        int(np.median(blues)),
    )
|
||||
|
||||
m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
|
||||
m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
|
||||
return m
|
||||
|
||||
def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Choose black text for a light background, white for a dark one.

    *bg* is read as (R, G, B); its weighted luminance
    (0.299 R + 0.587 G + 0.114 B) is compared against 128.
    """
    red, green, blue = bg[0], bg[1], bg[2]
    luminance = 0.299 * red + 0.587 * green + 0.114 * blue
    if luminance > 128:
        return (0, 0, 0)
    return (255, 255, 255)
|
||||
|
||||
|
||||
def build_clean_mask(img_bgr, bubble_data):
    """Build the mask to white-fill for one bubble; returns ``(clean, yellow)``.

    ``yellow`` is whatever ``build_yellow_mask`` produces for the bubble.
    When ENABLE_EXTRA_CLEAN is set, ``yellow`` is dilated and closed with a
    3x3 kernel, intersected with ``bubble_interior_mask``, and OR-ed back
    onto ``yellow``.  The returned ``clean`` is always OR-ed with ``yellow``
    one final time, so it never covers less than ``yellow``.
    """
    height, width = img_bgr.shape[:2]
    yellow = build_yellow_mask(bubble_data, height, width)

    if not ENABLE_EXTRA_CLEAN:
        clean = yellow.copy()
    else:
        interior = bubble_interior_mask(img_bgr, bubble_data)
        kernel = np.ones((3, 3), np.uint8)
        grown = cv2.dilate(yellow, kernel, iterations=EXTRA_DILATE_ITERS)
        grown = cv2.morphologyEx(grown, cv2.MORPH_CLOSE, kernel, iterations=EXTRA_CLOSE_ITERS)
        grown = cv2.bitwise_and(grown, interior)
        # Union only: the extra region may add to yellow, never remove from it.
        clean = cv2.bitwise_or(yellow, grown)

    # Defensive re-union so the coverage rule holds on every path.
    clean = cv2.bitwise_or(clean, yellow)
    return clean, yellow
|
||||
def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """Return ``draw.textbbox(pos, text, font=font)``, estimating on failure.

    If ``textbbox`` raises, a rough box anchored at *pos* is returned:
    width ``len(text) * size * 0.6`` and height ``size * 1.2`` (both
    truncated to int), where ``size`` is ``font.size`` or 12 when the font
    has no such attribute.
    """
    try:
        measured = draw.textbbox(pos, text, font=font)
    except Exception:
        px = getattr(font, "size", 12)
        left, top = pos[0], pos[1]
        est_w = int(len(text) * px * 0.6)
        est_h = int(px * 1.2)
        return (left, top, left + est_w, top + est_h)
    return measured
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DRAW BUBBLE
|
||||
# ─────────────────────────────────────────────
|
||||
def draw_bubble(
    pil_img,
    img_bgr_ref,
    bubble_data,
    original_text,
    translated_text,
    font_candidates,
    font_color,
    stroke_color
):
    """White-clean one bubble on *pil_img* and draw its translated text.

    Returns a status string:
      "skip_sfx"     - original and translation normalize to the same text
                       and the original looks like a sound effect; nothing
                       is modified.
      "skip_no_area" - the clean mask is empty, or no text region could be
                       derived from the masks.
      "clean_only"   - the area was cleaned but *translated_text* is empty.
      "rendered"     - the area was cleaned and the text was drawn.

    *img_bgr_ref* is the untouched BGR source, used both for mask building
    and for restoring pixels along the bubble edge.
    """
    if original_text and translated_text:
        unchanged = normalize_text(original_text) == normalize_text(translated_text)
        if unchanged and is_sfx_like(original_text):
            return "skip_sfx"

    canvas = np.array(pil_img)

    clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data)
    if not np.count_nonzero(clean_mask):
        return "skip_no_area"

    # Step 1: force white over the whole clean mask (which includes yellow).
    canvas[clean_mask == 255] = [255, 255, 255]

    # Step 2: optionally put back original pixels along the bubble outline,
    # but never where the yellow mask demands a wipe.
    if ENABLE_EDGE_RESTORE:
        kernel = np.ones((3, 3), np.uint8)
        interior = bubble_interior_mask(img_bgr_ref, bubble_data)
        outline = cv2.morphologyEx(interior, cv2.MORPH_GRADIENT, kernel)
        outline = cv2.dilate(outline, kernel, iterations=EDGE_RESTORE_DILATE)
        outline[yellow_mask == 255] = 0

        source_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
        restore_here = outline == 255
        canvas[restore_here] = source_rgb[restore_here]

    pil_img.paste(Image.fromarray(canvas))

    if not translated_text:
        return "clean_only"

    # The text region is the yellow area's bounding box; the clean mask is
    # only a fallback when the yellow mask is empty.
    region = bbox_from_mask(yellow_mask)
    if region is None:
        region = bbox_from_mask(clean_mask)
    if region is None:
        return "skip_no_area"

    left, top, right, bottom = region

    draw = ImageDraw.Draw(pil_img)
    center_x = int((left + right) / 2)
    center_y = int((top + bottom) / 2)
    safe_w = max(16, int((right - left) * TEXT_INSET))
    safe_h = max(16, int((bottom - top) * TEXT_INSET))

    font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)

    cursor_y = int(round(center_y - total_h / 2.0))
    for line in lines:
        line_w, line_h = measure_text(draw, line, font)
        anchor = (center_x - line_w // 2, cursor_y)
        draw_text_with_stroke(draw, anchor, line, font, fill=font_color, stroke_fill=stroke_color)
        cursor_y += line_h + max(line_h // 5, 2)

    return "rendered"
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
# ============================================================
|
||||
# RENDER
|
||||
# ============================================================
|
||||
def render_translations(
|
||||
input_image,
|
||||
output_image,
|
||||
translations_file,
|
||||
bubbles_file,
|
||||
font_candidates=DEFAULT_FONT_CANDIDATES,
|
||||
font_color=DEFAULT_FONT_COLOR,
|
||||
stroke_color=DEFAULT_STROKE_COLOR
|
||||
image_bgr,
|
||||
bubble_boxes: Dict[int, Tuple],
|
||||
translations: Dict[int, str],
|
||||
skip_ids: Set[int],
|
||||
font_path: str,
|
||||
font_size: int = FONT_SIZE,
|
||||
bold_outline: bool = True,
|
||||
auto_color: bool = True,
|
||||
output_path: str = OUTPUT_PATH
|
||||
):
|
||||
img_bgr = cv2.imread(input_image)
|
||||
if img_bgr is None:
|
||||
raise FileNotFoundError(f"Cannot load image: {input_image}")
|
||||
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
|
||||
pil_img = Image.fromarray(image_rgb)
|
||||
draw = ImageDraw.Draw(pil_img)
|
||||
|
||||
img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
|
||||
rendered = 0
|
||||
skipped = 0
|
||||
missing = 0
|
||||
|
||||
translations, originals, flags_map = parse_translations(translations_file)
|
||||
bubbles = parse_bubbles(bubbles_file)
|
||||
for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
|
||||
|
||||
rendered, skipped = 0, 0
|
||||
|
||||
def sort_key(item):
|
||||
bid, _ = item
|
||||
b = bubbles.get(bid, {})
|
||||
return int(b.get("reading_order", bid))
|
||||
|
||||
for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
|
||||
if bubble_id not in bubbles:
|
||||
# ── skip list check ────────────────────────────────────────
|
||||
if bid in skip_ids:
|
||||
print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
bubble_data = bubbles[bubble_id]
|
||||
original_text = originals.get(bubble_id, "")
|
||||
text = translations.get(bid, "").strip()
|
||||
if not text:
|
||||
print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank")
|
||||
missing += 1
|
||||
continue
|
||||
|
||||
status = draw_bubble(
|
||||
pil_img=img_pil,
|
||||
img_bgr_ref=img_bgr,
|
||||
bubble_data=bubble_data,
|
||||
original_text=original_text,
|
||||
translated_text=translated_text,
|
||||
font_candidates=font_candidates,
|
||||
font_color=font_color,
|
||||
stroke_color=stroke_color
|
||||
box_w = x2 - x1
|
||||
box_h = y2 - y1
|
||||
if box_w < 10 or box_h < 10:
|
||||
continue
|
||||
|
||||
# ── fit font + wrap ────────────────────────────────────────
|
||||
size, font, lines = fit_text(
|
||||
text, box_w, box_h, font_path, max_size=font_size
|
||||
)
|
||||
|
||||
if status.startswith("skip"):
|
||||
skipped += 1
|
||||
# ── colors ─────────────────────────────────────────────────
|
||||
if auto_color:
|
||||
bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
|
||||
fg = pick_fg_color(bg)
|
||||
ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
|
||||
else:
|
||||
rendered += 1
|
||||
fg, ol = (0, 0, 0), (255, 255, 255)
|
||||
|
||||
out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite(output_image, out_bgr)
|
||||
# ── vertical center ────────────────────────────────────────
|
||||
line_h = size + 8
|
||||
total_h = line_h * len(lines)
|
||||
y_cur = y1 + max(4, (box_h - total_h) // 2)
|
||||
|
||||
print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
|
||||
print(f"📄 Output → {output_image}")
|
||||
print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")
|
||||
for line in lines:
|
||||
bb = safe_textbbox(draw, (0, 0), line, font)
|
||||
line_w = bb[2] - bb[0]
|
||||
x_cur = x1 + max(2, (box_w - line_w) // 2)
|
||||
|
||||
if bold_outline:
|
||||
for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
|
||||
try:
|
||||
draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
draw.text((x_cur, y_cur), line, font=font, fill=fg)
|
||||
except Exception as e:
|
||||
print(f" ❌ Draw error bubble #{bid}: {e}")
|
||||
|
||||
y_cur += line_h
|
||||
|
||||
print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
|
||||
rendered += 1
|
||||
|
||||
pil_img.save(output_path)
|
||||
|
||||
print()
|
||||
print(f"{'─'*50}")
|
||||
print(f" Rendered : {rendered}")
|
||||
print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
|
||||
print(f" No text : {missing} (not in output.txt)")
|
||||
print(f"{'─'*50}")
|
||||
print(f"✅ Saved → {output_path}")
|
||||
|
||||
return pil_img
|
||||
|
||||
|
||||
# ============================================================
|
||||
# MAIN
|
||||
# ============================================================
|
||||
def main():
    """Run the pipeline: load inputs, erase original text, render translations.

    Reads IMAGE_PATH, BUBBLES_PATH and TRANSLATIONS_PATH, resolves a font,
    erases the OCR quads via ``erase_quads`` and hands the cleaned image to
    ``render_translations`` with OUTPUT_PATH.  Returns early, after
    printing an error, when the image cannot be loaded.
    """
    print(f"📖 Loading image : {IMAGE_PATH}")
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        print(f"❌ Cannot load: {IMAGE_PATH}")
        return

    print(f"📦 Loading bubbles : {BUBBLES_PATH}")
    bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
    print(f" {len(bubble_boxes)} bubbles | "
          f"{sum(len(v) for v in quads_per_bubble.values())} quads total")

    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    translations = parse_translations(TRANSLATIONS_PATH)
    print(f" {len(translations)} translations found")

    skip_notice = (
        f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}"
        if SKIP_BUBBLE_IDS
        else f"⏭️ Skip list : (empty — all bubbles will be rendered)"
    )
    print(skip_notice)

    print("🔤 Resolving font...")
    # Only the path is needed here; the loaded font object is discarded.
    font_path = resolve_font()[0]

    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
    clean_image = erase_quads(
        image,
        quads_per_bubble,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD,
    )

    print("✍️ Rendering translated text...")
    render_translations(
        image_bgr=clean_image,
        bubble_boxes=bubble_boxes,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        font_path=font_path,
        font_size=FONT_SIZE,
        bold_outline=True,
        auto_color=True,
        output_path=OUTPUT_PATH,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
render_translations(
|
||||
input_image="001-page.png",
|
||||
output_image="page_translated.png",
|
||||
translations_file="output.txt",
|
||||
bubbles_file="bubbles.json",
|
||||
font_candidates=DEFAULT_FONT_CANDIDATES,
|
||||
font_color=DEFAULT_FONT_COLOR,
|
||||
stroke_color=DEFAULT_STROKE_COLOR
|
||||
)
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user