Added new renderer

This commit is contained in:
Guillem Hernandez Sola
2026-04-21 18:53:34 +02:00
parent bd475d8f01
commit dfa52f54eb
2 changed files with 277 additions and 267 deletions

View File

@@ -3,13 +3,16 @@
"""
manga-renderer.py
Inputs: 001.jpg + bubbles.json + output.txt
Output: translated_page.png
Inputs: 001.jpg + bubbles.json + output_001.txt
Output: translated_page_001.png
Strategy:
1. For every bubble, white-fill all its OCR quads (erases original text cleanly)
2. Render the translated text centered inside the bubble bounding box
3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
2. Detect the original font size from the OCR bounding boxes.
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
4. Render the translated text centered inside the bubble bounding box.
5. Use uniform line heights to prevent accent collisions.
6. Add a white stroke to the text to cover any residual original characters.
"""
import json
@@ -17,43 +20,40 @@ import textwrap
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set
from typing import Dict, List, Tuple, Optional, Set, Any
# ============================================================
# CONFIG — edit these paths to match your setup
# ============================================================
IMAGE_PATH = "004.png"
IMAGE_PATH = "003.jpg"
BUBBLES_PATH = "bubbles.json"
TRANSLATIONS_PATH = "output_004.txt"
OUTPUT_PATH = "translated_page_004.png"
TRANSLATIONS_PATH = "output_003.txt"
OUTPUT_PATH = "translated_page_003.png"
# Font candidates — first one that loads wins
FONT_CANDIDATES = [
"fonts/ComicNeue-Bold.ttf",
# Mac fallbacks
"/System/Library/Fonts/Supplemental/Comic Sans MS Bold.ttf",
"/System/Library/Fonts/Supplemental/Arial Bold.ttf",
# Windows fallbacks
"C:\\Windows\\Fonts\\comicbd.ttf",
"C:\\Windows\\Fonts\\arialbd.ttf",
# Linux fallbacks
"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
]
FONT_SIZE = 24
MIN_FONT_SIZE = 18
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
DEFAULT_FONT_SIZE = 24
MIN_FONT_SIZE = 12
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
# ============================================================
# SKIP LIST
# ── Add any bubble IDs you do NOT want rendered here.
# ── The quads will still be erased (white-filled) but no
# ── translated text will be drawn inside them.
# ──
# ── Examples of why you'd skip a bubble:
# ── • Sound effects (BURP, BAM, POW …)
# ── • Untranslatable single characters
# ── • Bubbles with bad OCR you want to fix manually later
# ── • Narrator boxes you want to leave in the source language
# ============================================================
SKIP_BUBBLE_IDS: Set[int] = {
# 8, # BURP BURP — sound effect
# 2, # example: bad OCR, fix manually
# Add any bubble IDs you do NOT want rendered here.
}
# ============================================================
# FONT LOADER
# ============================================================
@@ -69,20 +69,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
continue
return None
def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
"""Return (path, font) for the first working candidate."""
def resolve_font_path() -> str:
"""Return the path for the first working candidate."""
for candidate in FONT_CANDIDATES:
font = load_font(candidate, FONT_SIZE)
if font is not None:
if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
print(f" ✅ Font: {candidate}")
return candidate, font
return candidate
print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
return "", ImageFont.load_default()
return ""
# ============================================================
# PARSE output.txt → {bid: translated_string}
# PARSERS
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
"""
@@ -107,41 +104,21 @@ def parse_translations(filepath: str) -> Dict[int, str]:
continue
return translations
# ============================================================
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
# ============================================================
def parse_bubbles(filepath: str):
"""
Returns:
bubble_boxes : {bid: (x1, y1, x2, y2)}
quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
Returns the full JSON data.
"""
with open(filepath, "r", encoding="utf-8") as f:
data = json.load(f)
bubble_boxes = {}
quads_per_bubble = {}
for key, val in data.items():
bid = int(key)
x1 = val["x"]; y1 = val["y"]
x2 = x1 + val["w"]; y2 = y1 + val["h"]
bubble_boxes[bid] = (x1, y1, x2, y2)
quads_per_bubble[bid] = val.get("quads", [])
return bubble_boxes, quads_per_bubble
return data
# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
image_bgr,
quads_per_bubble: Dict[int, List],
translations: Dict[int, str], # ← NEW: only erase what we'll render
bubbles_data: Dict[str, dict],
translations: Dict[int, str],
skip_ids: Set[int],
pad: int = QUAD_PAD
):
@@ -149,7 +126,6 @@ def erase_quads(
White-fills OCR quads ONLY for bubbles that:
- have a translation in output.txt AND
- are NOT in SKIP_BUBBLE_IDS
Everything else is left completely untouched.
"""
ih, iw = image_bgr.shape[:2]
result = image_bgr.copy()
@@ -157,15 +133,11 @@ def erase_quads(
erased_count = 0
skipped_count = 0
for bid, quads in quads_per_bubble.items():
for bid_str, val in bubbles_data.items():
bid = int(bid_str)
quads = val.get("quads", [])
# ignore if explicitly skipped
if bid in skip_ids:
skipped_count += 1
continue
# ignore if no translation exists (deleted from output.txt)
if bid not in translations:
if bid in skip_ids or bid not in translations:
skipped_count += 1
continue
@@ -186,227 +158,206 @@ def erase_quads(
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
return result
# ============================================================
# FONT SIZING + TEXT WRAP
# DYNAMIC TEXT FITTING
# ============================================================
def fit_text(
text: str,
box_w: int,
box_h: int,
font_path: str,
max_size: int = FONT_SIZE,
min_size: int = MIN_FONT_SIZE
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Estimate the source text's font size from a bubble's OCR line boxes.

    The estimate is 85% of the median line-box height, clamped to the
    range [MIN_FONT_SIZE, 60].

    Args:
        bubble_data: One bubble entry. Its optional "line_bboxes" list is
            expected to hold dicts carrying an "h" (height) value.
        fallback_size: Returned when no usable line height is available
            (key missing, list empty, or every entry malformed).

    Returns:
        The estimated font size as an int.
    """
    line_bboxes = bubble_data.get("line_bboxes") or []

    # Drop malformed entries (non-dicts, missing or non-numeric "h") rather
    # than raising, so a single bad OCR box cannot abort the whole page.
    heights = [
        box["h"]
        for box in line_bboxes
        if isinstance(box, dict) and isinstance(box.get("h"), (int, float))
    ]
    if not heights:
        return fallback_size

    median_h = int(np.median(heights))
    estimated_size = int(median_h * 0.85)
    return max(MIN_FONT_SIZE, min(estimated_size, 60))
def fit_text_dynamically(
text: str,
font_path: str,
max_w: int,
max_h: int,
target_font_size: int
) -> Tuple[List[str], Any, int, int]:
"""
Returns (fitted_size, font, wrapped_lines) — largest size where
the text block fits inside box_w × box_h.
Wraps text and scales down font size if it exceeds the bubble dimensions.
Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
"""
for size in range(max_size, min_size - 1, -1):
font = load_font(font_path, size) if font_path else None
if font is None:
return min_size, ImageFont.load_default(), [text]
chars_per_line = max(1, int(box_w / (size * 0.62)))
wrapped = textwrap.fill(text, width=chars_per_line)
lines = wrapped.split("\n")
total_h = (size + 8) * len(lines)
if total_h <= box_h - 8:
return size, font, lines
# Nothing fit — use minimum size
font = load_font(font_path, min_size) if font_path else None
if font is None:
font_size = target_font_size
if not font_path:
font = ImageFont.load_default()
chars_per_line = max(1, int(box_w / (min_size * 0.62)))
lines = textwrap.fill(text, width=chars_per_line).split("\n")
return min_size, font, lines
# ============================================================
# COLOR HELPERS
# ============================================================
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """Estimate a bubble's background color as (R, G, B).

    Samples one pixel 4 px inside each corner of the box
    (x1, y1)-(x2, y2), clamps every sample point to the image bounds, and
    returns the per-channel median of the four samples.

    Args:
        image_bgr: Image array indexed as [row, col]; the first three
            channels are read as B, G, R. Single-channel (grayscale)
            arrays are accepted too, and any channel past the third
            (e.g. alpha) is ignored.
        x1, y1, x2, y2: Box corners in image coordinates.

    Returns:
        The median color as a tuple of ints (r, g, b).
    """
    ih, iw = image_bgr.shape[:2]
    inset = 4
    corners = [
        (x1 + inset, y1 + inset), (x2 - inset, y1 + inset),
        (x1 + inset, y2 - inset), (x2 - inset, y2 - inset),
    ]

    samples = []
    for sx, sy in corners:
        sx = max(0, min(iw - 1, sx))
        sy = max(0, min(ih - 1, sy))
        # Normalise the pixel to a 1-D int vector so grayscale and BGRA
        # inputs do not break the three-way channel unpack below.
        pixel = np.atleast_1d(image_bgr[sy, sx]).astype(int)
        if pixel.size < 3:
            # Grayscale: reuse the single intensity for all three channels.
            pixel = np.repeat(pixel[:1], 3)
        b, g, r = pixel[:3]
        samples.append((r, g, b))

    median_rgb = np.median(np.array(samples), axis=0)
    return int(median_rgb[0]), int(median_rgb[1]), int(median_rgb[2])
def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Choose a text color that contrasts with the background ``bg``.

    The three channels of ``bg`` are combined with the weights
    0.299 / 0.587 / 0.114; black is returned when that weighted sum is
    above 128, white otherwise.
    """
    first, second, third = bg[0], bg[1], bg[2]
    weighted = 0.299 * first + 0.587 * second + 0.114 * third
    if weighted > 128:
        return (0, 0, 0)
    return (255, 255, 255)
def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """Measure ``text`` placed at ``pos``, falling back to a rough estimate.

    Tries ``draw.textbbox`` first. If that call raises, the box is
    approximated from the character count and the font's ``size``
    attribute (12 when the font has none): the width is
    ``len(text) * size * 0.6`` and the height is ``size * 1.2``, both
    truncated to int and offset from ``pos``.
    """
    try:
        bbox = draw.textbbox(pos, text, font=font)
    except Exception:
        # Best-effort fallback: any measuring failure yields an estimate.
        left, top = pos[0], pos[1]
        glyph_size = getattr(font, "size", 12)
        est_w = int(len(text) * glyph_size * 0.6)
        est_h = int(glyph_size * 1.2)
        bbox = (left, top, left + est_w, top + est_h)
    return bbox
char_w = 6
chars_per_line = max(1, int(max_w / char_w))
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
return wrapped_lines, font, 4, 10
while font_size >= MIN_FONT_SIZE:
font = load_font(font_path, font_size)
if font is None:
font = ImageFont.load_default()
return [text], font, 4, 10
char_bbox = font.getbbox("A")
char_w = (char_bbox[2] - char_bbox[0]) or 10
chars_per_line = max(1, int((max_w * 0.95) / char_w))
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
# Use uniform font metrics for height instead of per-line bounding boxes
line_spacing = max(2, int(font_size * 0.15))
if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics()
line_h = ascent + descent
else:
line_h = font_size
total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
max_line_w = 0
for line in wrapped_lines:
bbox = font.getbbox(line)
lw = bbox[2] - bbox[0]
max_line_w = max(max_line_w, lw)
if max_line_w <= max_w and total_h <= max_h:
return wrapped_lines, font, line_spacing, font_size
font_size -= 2
font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10)
char_w = (char_bbox[2] - char_bbox[0]) or 6
chars_per_line = max(1, int(max_w / char_w))
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE
# ============================================================
# RENDER
# ============================================================
def render_translations(
def render_text(
image_bgr,
bubble_boxes: Dict[int, Tuple],
bubbles_data: Dict[str, dict],
translations: Dict[int, str],
skip_ids: Set[int],
font_path: str,
font_size: int = FONT_SIZE,
bold_outline: bool = True,
auto_color: bool = True,
output_path: str = OUTPUT_PATH
skip_ids: Set[int]
):
"""
Draws the translated text centered in the line_union_bbox of each bubble.
Adds a white stroke (outline) to cover any residual original characters.
"""
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(image_rgb)
draw = ImageDraw.Draw(pil_img)
pil_img = Image.fromarray(image_rgb)
draw = ImageDraw.Draw(pil_img)
rendered = 0
skipped = 0
missing = 0
rendered_count = 0
for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
for bid_str, val in bubbles_data.items():
bid = int(bid_str)
# ── skip list check ────────────────────────────────────────
if bid in skip_ids:
print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
skipped += 1
if bid in skip_ids or bid not in translations:
continue
text = translations.get(bid, "").strip()
if not text:
print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank")
missing += 1
continue
box_w = x2 - x1
box_h = y2 - y1
if box_w < 10 or box_h < 10:
continue
# ── fit font + wrap ────────────────────────────────────────
size, font, lines = fit_text(
text, box_w, box_h, font_path, max_size=font_size
)
# ── colors ─────────────────────────────────────────────────
if auto_color:
bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
fg = pick_fg_color(bg)
ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
text = translations[bid]
union_box = val.get("line_union_bbox")
if not union_box:
union_box = val.get("text_bbox")
if not union_box:
continue
bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
pad_x = int(bw * 0.1)
pad_y = int(bh * 0.1)
bx -= pad_x // 2
by -= pad_y // 2
bw += pad_x
bh += pad_y
target_size = get_original_font_size(val)
wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
# Use uniform typographic line height for rendering
if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics()
line_h = ascent + descent
else:
fg, ol = (0, 0, 0), (255, 255, 255)
line_h = final_size
total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
current_y = by + (bh - total_text_height) // 2
outline_thickness = max(2, int(final_size * 0.10))
# ── vertical center ────────────────────────────────────────
line_h = size + 8
total_h = line_h * len(lines)
y_cur = y1 + max(4, (box_h - total_h) // 2)
for i, line in enumerate(wrapped_lines):
if hasattr(font, 'getbbox'):
bbox = font.getbbox(line)
lw = bbox[2] - bbox[0]
else:
lw = len(line) * 6
current_x = bx + (bw - lw) // 2
draw.text(
(current_x, current_y),
line,
fill=(0, 0, 0),
font=font,
stroke_width=outline_thickness,
stroke_fill=(255, 255, 255)
)
# Advance Y by the uniform line height + spacing
current_y += line_h + line_spacing
for line in lines:
bb = safe_textbbox(draw, (0, 0), line, font)
line_w = bb[2] - bb[0]
x_cur = x1 + max(2, (box_w - line_w) // 2)
if bold_outline:
for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
try:
draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
except Exception:
pass
try:
draw.text((x_cur, y_cur), line, font=font, fill=fg)
except Exception as e:
print(f" ❌ Draw error bubble #{bid}: {e}")
y_cur += line_h
print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
rendered += 1
pil_img.save(output_path)
print()
print(f"{''*50}")
print(f" Rendered : {rendered}")
print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
print(f" No text : {missing} (not in output.txt)")
print(f"{''*50}")
print(f"✅ Saved → {output_path}")
return pil_img
rendered_count += 1
print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
# ============================================================
# MAIN
# ============================================================
def main():
print(f"📖 Loading image : {IMAGE_PATH}")
image = cv2.imread(IMAGE_PATH)
if image is None:
print(f"Cannot load: {IMAGE_PATH}"); return
print(f"Loading image: {IMAGE_PATH}")
image_bgr = cv2.imread(IMAGE_PATH)
if image_bgr is None:
print(f"Error: Could not load {IMAGE_PATH}")
return
print(f"📦 Loading bubbles : {BUBBLES_PATH}")
bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
print(f" {len(bubble_boxes)} bubbles | "
f"{sum(len(v) for v in quads_per_bubble.values())} quads total")
print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
print(f"Loading translations: {TRANSLATIONS_PATH}")
translations = parse_translations(TRANSLATIONS_PATH)
print(f" {len(translations)} translations found")
print(f"Loading bubble data: {BUBBLES_PATH}")
bubbles_data = parse_bubbles(BUBBLES_PATH)
if SKIP_BUBBLE_IDS:
print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}")
else:
print(f"⏭️ Skip list : (empty — all bubbles will be rendered)")
print("Resolving font...")
font_path = resolve_font_path()
print("🔤 Resolving font...")
font_path, _ = resolve_font()
print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
clean_image = erase_quads(
image,
quads_per_bubble,
translations = translations, # ← pass translations here
skip_ids = SKIP_BUBBLE_IDS,
pad = QUAD_PAD
print("\n--- Step 1: Erasing original text ---")
erased_bgr = erase_quads(
image_bgr=image_bgr,
bubbles_data=bubbles_data,
translations=translations,
skip_ids=SKIP_BUBBLE_IDS,
pad=QUAD_PAD
)
print("✍️ Rendering translated text...")
render_translations(
image_bgr = clean_image,
bubble_boxes = bubble_boxes,
translations = translations,
skip_ids = SKIP_BUBBLE_IDS,
font_path = font_path,
font_size = FONT_SIZE,
bold_outline = True,
auto_color = True,
output_path = OUTPUT_PATH
print("\n--- Step 2: Rendering translated text ---")
final_bgr = render_text(
image_bgr=erased_bgr,
bubbles_data=bubbles_data,
translations=translations,
font_path=font_path,
skip_ids=SKIP_BUBBLE_IDS
)
print(f"\nSaving final image to: {OUTPUT_PATH}")
cv2.imwrite(OUTPUT_PATH, final_bgr)
print("✅ Done!")
if __name__ == "__main__":
main()
main()