413 lines
14 KiB
Python
413 lines
14 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
manga-renderer.py
|
||
|
||
Inputs: 001.jpg + bubbles.json + output.txt
|
||
Output: translated_page.png
|
||
|
||
Strategy:
|
||
  1. For every bubble that has a translation and is not in SKIP_BUBBLE_IDS, white-fill all its OCR quads (erases original text cleanly)
|
||
2. Render the translated text centered inside the bubble bounding box
|
||
  3. Bubbles in SKIP_BUBBLE_IDS are left completely untouched (neither erased nor re-rendered)
|
||
"""
|
||
|
||
import json
|
||
import textwrap
|
||
import cv2
|
||
import numpy as np
|
||
from PIL import Image, ImageDraw, ImageFont
|
||
from typing import Dict, List, Tuple, Optional, Set
|
||
|
||
# ============================================================
|
||
# CONFIG — edit these paths to match your setup
|
||
# ============================================================
|
||
# Input page image, bubble geometry JSON, translation table, and output file.
IMAGE_PATH: str = "004.png"
BUBBLES_PATH: str = "bubbles.json"
TRANSLATIONS_PATH: str = "output_004.txt"
OUTPUT_PATH: str = "translated_page_004.png"

# Font candidates — tried in order; the first one that loads wins.
FONT_CANDIDATES: List[str] = ["fonts/ComicNeue-Bold.ttf"]

FONT_SIZE: int = 24      # largest size tried when fitting text into a bubble
MIN_FONT_SIZE: int = 18  # smallest size the fitting loop will shrink to
QUAD_PAD: int = 4        # extra pixels added around each quad before white-fill
|
||
|
||
# ============================================================
|
||
# SKIP LIST
|
||
# ── Add any bubble IDs you do NOT want rendered here.
|
||
# ── Skipped bubbles are left completely untouched: their quads are
# ── NOT erased and no translated text is drawn inside them.
|
||
# ──
|
||
# ── Examples of why you'd skip a bubble:
|
||
# ── • Sound effects (BURP, BAM, POW …)
|
||
# ── • Untranslatable single characters
|
||
# ── • Bubbles with bad OCR you want to fix manually later
|
||
# ── • Narrator boxes you want to leave in the source language
|
||
# ============================================================
|
||
# NOTE: built with set([...]) instead of a bare `{ }` literal. Braces that
# contain only comments evaluate to an empty *dict*, which contradicts the
# Set[int] annotation; this form stays a real set whether or not any of the
# example entries below are uncommented.
SKIP_BUBBLE_IDS: Set[int] = set([
    # 8,    # BURP BURP — sound effect
    # 2,    # example: bad OCR, fix manually
])
|
||
|
||
|
||
# ============================================================
|
||
# FONT LOADER
|
||
# ============================================================
|
||
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a usable font face from *path* at the requested *size*.

    Paths ending in ``.ttc`` (case-insensitive) are probed at face indices
    0-3; any other path is opened at index 0 only. A face is accepted once
    ``getbbox("A")`` succeeds on it.

    Returns:
        The first face that loads and measures without raising, or ``None``
        when every attempt raised.
    """
    is_collection = path.lower().endswith(".ttc")
    face_indices = range(4) if is_collection else [0]

    for face in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face)
            candidate.getbbox("A")  # raises if face metrics are broken
        except Exception:
            # Deliberately broad: any failure just means "try the next face".
            continue
        return candidate

    return None
|
||
|
||
|
||
def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """Pick the first entry of FONT_CANDIDATES that loads at FONT_SIZE.

    Returns:
        ``(path, font)`` for the first candidate that ``load_font`` accepts.
        When none loads, a warning is printed and ``("", default_font)`` is
        returned, where ``default_font`` comes from
        ``ImageFont.load_default()``.
    """
    for path in FONT_CANDIDATES:
        loaded = load_font(path, FONT_SIZE)
        if loaded is None:
            continue
        print(f" ✅ Font: {path}")
        return path, loaded

    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()
|
||
|
||
|
||
# ============================================================
|
||
# PARSE output.txt → {bid: translated_string}
|
||
# ============================================================
|
||
def parse_translations(filepath: str) -> Dict[int, str]:
    """Build a ``{bubble_id: translated_text}`` map from a translations file.

    Only lines that start with ``#`` (after stripping whitespace) and have at
    least five ``|``-separated fields are considered, e.g.::

        #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS

    The bubble id is the first field with its leading ``#`` characters
    removed; the translation is the fifth field, stripped. Lines whose id is
    not an integer, or whose translation is empty or ``-``, are ignored.
    When an id appears more than once, the last usable line wins.
    """
    result: Dict[int, str] = {}

    with open(filepath, "r", encoding="utf-8") as handle:
        for raw in handle:
            record = raw.strip()
            if not record.startswith("#"):
                continue

            fields = record.split("|")
            if len(fields) < 5:
                continue

            try:
                bubble_id = int(fields[0].lstrip("#"))
            except ValueError:
                continue

            text = fields[4].strip()
            if text in ("", "-"):
                continue

            result[bubble_id] = text

    return result
|
||
|
||
|
||
# ============================================================
|
||
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
|
||
# ============================================================
|
||
def parse_bubbles(filepath: str):
    """Load bubble geometry from a JSON file keyed by bubble id.

    Each JSON value must provide ``x``, ``y``, ``w`` and ``h``; ``quads`` is
    optional and defaults to an empty list. Keys are converted with ``int``.

    Returns:
        bubble_boxes     : {bid: (x1, y1, x2, y2)} with x2 = x + w, y2 = y + h
        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        raw = json.load(handle)

    boxes = {}
    quads = {}

    for key, entry in raw.items():
        bubble_id = int(key)
        left, top = entry["x"], entry["y"]
        boxes[bubble_id] = (left, top, left + entry["w"], top + entry["h"])
        quads[bubble_id] = entry.get("quads", [])

    return boxes, quads
|
||
|
||
|
||
# ============================================================
|
||
# ERASE — white-fill the OCR quads of translated, non-skipped bubbles (with small padding)
|
||
# ============================================================
|
||
def erase_quads(
    image_bgr,
    quads_per_bubble: Dict[int, List],
    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
):
    """White-fill the OCR quads of every bubble that will be re-rendered.

    A bubble is erased only when it has an entry in *translations* AND is not
    in *skip_ids*; every other bubble is left completely untouched.

    For each quad, the polygon itself is filled and then its axis-aligned
    bounding box, grown by *pad* pixels and clamped to the image, is filled
    as well.

    Args:
        image_bgr: Source image array; it is copied, not modified in place.
        quads_per_bubble: {bid: [quad, ...]}, each quad a sequence of [x, y].
        translations: {bid: text}; only its keys are consulted here.
        skip_ids: Bubble ids to leave untouched.
        pad: Extra pixels added around each quad's bounding box.

    Returns:
        A new image with the selected quads painted white.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    white = (255, 255, 255)

    erased_count = 0
    skipped_count = 0

    for bid, quads in quads_per_bubble.items():
        # Untouched: explicitly skipped, or no translation to replace it with.
        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue

        for quad in quads:
            # A quad with no points has nothing to erase, and min()/max()
            # below would raise ValueError on it.
            if len(quad) == 0:
                continue

            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(result, [pts], white)

            xs = [p[0] for p in quad]
            ys = [p[1] for p in quad]
            # cv2.rectangle only accepts integer coordinates. Floor the
            # minimum and ceil the maximum so float quads are still fully
            # covered; for integer input this is a no-op.
            x1 = max(0, int(np.floor(min(xs))) - pad)
            y1 = max(0, int(np.floor(min(ys))) - pad)
            x2 = min(iw - 1, int(np.ceil(max(xs))) + pad)
            y2 = min(ih - 1, int(np.ceil(max(ys))) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), white, -1)

        erased_count += 1

    print(f" Erased : {erased_count} bubbles")
    print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
|
||
|
||
|
||
# ============================================================
|
||
# FONT SIZING + TEXT WRAP
|
||
# ============================================================
|
||
def fit_text(
    text: str,
    box_w: int,
    box_h: int,
    font_path: str,
    max_size: int = FONT_SIZE,
    min_size: int = MIN_FONT_SIZE
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
    """Choose a font size and line wrapping for *text* inside a box.

    Sizes are tried from *max_size* down to *min_size*. At each size the text
    is wrapped with a character-count heuristic (one glyph is taken to be
    about 0.62 × size wide) and accepted when ``(size + 8) * line_count``
    fits in ``box_h - 8``.

    Args:
        text: The string to lay out.
        box_w: Available width in pixels.
        box_h: Available height in pixels.
        font_path: TrueType file to load; an empty string means "no file".
        max_size: Largest size to try.
        min_size: Smallest size to try, also used when nothing fits.

    Returns:
        ``(size, font, wrapped_lines)``. When no size fits, the result uses
        *min_size*. When the font cannot be loaded (or *font_path* is empty),
        the result is ``(min_size, ImageFont.load_default(), wrapped_lines)``.
        The lines are wrapped in that case too, so a long translation is not
        returned as a single line that overflows the box.
    """
    def wrap_at(size: int) -> List[str]:
        # Rough width model: an average glyph is ~0.62 × the font size wide.
        chars_per_line = max(1, int(box_w / (size * 0.62)))
        return textwrap.fill(text, width=chars_per_line).split("\n")

    for size in range(max_size, min_size - 1, -1):
        font = load_font(font_path, size) if font_path else None
        if font is None:
            # No usable TrueType face: fall back to Pillow's default font,
            # still wrapping the text to the box width.
            return min_size, ImageFont.load_default(), wrap_at(min_size)

        lines = wrap_at(size)
        total_h = (size + 8) * len(lines)
        if total_h <= box_h - 8:
            return size, font, lines

    # Nothing fit (or the size range was empty) — use the minimum size.
    font = load_font(font_path, min_size) if font_path else None
    if font is None:
        font = ImageFont.load_default()
    return min_size, font, wrap_at(min_size)
|
||
|
||
|
||
# ============================================================
|
||
# COLOR HELPERS
|
||
# ============================================================
|
||
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """Estimate a box's background color as (R, G, B).

    Four pixels are read, one near each corner of the box (inset by 4 px and
    clamped to the image bounds). The result is the per-channel median of
    those four pixels, truncated to int. The image is indexed as BGR.
    """
    height, width = image_bgr.shape[:2]
    inset = 4
    probe_points = [
        (x1 + inset, y1 + inset),
        (x2 - inset, y1 + inset),
        (x1 + inset, y2 - inset),
        (x2 - inset, y2 - inset),
    ]

    reds: List[int] = []
    greens: List[int] = []
    blues: List[int] = []
    for px, py in probe_points:
        col = max(0, min(width - 1, px))
        row = max(0, min(height - 1, py))
        blue, green, red = image_bgr[row, col]
        reds.append(int(red))
        greens.append(int(green))
        blues.append(int(blue))

    return (
        int(np.median(reds)),
        int(np.median(greens)),
        int(np.median(blues)),
    )
|
||
|
||
|
||
def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Return black text for a light (R, G, B) background, white otherwise.

    Brightness is the weighted sum 0.299·R + 0.587·G + 0.114·B; values
    strictly above 128 count as light.
    """
    red, green, blue = bg[0], bg[1], bg[2]
    luminance = 0.299 * red + 0.587 * green + 0.114 * blue
    if luminance > 128:
        return (0, 0, 0)
    return (255, 255, 255)
|
||
|
||
|
||
def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """Return the bounding box of *text* at *pos*, estimating it on failure.

    ``draw.textbbox(pos, text, font=font)`` is used when it succeeds. If it
    raises, the box is approximated from the font's ``size`` attribute
    (12 when absent): width = len(text) × size × 0.6 and height = size × 1.2,
    each truncated to int and anchored at *pos*.
    """
    try:
        bbox = draw.textbbox(pos, text, font=font)
    except Exception:
        glyph = getattr(font, "size", 12)
        left, top = pos[0], pos[1]
        est_w = int(len(text) * glyph * 0.6)
        est_h = int(glyph * 1.2)
        bbox = (left, top, left + est_w, top + est_h)
    return bbox
|
||
|
||
|
||
# ============================================================
|
||
# RENDER
|
||
# ============================================================
|
||
def render_translations(
    image_bgr,
    bubble_boxes: Dict[int, Tuple],
    translations: Dict[int, str],
    skip_ids: Set[int],
    font_path: str,
    font_size: int = FONT_SIZE,
    bold_outline: bool = True,
    auto_color: bool = True,
    output_path: str = OUTPUT_PATH
):
    """Draw each bubble's translated text centered in its box and save the page.

    Bubbles are processed in ascending id order. A bubble is not drawn when it
    is in *skip_ids*, has no non-empty translation, or its box is narrower or
    shorter than 10 px; each of those cases is reported and tallied.

    Args:
        image_bgr: BGR image array to draw on (converted to RGB internally).
        bubble_boxes: {bid: (x1, y1, x2, y2)}.
        translations: {bid: text}.
        skip_ids: Bubble ids that must not be rendered.
        font_path: TrueType file passed on to ``fit_text``.
        font_size: Largest font size ``fit_text`` may use.
        bold_outline: Draw a 1 px outline (4 offset copies) behind the text.
        auto_color: Pick text/outline colors from the sampled background;
            otherwise black text with a white outline is used.
        output_path: Where the resulting image is saved.

    Returns:
        The PIL image that was saved.
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)

    black, white = (0, 0, 0), (255, 255, 255)
    outline_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    rendered = 0
    skipped = 0
    missing = 0
    too_small = 0
    failed = 0

    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):

        # ── skip list check ────────────────────────────────────────
        if bid in skip_ids:
            print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
            skipped += 1
            continue

        text = translations.get(bid, "").strip()
        if not text:
            print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank")
            missing += 1
            continue

        box_w = x2 - x1
        box_h = y2 - y1
        if box_w < 10 or box_h < 10:
            # Previously dropped silently; report it so the summary adds up.
            print(f" ⚠️ Bubble #{bid:<3} — box too small ({box_w}×{box_h}px), not rendered")
            too_small += 1
            continue

        # ── fit font + wrap ────────────────────────────────────────
        size, font, lines = fit_text(
            text, box_w, box_h, font_path, max_size=font_size
        )

        # ── colors ─────────────────────────────────────────────────
        if auto_color:
            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
            fg = pick_fg_color(bg)
            ol = white if fg == black else black
        else:
            fg, ol = black, white

        # ── vertical center ────────────────────────────────────────
        line_h = size + 8
        total_h = line_h * len(lines)
        y_cur = y1 + max(4, (box_h - total_h) // 2)

        draw_ok = True
        for line in lines:
            bb = safe_textbbox(draw, (0, 0), line, font)
            line_w = bb[2] - bb[0]
            x_cur = x1 + max(2, (box_w - line_w) // 2)

            if bold_outline:
                for dx, dy in outline_offsets:
                    try:
                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
                    except Exception:
                        # Best-effort: the outline is cosmetic, and a failure
                        # here is reported once by the main draw call below.
                        pass

            try:
                draw.text((x_cur, y_cur), line, font=font, fill=fg)
            except Exception as e:
                print(f" ❌ Draw error bubble #{bid}: {e}")
                draw_ok = False

            y_cur += line_h

        # Only claim success when every line was actually drawn.
        if draw_ok:
            print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
            rendered += 1
        else:
            failed += 1

    pil_img.save(output_path)

    print()
    print(f"{'─'*50}")
    print(f" Rendered : {rendered}")
    print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
    print(f" No text : {missing} (not in output.txt)")
    # Extra tallies are shown only when they occur.
    if too_small:
        print(f" Too small: {too_small} (box under 10px)")
    if failed:
        print(f" Failed : {failed} (draw errors)")
    print(f"{'─'*50}")
    print(f"✅ Saved → {output_path}")

    return pil_img
|
||
|
||
|
||
# ============================================================
|
||
# MAIN
|
||
# ============================================================
|
||
def main():
    """Run the pipeline: load inputs, erase original text, render translations.

    Reads the image, bubble geometry and translations from the module-level
    paths, resolves a font, white-fills the quads of bubbles that will be
    re-rendered, then draws the translated text and saves to OUTPUT_PATH.
    Returns early (after printing an error) when the image cannot be loaded.
    """
    print(f"📖 Loading image : {IMAGE_PATH}")
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        print(f"❌ Cannot load: {IMAGE_PATH}")
        return

    print(f"📦 Loading bubbles : {BUBBLES_PATH}")
    bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
    quad_total = sum(len(v) for v in quads_per_bubble.values())
    print(f" {len(bubble_boxes)} bubbles | {quad_total} quads total")

    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    translations = parse_translations(TRANSLATIONS_PATH)
    print(f" {len(translations)} translations found")

    if not SKIP_BUBBLE_IDS:
        print("⏭️ Skip list : (empty — all bubbles will be rendered)")
    else:
        print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}")

    print("🔤 Resolving font...")
    font_path, _ = resolve_font()

    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
    # Only bubbles that have a translation and are not skipped get erased.
    clean_image = erase_quads(
        image,
        quads_per_bubble,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD,
    )

    print("✍️ Rendering translated text...")
    render_translations(
        image_bgr=clean_image,
        bubble_boxes=bubble_boxes,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        font_path=font_path,
        font_size=FONT_SIZE,
        bold_outline=True,
        auto_color=True,
        output_path=OUTPUT_PATH,
    )


if __name__ == "__main__":
    main()
|