Files
manga-translator/manga-renderer.py
Guillem Hernandez Sola 5aa79d986a First beta 2
2026-04-15 21:41:01 +02:00

413 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
manga-renderer.py
Inputs: 001.jpg + bubbles.json + output.txt
Output: translated_page.png
Strategy:
1. For every bubble that has a translation and is not in SKIP_BUBBLE_IDS,
white-fill all its OCR quads (erases original text cleanly)
2. Render the translated text centered inside the bubble bounding box
3. Bubbles in SKIP_BUBBLE_IDS, or without a translation, are left untouched
(neither erased nor re-rendered)
"""
import json
import textwrap
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set
# ============================================================
# CONFIG — edit these paths to match your setup
# ============================================================
# Source page image; main() loads it with cv2.imread.
IMAGE_PATH = "004.png"
# JSON file with per-bubble boxes and OCR quads; read by parse_bubbles().
BUBBLES_PATH = "bubbles.json"
# Pipe-separated translation table; read by parse_translations().
TRANSLATIONS_PATH = "output_004.txt"
# Where render_translations() saves the finished page.
OUTPUT_PATH = "translated_page_004.png"
# Font candidates — first one that loads wins
FONT_CANDIDATES = [
"fonts/ComicNeue-Bold.ttf",
]
# Largest font size (px) tried when fitting text into a bubble.
FONT_SIZE = 24
# Smallest font size (px) tried; also used when nothing fits.
MIN_FONT_SIZE = 18
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
# ============================================================
# SKIP LIST
# ── Add any bubble IDs you do NOT want rendered here.
# ── Skipped bubbles are left completely untouched: erase_quads()
# ── does not white-fill their quads and render_translations()
# ── draws no translated text inside them.
# ──
# ── Examples of why you'd skip a bubble:
# ── • Sound effects (BURP, BAM, POW …)
# ── • Untranslatable single characters
# ── • Bubbles with bad OCR you want to fix manually later
# ── • Narrator boxes you want to leave in the source language
# ──
# ── NOTE: with every entry commented out, the literal below is an
# ── empty dict ({}), not a set. Membership tests, truthiness and
# ── sorted() behave the same, so the script still works.
# ============================================================
SKIP_BUBBLE_IDS: Set[int] = {
# 8, # BURP BURP — sound effect
# 2, # example: bad OCR, fix manually
}
# ============================================================
# FONT LOADER
# ============================================================
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """
    Load a TrueType face from *path* at *size*, or return None.

    For a ``.ttc`` collection, face indices 0-3 are tried in order; for any
    other file only index 0 is tried. A face is accepted once it loads and
    survives a ``getbbox("A")`` probe. Any exception while loading or
    probing moves on to the next index.
    """
    if path.lower().endswith(".ttc"):
        face_indices = range(4)
    else:
        face_indices = [0]

    for face in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face)
            # Probe the face; a failure here means it is not usable.
            candidate.getbbox("A")
        except Exception:
            continue
        return candidate
    return None
def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """
    Pick the first entry of FONT_CANDIDATES that load_font() can open.

    Returns (path, font) for that candidate, loaded at FONT_SIZE. When no
    candidate loads, returns ("", Pillow's default font) so callers can
    tell that no TrueType path is available.
    """
    for path in FONT_CANDIDATES:
        loaded = load_font(path, FONT_SIZE)
        if loaded is None:
            continue
        print(f" ✅ Font: {path}")
        return path, loaded

    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()
# ============================================================
# PARSE output.txt → {bid: translated_string}
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
    """
    Read the translation table and return {bubble_id: translated_text}.

    Data lines look like:
        #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS

    Only lines starting with "#" and having at least five "|"-separated
    fields are considered. The bubble id is the first field without its
    leading "#"; lines whose id is not an integer (for example a header
    row) are ignored. A translation that is empty or exactly "-" is
    treated as "no translation" and the bubble is left out of the result.

    Raises:
        OSError: if *filepath* cannot be opened.
    """
    translations: Dict[int, str] = {}
    # "utf-8-sig" reads plain UTF-8 too, but also strips a leading BOM.
    # With plain "utf-8" the BOM stays glued to the first line, which then
    # fails the startswith("#") test and is silently dropped.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue
            parts = line.split("|")
            if len(parts) < 5:
                continue
            try:
                bid = int(parts[0].lstrip("#"))
            except ValueError:
                # Non-numeric id: not a bubble record.
                continue
            translated = parts[4].strip()
            if translated and translated != "-":
                translations[bid] = translated
    return translations
# ============================================================
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
# ============================================================
def parse_bubbles(filepath: str):
    """
    Load the bubble description JSON.

    The file is a mapping of bubble id (string) to an object with "x", "y",
    "w", "h" and an optional "quads" list.

    Returns:
        bubble_boxes     : {bid: (x1, y1, x2, y2)} with x2 = x + w, y2 = y + h
        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
                           (empty list when the entry has no "quads")
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        raw = json.load(handle)

    bubble_boxes = {}
    quads_per_bubble = {}
    for raw_id, entry in raw.items():
        bubble_id = int(raw_id)
        left, top = entry["x"], entry["y"]
        right = left + entry["w"]
        bottom = top + entry["h"]
        bubble_boxes[bubble_id] = (left, top, right, bottom)
        quads_per_bubble[bubble_id] = entry.get("quads", [])
    return bubble_boxes, quads_per_bubble
# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
    image_bgr,
    quads_per_bubble: Dict[int, List],
    translations: Dict[int, str],  # only bubbles with a translation are erased
    skip_ids: Set[int],
    pad: int = QUAD_PAD
):
    """
    White-fill the OCR quads of every bubble that will be re-rendered.

    A bubble is erased only when it:
      - has a translation in *translations* AND
      - is NOT in *skip_ids*
    Everything else is left completely untouched.

    Each quad is filled as a polygon, and then its axis-aligned bounding
    rectangle, grown by *pad* pixels and clamped to the image, is filled too.

    Args:
        image_bgr: image array as loaded by cv2.imread; it is not modified.
        quads_per_bubble: {bid: [quad, ...]}, each quad a list of [x, y] points.
        translations: {bid: text}; only the keys are consulted here.
        skip_ids: bubble ids to leave alone.
        pad: extra pixels added around each quad's bounding rectangle.

    Returns:
        A copy of *image_bgr* with the selected quads painted white.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    white = (255, 255, 255)
    erased_count = 0
    skipped_count = 0
    for bid, quads in quads_per_bubble.items():
        # Ignore bubbles that are explicitly skipped or have no translation.
        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue
        for quad in quads:
            pts = np.array(quad, dtype=np.int32).reshape(-1, 2)
            if len(pts) == 0:
                # Empty quad: nothing to erase (min()/max() would raise).
                continue
            cv2.fillPoly(result, [pts], white)
            # Take the bounds from the int32 array and convert to plain ints,
            # so quads stored with float coordinates do not reach
            # cv2.rectangle as non-integer points.
            x1 = max(0, int(pts[:, 0].min()) - pad)
            y1 = max(0, int(pts[:, 1].min()) - pad)
            x2 = min(iw - 1, int(pts[:, 0].max()) + pad)
            y2 = min(ih - 1, int(pts[:, 1].max()) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), white, -1)
        erased_count += 1
    print(f" Erased : {erased_count} bubbles")
    print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
# ============================================================
# FONT SIZING + TEXT WRAP
# ============================================================
def fit_text(
    text: str,
    box_w: int,
    box_h: int,
    font_path: str,
    max_size: int = FONT_SIZE,
    min_size: int = MIN_FONT_SIZE
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
    """
    Choose a font size and line wrapping for *text* inside box_w × box_h.

    Sizes are tried from *max_size* down to *min_size*. At each size the
    text is wrapped to an estimated characters-per-line (0.62 × size per
    character) and accepted when (size + 8) × line count fits in box_h - 8.

    Returns (size, font, wrapped_lines). If the font cannot be loaded while
    searching, returns (min_size, Pillow default font, [text]) with the text
    unwrapped. If no size fits, returns the wrapping at *min_size*.
    """
    def wrap_at(px: int) -> List[str]:
        # Width is estimated from the size, not measured from the face.
        per_line = max(1, int(box_w / (px * 0.62)))
        return textwrap.fill(text, width=per_line).split("\n")

    for px in range(max_size, min_size - 1, -1):
        face = load_font(font_path, px) if font_path else None
        if face is None:
            return min_size, ImageFont.load_default(), [text]
        rows = wrap_at(px)
        if (px + 8) * len(rows) <= box_h - 8:
            return px, face, rows

    # Nothing fit — fall back to the minimum size
    face = load_font(font_path, min_size) if font_path else None
    if face is None:
        face = ImageFont.load_default()
    return min_size, face, wrap_at(min_size)
# ============================================================
# COLOR HELPERS
# ============================================================
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """
    Estimate a box's background color as (R, G, B).

    Reads one pixel 4px inside each corner of the box (x1, y1)-(x2, y2),
    clamping coordinates to the image, and returns the per-channel median
    of those four samples truncated to int. The image is indexed as BGR.
    """
    height, width = image_bgr.shape[:2]
    inset = 4
    probes = [
        (x1 + inset, y1 + inset),
        (x2 - inset, y1 + inset),
        (x1 + inset, y2 - inset),
        (x2 - inset, y2 - inset),
    ]

    rgb_samples = []
    for px, py in probes:
        col = max(0, min(width - 1, px))
        row = max(0, min(height - 1, py))
        blue, green, red = image_bgr[row, col]
        rgb_samples.append((int(red), int(green), int(blue)))

    # zip(*...) regroups the samples per channel: all reds, greens, blues.
    return tuple(int(np.median(channel)) for channel in zip(*rgb_samples))
def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """
    Return black text for a light background and white text for a dark one.

    *bg* is (R, G, B). Its weighted luminance
    (0.299 R + 0.587 G + 0.114 B) is compared against 128.
    """
    red, green, blue = bg[0], bg[1], bg[2]
    luminance = 0.299 * red + 0.587 * green + 0.114 * blue
    if luminance > 128:
        return (0, 0, 0)
    return (255, 255, 255)
def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """
    Return the bounding box of *text* drawn at *pos*, never raising.

    Uses ``draw.textbbox`` when it works. On any exception, falls back to
    an estimate of 0.6 × size per character wide and 1.2 × size tall,
    where size is ``font.size`` or 12 if the font has no such attribute.
    """
    try:
        return draw.textbbox(pos, text, font=font)
    except Exception:
        pass

    size = getattr(font, "size", 12)
    left, top = pos[0], pos[1]
    est_width = int(len(text) * size * 0.6)
    est_height = int(size * 1.2)
    return (left, top, left + est_width, top + est_height)
# ============================================================
# RENDER
# ============================================================
def render_translations(
    image_bgr,
    bubble_boxes: Dict[int, Tuple],
    translations: Dict[int, str],
    skip_ids: Set[int],
    font_path: str,
    font_size: int = FONT_SIZE,
    bold_outline: bool = True,
    auto_color: bool = True,
    output_path: str = OUTPUT_PATH
):
    """
    Draw each translation centered in its bubble box and save the page.

    Bubbles are processed in ascending id order. A bubble is not drawn when
    it is in *skip_ids*, has no (non-blank) translation, or its box is
    narrower or shorter than 10px; each case is logged and counted.

    Args:
        image_bgr: page image in BGR channel order (already erased).
        bubble_boxes: {bid: (x1, y1, x2, y2)}.
        translations: {bid: text}.
        skip_ids: bubble ids that must not be rendered.
        font_path: TrueType path passed to fit_text(); "" selects the
            Pillow default font there.
        font_size: largest size fit_text() may use.
        bold_outline: draw the text four times, offset by 1px, in the
            contrasting color before drawing the text itself.
        auto_color: derive text color from the sampled background; when
            False, black text with a white outline is used.
        output_path: file the rendered page is saved to.

    Returns:
        The rendered PIL image (also written to *output_path*).
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)
    outline_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    rendered = 0
    skipped = 0
    missing = 0
    too_small = 0
    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
        # ── skip list check ────────────────────────────────────────
        if bid in skip_ids:
            print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
            skipped += 1
            continue
        text = translations.get(bid, "").strip()
        if not text:
            print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank")
            missing += 1
            continue
        box_w = x2 - x1
        box_h = y2 - y1
        if box_w < 10 or box_h < 10:
            # Report it: the original text may already have been erased,
            # so a silently dropped bubble would end up blank.
            print(f" ⚠️ Bubble #{bid:<3} — box too small ({box_w}x{box_h}px), not rendered")
            too_small += 1
            continue
        # ── fit font + wrap ────────────────────────────────────────
        size, font, lines = fit_text(
            text, box_w, box_h, font_path, max_size=font_size
        )
        # ── colors ─────────────────────────────────────────────────
        if auto_color:
            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
            fg = pick_fg_color(bg)
            ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
        else:
            fg, ol = (0, 0, 0), (255, 255, 255)
        # ── vertical center ────────────────────────────────────────
        line_h = size + 8
        total_h = line_h * len(lines)
        y_cur = y1 + max(4, (box_h - total_h) // 2)
        for line in lines:
            bb = safe_textbbox(draw, (0, 0), line, font)
            line_w = bb[2] - bb[0]
            x_cur = x1 + max(2, (box_w - line_w) // 2)
            if bold_outline:
                # The outline is best-effort: a failure is reported but
                # does not stop the text itself from being drawn.
                try:
                    for dx, dy in outline_offsets:
                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
                except Exception as e:
                    print(f" ⚠️ Outline error bubble #{bid}: {e}")
            try:
                draw.text((x_cur, y_cur), line, font=font, fill=fg)
            except Exception as e:
                print(f" ❌ Draw error bubble #{bid}: {e}")
            y_cur += line_h
        print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
        rendered += 1
    pil_img.save(output_path)
    # Visible separator; the previous ''*50 repeated an empty string and
    # printed only a blank line.
    rule = "─" * 50
    print()
    print(rule)
    print(f" Rendered : {rendered}")
    print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
    print(f" No text : {missing} (not in output.txt)")
    print(f" Too small: {too_small} (box under 10px)")
    print(rule)
    print(f"✅ Saved → {output_path}")
    return pil_img
# ============================================================
# MAIN
# ============================================================
def main():
    """
    Run the whole pipeline using the module-level CONFIG values.

    Loads the page image, the bubble JSON and the translation table,
    resolves a font, white-fills the quads of the bubbles to be rendered,
    then draws the translations and saves the result. Returns early, after
    printing an error, when the image cannot be read.
    """
    print(f"📖 Loading image : {IMAGE_PATH}")
    page = cv2.imread(IMAGE_PATH)
    if page is None:
        print(f"❌ Cannot load: {IMAGE_PATH}")
        return

    print(f"📦 Loading bubbles : {BUBBLES_PATH}")
    bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
    quad_total = sum(map(len, quads_per_bubble.values()))
    print(f" {len(bubble_boxes)} bubbles | {quad_total} quads total")

    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    translations = parse_translations(TRANSLATIONS_PATH)
    print(f" {len(translations)} translations found")

    if not SKIP_BUBBLE_IDS:
        print("⏭️ Skip list : (empty — all bubbles will be rendered)")
    else:
        print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}")

    print("🔤 Resolving font...")
    # Only the path is needed; fit_text() reloads the face per size.
    font_path = resolve_font()[0]

    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
    clean_image = erase_quads(
        image,
        quads_per_bubble,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD,
    ) if (image := page) is not None else None

    print("✍️ Rendering translated text...")
    render_translations(
        image_bgr=clean_image,
        bubble_boxes=bubble_boxes,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        font_path=font_path,
        font_size=FONT_SIZE,
        bold_outline=True,
        auto_color=True,
        output_path=OUTPUT_PATH,
    )


if __name__ == "__main__":
    main()