Files
manga-translator/manga-renderer.py
Guillem Hernandez Sola dfa52f54eb Added new rendered
2026-04-21 18:53:34 +02:00

363 lines
12 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
manga-renderer.py
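Inputs: page image (IMAGE_PATH, e.g. 003.jpg) + bubbles.json (BUBBLES_PATH) + translations file (TRANSLATIONS_PATH, e.g. output_003.txt)
Output: translated page image (OUTPUT_PATH, e.g. translated_page_003.png)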
Strategy:
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
2. Detect the original font size from the OCR bounding boxes.
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
4. Render the translated text centered inside the bubble bounding box.
5. Use uniform line heights to prevent accent collisions between lines.
6. Add a white stroke around the text to cover any residual original characters.
"""
import json
import textwrap
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set, Any
# ============================================================
# CONFIG — edit these paths to match your setup
# ============================================================
# Page image to process, OCR bubble data, translations file, and output image.
IMAGE_PATH = "003.jpg"
BUBBLES_PATH = "bubbles.json"
TRANSLATIONS_PATH = "output_003.txt"
OUTPUT_PATH = "translated_page_003.png"
# Font candidates — tried in order by resolve_font_path(); first one that loads wins
FONT_CANDIDATES = [
"fonts/ComicNeue-Bold.ttf",
# Mac fallbacks
"/System/Library/Fonts/Supplemental/Comic Sans MS Bold.ttf",
"/System/Library/Fonts/Supplemental/Arial Bold.ttf",
# Windows fallbacks
"C:\\Windows\\Fonts\\comicbd.ttf",
"C:\\Windows\\Fonts\\arialbd.ttf",
# Linux fallbacks
"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
]
# Size used when probing font candidates, and the default fallback size of
# get_original_font_size() when a bubble has no line boxes.
DEFAULT_FONT_SIZE = 24
# Lower bound for the font size when shrinking text to fit a bubble.
MIN_FONT_SIZE = 12
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
# ============================================================
# SKIP LIST
# ============================================================
# Bubble IDs that must be left untouched (neither erased nor rendered).
# NOTE: this starts as ``set()`` on purpose — a bare ``{}`` holding only a
# comment is an empty *dict*, which contradicts the ``Set[int]`` annotation.
# To skip bubbles, replace the value with a set literal, e.g. ``{3, 7}``.
SKIP_BUBBLE_IDS: Set[int] = set()
# ============================================================
# FONT LOADER
# ============================================================
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a usable font face from ``path`` at ``size``, or return ``None``.

    Paths ending in ``.ttc`` (case-insensitive) are probed at face indices
    0 through 3; any other path is probed at index 0 only. A face is accepted
    only if ``getbbox("A")`` succeeds on it. Any exception raised while
    loading or probing a face moves on to the next index.
    """
    face_count = 4 if path.lower().endswith(".ttc") else 1
    face_index = 0
    while face_index < face_count:
        try:
            candidate = ImageFont.truetype(path, size, index=face_index)
            # Measuring a glyph raises when the face metrics are broken.
            candidate.getbbox("A")
        except Exception:
            face_index += 1
        else:
            return candidate
    return None
def resolve_font_path() -> str:
    """Pick the first entry of ``FONT_CANDIDATES`` that ``load_font`` can open.

    Candidates are probed in list order at ``DEFAULT_FONT_SIZE``. Returns the
    winning path, or an empty string (after printing a warning) when none of
    the candidates loads.
    """
    usable = next(
        (
            path
            for path in FONT_CANDIDATES
            if load_font(path, DEFAULT_FONT_SIZE) is not None
        ),
        None,
    )
    if usable is None:
        print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
        return ""
    print(f" ✅ Font: {usable}")
    return usable
# ============================================================
# PARSERS
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
    """
    Parse the translations file at ``filepath`` into {bubble_id: translated_text}.

    Only lines starting with ``#`` and holding at least five ``|``-separated
    fields are considered, e.g.:
        #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
    The bubble id is the first field without its leading ``#`` characters; the
    translation is the fifth field, stripped. Lines whose id is not an integer,
    or whose translation is empty or ``-``, are ignored.
    """
    result = {}
    with open(filepath, "r", encoding="utf-8") as handle:
        for raw in handle:
            record = raw.strip()
            if record[:1] != "#":
                continue
            fields = record.split("|")
            if len(fields) < 5:
                continue
            try:
                bubble_id = int(fields[0].lstrip("#"))
            except ValueError:
                # Non-numeric id: not a bubble record.
                continue
            text = fields[4].strip()
            if text and text != "-":
                result[bubble_id] = text
    return result
def parse_bubbles(filepath: str):
    """
    Load the bubbles JSON file at ``filepath`` (UTF-8) and return the decoded
    data unchanged.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        return json.load(handle)
# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
):
    """
    White-fill the OCR quads of every bubble that will be re-rendered.

    A bubble is erased ONLY if it:
      - has a translation in ``translations`` AND
      - is NOT in ``skip_ids``

    For each quad the polygon itself is filled, then its axis-aligned bounding
    box grown by ``pad`` pixels and clamped to the image is filled as well.

    Args:
        image_bgr: Image array; only ``.shape[:2]`` and ``.copy()`` are used
            here before drawing. The input is not modified.
        bubbles_data: Mapping of bubble id (string convertible to int) to a
            dict that may contain a ``"quads"`` list of point lists.
        translations: Mapping of bubble id to translated text.
        skip_ids: Bubble ids to leave untouched.
        pad: Extra pixels added around each quad's bounding box.

    Returns:
        A copy of ``image_bgr`` with the selected quads painted white.
    """
    img_h, img_w = image_bgr.shape[:2]
    result = image_bgr.copy()
    white = (255, 255, 255)
    erased_count = 0
    skipped_count = 0
    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)
        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue
        for quad in val.get("quads", []):
            if len(quad) == 0:
                # Nothing to erase; min()/max() below would fail on no points.
                continue
            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(result, [pts], white)
            xs = [p[0] for p in quad]
            ys = [p[1] for p in quad]
            # cv2.rectangle needs integer points; OCR output may hold floats.
            # Floor the minimum and ceil the maximum so the box never shrinks.
            x1 = max(0, int(np.floor(min(xs))) - pad)
            y1 = max(0, int(np.floor(min(ys))) - pad)
            x2 = min(img_w - 1, int(np.ceil(max(xs))) + pad)
            y2 = min(img_h - 1, int(np.ceil(max(ys))) + pad)
            if x1 > x2 or y1 > y2:
                # Quad lies entirely outside the image: the clamped corners
                # are inverted and drawing them would paint border pixels.
                continue
            cv2.rectangle(result, (x1, y1), (x2, y2), white, -1)
        erased_count += 1
    print(f" Erased : {erased_count} bubbles")
    print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
# ============================================================
# DYNAMIC TEXT FITTING
# ============================================================
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Estimate the source font size from a bubble's ``"line_bboxes"`` entries.

    Takes the median of the boxes' ``"h"`` values (truncated to int), scales it
    by 0.85, and clamps the result to the range ``MIN_FONT_SIZE``..60.
    Returns ``fallback_size`` unchanged when the bubble has no line boxes.
    """
    boxes = bubble_data.get("line_bboxes", [])
    if boxes:
        typical_height = int(np.median([entry["h"] for entry in boxes]))
        # NOTE(review): 0.85 presumably converts line-box height to a font
        # size — confirm against the OCR output.
        scaled = int(typical_height * 0.85)
        return max(MIN_FONT_SIZE, min(scaled, 60))
    return fallback_size
def _fit_with_bitmap_font(text: str, max_w: int) -> Tuple[List[str], Any, int, int]:
    """Wrap ``text`` for Pillow's default font, assuming 6 px per character."""
    font = ImageFont.load_default()
    chars_per_line = max(1, int(max_w / 6))
    return textwrap.wrap(text, width=chars_per_line), font, 4, 10


def fit_text_dynamically(
    text: str,
    font_path: str,
    max_w: int,
    max_h: int,
    target_font_size: int
) -> Tuple[List[str], Any, int, int]:
    """
    Wrap text and shrink the font until it fits inside ``max_w`` x ``max_h``.

    Starting at ``target_font_size`` the font size is reduced in steps of 2
    down to ``MIN_FONT_SIZE``. At each size the text is wrapped using a
    characters-per-line estimate derived from the width of "A", and the first
    size whose widest line and total height both fit is returned. If no size
    fits, the text is wrapped at ``MIN_FONT_SIZE`` and returned regardless.

    When ``font_path`` is empty, or the font cannot be loaded at some size,
    Pillow's default font is used and the text is wrapped for it.

    Args:
        text: Translated text to lay out.
        font_path: Path of the TrueType font, or "" for the default font.
        max_w: Available width in pixels.
        max_h: Available height in pixels.
        target_font_size: Font size to try first.

    Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
    """
    if not font_path:
        return _fit_with_bitmap_font(text, max_w)

    font_size = target_font_size
    while font_size >= MIN_FONT_SIZE:
        font = load_font(font_path, font_size)
        if font is None:
            # Previously this returned the text as one unwrapped line, which
            # overflows the bubble; wrap it like the no-font path does.
            return _fit_with_bitmap_font(text, max_w)

        char_bbox = font.getbbox("A")
        char_w = (char_bbox[2] - char_bbox[0]) or 10
        # Keep a 5% margin because "A" only approximates the average glyph.
        chars_per_line = max(1, int((max_w * 0.95) / char_w))
        wrapped_lines = textwrap.wrap(text, width=chars_per_line)

        # Use uniform font metrics for height instead of per-line bounding boxes
        line_spacing = max(2, int(font_size * 0.15))
        if hasattr(font, "getmetrics"):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
        else:
            line_h = font_size
        line_count = len(wrapped_lines)
        total_h = (line_h * line_count) + (line_spacing * max(0, line_count - 1))

        max_line_w = 0
        for line in wrapped_lines:
            left, _, right, _ = font.getbbox(line)
            max_line_w = max(max_line_w, right - left)

        if max_line_w <= max_w and total_h <= max_h:
            return wrapped_lines, font, line_spacing, font_size
        font_size -= 2

    # Nothing fit: fall back to the minimum size without a fit check.
    font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
    char_bbox = font.getbbox("A") if hasattr(font, "getbbox") else (0, 0, 6, 10)
    char_w = (char_bbox[2] - char_bbox[0]) or 6
    chars_per_line = max(1, int(max_w / char_w))
    wrapped_lines = textwrap.wrap(text, width=chars_per_line)
    return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE
# ============================================================
# RENDER
# ============================================================
def render_text(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    font_path: str,
    skip_ids: Set[int]
):
    """
    Draw each bubble's translated text centered in its bounding box.

    The box is the bubble's ``line_union_bbox`` (falling back to
    ``text_bbox``), grown by 10% in each dimension around its center. Text is
    fitted with ``fit_text_dynamically`` and drawn in black with a white
    stroke (outline) to cover any residual original characters. Bubbles in
    ``skip_ids``, without a translation, or without a box are left alone.

    Args:
        image_bgr: BGR image array to draw on (converted to RGB for Pillow).
        bubbles_data: Mapping of bubble id (string convertible to int) to the
            bubble's data dict.
        translations: Mapping of bubble id to translated text.
        font_path: TrueType font path, or "" for Pillow's default font.
        skip_ids: Bubble ids to leave untouched.

    Returns:
        A new BGR image array with the text rendered.
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)
    rendered_count = 0
    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)
        if bid in skip_ids or bid not in translations:
            continue
        text = translations[bid]
        union_box = val.get("line_union_bbox") or val.get("text_bbox")
        if not union_box:
            continue
        bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
        # Grow the box by 10% per dimension, keeping it centered.
        pad_x = int(bw * 0.1)
        pad_y = int(bh * 0.1)
        bx -= pad_x // 2
        by -= pad_y // 2
        bw += pad_x
        bh += pad_y
        target_size = get_original_font_size(val)
        wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(
            text, font_path, bw, bh, target_size
        )
        # Use uniform typographic line height for rendering
        if hasattr(font, "getmetrics"):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
        else:
            line_h = final_size
        line_count = len(wrapped_lines)
        total_text_height = (line_h * line_count) + (line_spacing * max(0, line_count - 1))
        current_y = by + (bh - total_text_height) // 2
        outline_thickness = max(2, int(final_size * 0.10))
        for line in wrapped_lines:
            if hasattr(font, "getbbox"):
                left, _, right, _ = font.getbbox(line)
            else:
                left, right = 0, len(line) * 6
            line_w = right - left
            # getbbox() is relative to the drawing origin, so the ink starts
            # ``left`` pixels after it; subtract that to center the ink itself.
            current_x = bx + (bw - line_w) // 2 - left
            draw.text(
                (current_x, current_y),
                line,
                fill=(0, 0, 0),
                font=font,
                stroke_width=outline_thickness,
                stroke_fill=(255, 255, 255)
            )
            # Advance Y by the uniform line height + spacing
            current_y += line_h + line_spacing
        rendered_count += 1
    print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
# ============================================================
# MAIN
# ============================================================
def main():
    """Run the pipeline: load inputs, erase original text, render, and save.

    Reads ``IMAGE_PATH``, ``TRANSLATIONS_PATH`` and ``BUBBLES_PATH``, writes
    the result to ``OUTPUT_PATH``. Any input that cannot be loaded, or an
    output that cannot be written, is reported and ends the run early.
    """
    print(f"Loading image: {IMAGE_PATH}")
    image_bgr = cv2.imread(IMAGE_PATH)
    if image_bgr is None:
        print(f"❌ Error: Could not load {IMAGE_PATH}")
        return

    print(f"Loading translations: {TRANSLATIONS_PATH}")
    try:
        translations = parse_translations(TRANSLATIONS_PATH)
    except (OSError, ValueError) as exc:
        # ValueError also covers UnicodeDecodeError on a non-UTF-8 file.
        print(f"❌ Error: Could not load {TRANSLATIONS_PATH}: {exc}")
        return

    print(f"Loading bubble data: {BUBBLES_PATH}")
    try:
        bubbles_data = parse_bubbles(BUBBLES_PATH)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"❌ Error: Could not load {BUBBLES_PATH}: {exc}")
        return

    print("Resolving font...")
    font_path = resolve_font_path()

    print("\n--- Step 1: Erasing original text ---")
    erased_bgr = erase_quads(
        image_bgr=image_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD
    )

    print("\n--- Step 2: Rendering translated text ---")
    final_bgr = render_text(
        image_bgr=erased_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        font_path=font_path,
        skip_ids=SKIP_BUBBLE_IDS
    )

    print(f"\nSaving final image to: {OUTPUT_PATH}")
    # imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(OUTPUT_PATH, final_bgr):
        print(f"❌ Error: Could not write {OUTPUT_PATH}")
        return
    print("✅ Done!")


if __name__ == "__main__":
    main()