357 lines
12 KiB
Python
357 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
manga-renderer.py
|
|
|
|
Inputs: page image (e.g. 001.jpg) + bubble JSON (e.g. bubbles_001.json) + translations file (e.g. output_001.txt)
|
|
Output: translated page image (e.g. translated_page_001.png)
|
|
|
|
Strategy:
|
|
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
|
|
2. Detect the original font size from the OCR bounding boxes.
|
|
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
|
|
4. Render the translated text centered inside the bubble bounding box.
|
|
5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
|
|
6. Adds a dynamic white stroke to the text to cover any residual original characters.
|
|
"""
|
|
|
|
import json
|
|
import textwrap
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
from typing import Dict, List, Tuple, Optional, Set, Any
|
|
|
|
# ============================================================
|
|
# CONFIG — edit these paths to match your setup
|
|
# ============================================================
|
|
IMAGE_PATH = "004.png"
BUBBLES_PATH = "bubbles_004.json"
TRANSLATIONS_PATH = "output_004.txt"
OUTPUT_PATH = "translated_page_004.png"

# Font candidates, tried in order; the first one that loads is used.
# When none of them loads, rendering falls back to Pillow's built-in font.
FONT_CANDIDATES = [
    "fonts/animeace2_reg.ttf",
    "fonts/ComicNeue-Bold.ttf",
]

DEFAULT_FONT_SIZE = 18  # size used when a bubble carries no OCR line boxes
MIN_FONT_SIZE = 8       # text is never scaled below this size
QUAD_PAD = 4            # extra pixels added around each quad before white-fill

# ============================================================
# SKIP LIST
# ============================================================
# IDs of bubbles that must be neither erased nor rendered.
# NOTE: an empty ``{}`` literal is a dict, not a set, so the empty default is
# spelled ``set()``. To skip bubbles, replace it with a set literal such as
# ``{3, 7}``.
SKIP_BUBBLE_IDS: Set[int] = set()
|
|
|
|
# ============================================================
|
|
# FONT LOADER
|
|
# ============================================================
|
|
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a usable TrueType face from ``path`` at ``size``.

    For ``.ttc`` collections the first four face indices are probed in order;
    any other file is probed at index 0 only. A face counts as usable when it
    both opens and can measure the glyph "A". Returns ``None`` when no probed
    face is usable.
    """
    face_count = 4 if path.lower().endswith(".ttc") else 1
    for face_index in range(face_count):
        try:
            candidate = ImageFont.truetype(path, size, index=face_index)
            candidate.getbbox("A")  # raises if face metrics are broken
        except Exception:
            # Best-effort probing: any failure just means "try the next face".
            continue
        return candidate
    return None
|
|
|
|
def resolve_font_path() -> str:
    """Pick the first entry of FONT_CANDIDATES that loads at DEFAULT_FONT_SIZE.

    Returns the chosen path, or an empty string when no candidate loads (the
    caller then falls back to Pillow's built-in font). The outcome is reported
    on stdout either way.
    """
    working = next(
        (
            path
            for path in FONT_CANDIDATES
            if load_font(path, DEFAULT_FONT_SIZE) is not None
        ),
        None,
    )
    if working is None:
        print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
        return ""
    print(f" ✅ Font: {working}")
    return working
|
|
|
|
# ============================================================
|
|
# PARSERS
|
|
# ============================================================
|
|
def parse_translations(filepath: str) -> Dict[int, str]:
    """Read the translations file and return ``{bubble_id: translated_text}``.

    Only lines starting with ``#`` are considered; they look like::

        #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS

    The bubble id is the integer after ``#`` in the first field and the
    translation is the fifth field. Lines with fewer than five fields, a
    non-integer id, an empty translation, or the placeholder ``-`` are skipped.
    If an id appears more than once, the last occurrence wins.
    """
    translations: Dict[int, str] = {}

    # "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM. With plain
    # "utf-8" a BOM would stay glued to the first line, which would then fail
    # the startswith("#") test and silently lose the first bubble.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue

            parts = line.split("|")
            if len(parts) < 5:
                continue

            # Only the id conversion can raise, so keep the try body minimal.
            try:
                bid = int(parts[0].lstrip("#"))
            except ValueError:
                continue

            translated = parts[4].strip()
            if translated and translated != "-":
                translations[bid] = translated

    return translations
|
|
|
|
def parse_bubbles(filepath: str) -> Any:
    """Load the bubble description file and return the decoded JSON as-is.

    The file is decoded with "utf-8-sig" so that a leading byte-order mark
    (which ``json.load`` rejects when it reaches the parser) is stripped;
    BOM-less UTF-8 files decode exactly as before.
    """
    with open(filepath, "r", encoding="utf-8-sig") as f:
        return json.load(f)
|
|
|
|
# ============================================================
|
|
# ERASE — white-fill every OCR quad (with small padding)
|
|
# ============================================================
|
|
def erase_quads(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
):
    """White-fill the OCR quads of every bubble that will be re-rendered.

    A bubble is erased only when it has an entry in ``translations`` AND its
    id is not in ``skip_ids``; every other bubble is counted as ignored.

    Args:
        image_bgr: Source image array; it is copied, never modified in place.
        bubbles_data: Mapping of bubble id (as a string) to bubble record. The
            record's ``"quads"`` entry is read as a list of point lists.
        translations: Mapping of bubble id to translated text.
        skip_ids: Bubble ids that must be left untouched.
        pad: Extra pixels added around each quad's bounding box.

    Returns:
        A copy of ``image_bgr`` with the selected quads painted white.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    white = (255, 255, 255)

    erased_count = 0
    skipped_count = 0

    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)

        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue

        # "or []" also covers an explicit null in the JSON.
        for quad in val.get("quads") or []:
            if not quad:
                # Nothing to erase, and min()/max() would raise on no points.
                continue

            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(result, [pts], white)

            # Take the bounds from the int32 array and convert them to plain
            # ints: cv2.rectangle rejects float coordinates, which OCR output
            # may contain.
            x1 = max(0, int(pts[:, 0].min()) - pad)
            y1 = max(0, int(pts[:, 1].min()) - pad)
            x2 = min(iw - 1, int(pts[:, 0].max()) + pad)
            y2 = min(ih - 1, int(pts[:, 1].max()) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), white, -1)

        erased_count += 1

    print(f" Erased : {erased_count} bubbles")
    print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
|
|
|
|
# ============================================================
|
|
# DYNAMIC TEXT FITTING
|
|
# ============================================================
|
|
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Estimate the source font size from a bubble's OCR line boxes.

    The estimate is 85% of the (truncated) median ``"h"`` of the entries in
    ``bubble_data["line_bboxes"]``, clamped to ``[MIN_FONT_SIZE, 60]``.
    ``fallback_size`` is returned unchanged when there are no line boxes.
    """
    boxes = bubble_data.get("line_bboxes", [])
    if boxes:
        typical_height = int(np.median([box["h"] for box in boxes]))
        size_guess = int(typical_height * 0.85)
        capped = min(size_guess, 60)
        return max(MIN_FONT_SIZE, capped)
    return fallback_size
|
|
|
|
def fit_text_dynamically(
    text: str,
    font_path: str,
    max_w: int,
    max_h: int,
    target_font_size: int
) -> Tuple[List[str], Any, int, int]:
    """Wrap ``text`` and pick the largest font size that fits the box.

    Starting at ``target_font_size`` and stepping down by 2 while the size is
    at least ``MIN_FONT_SIZE``, the text is wrapped to a column count derived
    from the width of the glyph "A". The first size whose widest line fits
    ``max_w`` and whose stacked lines fit ``max_h`` is accepted.

    Returns:
        ``(wrapped_lines, font_object, line_spacing, final_font_size)``.

        * Empty ``font_path``: Pillow's default font, wrapping at 6 px per
          character, spacing 4, size 10.
        * Font fails to load at some size: the unwrapped text, Pillow's
          default font, spacing 4, size 10.
        * Nothing fits: the text wrapped at ``MIN_FONT_SIZE``, even though it
          may overflow the box.
    """
    # No TrueType font available: wrap by a fixed per-character width.
    if not font_path:
        bitmap_font = ImageFont.load_default()
        columns = max(1, int(max_w / 6))
        return textwrap.wrap(text, width=columns), bitmap_font, 4, 10

    size = target_font_size
    while size >= MIN_FONT_SIZE:
        candidate = load_font(font_path, size)
        if candidate is None:
            return [text], ImageFont.load_default(), 4, 10

        # Column budget: 95% of the box width divided by the width of "A".
        a_box = candidate.getbbox("A")
        glyph_w = (a_box[2] - a_box[0]) or 10
        columns = max(1, int((max_w * 0.95) / glyph_w))
        lines = textwrap.wrap(text, width=columns)

        # Use uniform font metrics for height to protect accents like È
        gap = max(2, int(size * 0.15))
        if hasattr(candidate, 'getmetrics'):
            row_h = sum(candidate.getmetrics())  # ascent + descent
        else:
            row_h = size

        n_lines = len(lines)
        block_h = (row_h * n_lines) + (gap * max(0, n_lines - 1))

        widths = []
        for ln in lines:
            ln_box = candidate.getbbox(ln)
            widths.append(ln_box[2] - ln_box[0])
        widest = max([0, *widths])

        if widest <= max_w and block_h <= max_h:
            return lines, candidate, gap, size

        size -= 2

    # Nothing fit: settle for the minimum size and accept possible overflow.
    last_font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
    if hasattr(last_font, 'getbbox'):
        a_box = last_font.getbbox("A")
    else:
        a_box = (0, 0, 6, 10)
    glyph_w = (a_box[2] - a_box[0]) or 6
    columns = max(1, int(max_w / glyph_w))
    lines = textwrap.wrap(text, width=columns)
    min_gap = max(2, int(MIN_FONT_SIZE * 0.15))

    return lines, last_font, min_gap, MIN_FONT_SIZE
|
|
|
|
# ============================================================
|
|
# RENDER
|
|
# ============================================================
|
|
def render_text(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    font_path: str,
    skip_ids: Set[int]
):
    """Draw each translated text centered in its bubble's text box.

    The box is the bubble's ``line_union_bbox`` (or ``text_bbox`` when that is
    missing or empty), grown by 10% in each dimension. Bubbles without a
    translation, listed in ``skip_ids``, or lacking both boxes are left alone.
    Text is drawn in black with a white stroke so residual original
    characters are covered.

    Returns a new BGR image array; ``image_bgr`` itself is not drawn on.
    """
    canvas = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    pen = ImageDraw.Draw(canvas)

    rendered_count = 0

    for key, bubble in bubbles_data.items():
        bubble_id = int(key)
        if bubble_id in skip_ids or bubble_id not in translations:
            continue

        text = translations[bubble_id]

        box = bubble.get("line_union_bbox") or bubble.get("text_bbox")
        if not box:
            continue

        # Grow the box by 10% per dimension, keeping it centered.
        left, top, width, height = (box[k] for k in ("x", "y", "w", "h"))
        grow_w = int(width * 0.1)
        grow_h = int(height * 0.1)
        left -= grow_w // 2
        top -= grow_h // 2
        width += grow_w
        height += grow_h

        start_size = get_original_font_size(bubble)
        lines, font, gap, used_size = fit_text_dynamically(
            text, font_path, width, height, start_size
        )

        # Use uniform typographic line height for rendering to protect accents
        if hasattr(font, 'getmetrics'):
            row_h = sum(font.getmetrics())  # ascent + descent
        else:
            row_h = used_size

        n_lines = len(lines)
        block_h = (row_h * n_lines) + (gap * max(0, n_lines - 1))
        y = top + (height - block_h) // 2

        # Dynamic outline thickness based on the final scaled font size
        stroke = max(2, int(used_size * 0.10))
        can_measure = hasattr(font, 'getbbox')

        for ln in lines:
            if can_measure:
                ln_box = font.getbbox(ln)
                ln_w = ln_box[2] - ln_box[0]
            else:
                ln_w = len(ln) * 6

            x = left + (width - ln_w) // 2

            # Draw text with white stroke for artifact coverage
            pen.text(
                (x, y),
                ln,
                fill=(0, 0, 0),
                font=font,
                stroke_width=stroke,
                stroke_fill=(255, 255, 255)
            )

            # Advance Y by the uniform line height + spacing
            y += row_h + gap

        rendered_count += 1

    print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
|
|
|
|
# ============================================================
|
|
# MAIN
|
|
# ============================================================
|
|
def main():
    """Run the pipeline: load inputs, erase original text, render, save.

    Paths come from the module-level CONFIG constants. Progress is printed to
    stdout. The function returns early, after printing an error, when the page
    image cannot be loaded or the result cannot be written.
    """
    print(f"Loading image: {IMAGE_PATH}")
    image_bgr = cv2.imread(IMAGE_PATH)
    if image_bgr is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f"❌ Error: Could not load {IMAGE_PATH}")
        return

    print(f"Loading translations: {TRANSLATIONS_PATH}")
    translations = parse_translations(TRANSLATIONS_PATH)

    print(f"Loading bubble data: {BUBBLES_PATH}")
    bubbles_data = parse_bubbles(BUBBLES_PATH)

    print("Resolving font...")
    font_path = resolve_font_path()

    print("\n--- Step 1: Erasing original text ---")
    erased_bgr = erase_quads(
        image_bgr=image_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD
    )

    print("\n--- Step 2: Rendering translated text ---")
    final_bgr = render_text(
        image_bgr=erased_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        font_path=font_path,
        skip_ids=SKIP_BUBBLE_IDS
    )

    print(f"\nSaving final image to: {OUTPUT_PATH}")
    # cv2.imwrite reports failure through its boolean return value, so check
    # it instead of announcing success unconditionally.
    if not cv2.imwrite(OUTPUT_PATH, final_bgr):
        print(f"❌ Error: Could not write {OUTPUT_PATH}")
        return
    print("✅ Done!")


if __name__ == "__main__":
    main()