Files
manga-translator/manga-renderer.py
Guillem Hernandez Sola 727b052e93 Added good stuff
2026-04-11 14:34:18 +02:00

458 lines
17 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import json
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Default source page image handed to cv2.imread by render_translated_page.
INPUT_IMAGE = "page.png"
# Default destination for the rendered page (saved as PNG).
OUTPUT_IMAGE = "page_translated.png"
# Default column-formatted text file read by parse_translations.
TRANSLATIONS_FILE = "output.txt"
# Default JSON file of bubble boxes read by load_bubble_boxes.
BUBBLES_FILE = "bubbles.json"
# Preferred font file; resolve_font checks that it exists on disk.
FONT_PATH = "font.ttf"
# Font tried when FONT_PATH does not exist (a macOS system font location).
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"
# Default value of the font_color parameter of render_translated_page.
FONT_COLOR = (0, 0, 0)
# ─────────────────────────────────────────────
# WORD-ONLY WRAP
#
# Breaks ONLY at space boundaries.
# Returns (lines, overflow) where overflow=True
# means a single word is wider than max_w at
# this font size → caller must try smaller.
# ─────────────────────────────────────────────
def wrap_text_words(draw, text, max_w, font):
    """
    Word-wrap text to fit within max_w pixels.

    Breaks only at whitespace; never inserts hyphens or splits a word.

    Args:
        draw  : object exposing textbbox((x, y), text, font=...) that
                returns a (left, top, right, bottom) box
        text  : text to wrap (split on any whitespace)
        max_w : maximum line width in pixels
        font  : font forwarded to draw.textbbox

    Returns:
        (lines, overflow)
        lines    : list of strings covering every word of text. Each
                   line is <= max_w px wide, except lines holding a
                   single word that is itself wider than max_w.
        overflow : True if any single word exceeds max_w, meaning the
                   caller should retry with a smaller font.
    """
    def measure(s):
        bb = draw.textbbox((0, 0), s, font=font)
        return bb[2] - bb[0]

    lines = []
    current = ""
    overflow = False
    for word in text.split():
        if measure(word) > max_w:
            # Flag the overflow but keep wrapping: the oversize word
            # gets a line of its own so callers that ignore the flag
            # (last-resort rendering) still receive the full text
            # instead of a silently truncated prefix.
            overflow = True
            if current:
                lines.append(current)
                current = ""
            lines.append(word)
            continue
        candidate = f"{current} {word}" if current else word
        if measure(candidate) <= max_w:
            current = candidate
        else:
            # A non-empty line is guaranteed here: a lone word that
            # does not fit was already handled by the branch above.
            lines.append(current)
            current = word
    if current:
        lines.append(current)
    return lines, overflow
# ─────────────────────────────────────────────
# PARSE output.txt
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parse the translations table into {bubble_id: translated_text}.

    The header line (starting with "BUBBLE ... ORIGINAL") is used as a
    column ruler: the character offset of the word "TRANSLATED" marks
    where the translated column starts on every data row, so commas,
    ellipses and spaces inside the translated text are preserved.

    Only rows that begin with "#<digits>" are treated as data; separator
    rules, status lines and anything else are ignored. Rows whose
    translation is empty or starts with "[" are reported and skipped.

    Args:
        filepath : path to the UTF-8 encoded translations file

    Returns:
        dict mapping int bubble id -> translated string
    """
    translations = {}
    header_pos = None
    with open(filepath, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.rstrip("\r\n")

            if re.match(r"^BUBBLE\s+ORIGINAL", line):
                m = re.search(r"TRANSLATED", line)
                if m:
                    header_pos = m.start()
                    print(f" TRANSLATED column at char {header_pos}")
                continue

            # A single "#<id>" match replaces the former pre-filters.
            # One of them was `startswith("")`, which is always True and
            # caused every data row to be discarded.
            m_id = re.match(r"^\s*#(\d+)", line)
            if not m_id:
                continue
            bubble_id = int(m_id.group(1))

            if header_pos is not None and len(line) > header_pos:
                translated = line[header_pos:].strip()
            else:
                # No usable ruler for this row: fall back to splitting on
                # runs of 2+ spaces and taking the last of >= 3 columns.
                parts = re.split(r" {2,}", line.strip())
                translated = parts[-1].strip() if len(parts) >= 3 else ""

            if not translated or translated.startswith("["):
                print(f" ⚠️ #{bubble_id}: no translation found")
                continue
            translations[bubble_id] = translated

    print(f"{len(translations)} bubble(s) to translate: "
          f"{sorted(translations.keys())}")
    for bid, text in sorted(translations.items()):
        print(f" #{bid}: {text}")
    return translations
# ─────────────────────────────────────────────
# LOAD bubbles.json
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Read the bubble-box JSON file and key it by integer bubble id.

    The JSON top level is an object whose keys are converted with
    int(); each value is kept as-is and is expected to provide the
    "x", "y", "w" and "h" entries that are printed in the summary.

    Args:
        filepath : path to the UTF-8 encoded JSON file

    Returns:
        dict mapping int bubble id -> box value from the file
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    boxes = {}
    for key, box in payload.items():
        boxes[int(key)] = box

    print(f" ✅ Loaded {len(boxes)} bubble(s)")
    # Summary is listed in ascending id order; the returned dict keeps
    # the file's own ordering.
    for bid in sorted(boxes):
        box = boxes[bid]
        print(f" #{bid}: ({box['x']},{box['y']}) "
              f"{box['w']}×{box['h']}px")
    return boxes
# ─────────────────────────────────────────────
# SAMPLE BACKGROUND COLOR
# ─────────────────────────────────────────────
def sample_bubble_background(cv_image, bubble_data):
    """
    Estimate the background colour of a bubble box.

    The box is intersected with the image, converted to grayscale, and
    the pixels at or above the 90th brightness percentile are averaged
    per channel.

    Args:
        cv_image    : image array indexed [row, col, channel]; passed to
                      cv2.cvtColor with COLOR_BGR2GRAY
        bubble_data : mapping with "x", "y", "w", "h" entries

    Returns:
        3-tuple of ints in the image's channel order, or
        (255, 255, 255) when the box does not overlap the image.
    """
    img_h, img_w = cv_image.shape[:2]
    x = max(0, bubble_data["x"])
    y = max(0, bubble_data["y"])
    # Far edges are derived from the unclamped origin so a box that
    # hangs off the top/left is cropped rather than shifted, and are
    # floored at the near edge so an off-image box gives an empty
    # region instead of a negative index that wraps around.
    x2 = max(x, min(img_w, bubble_data["x"] + bubble_data["w"]))
    y2 = max(y, min(img_h, bubble_data["y"] + bubble_data["h"]))

    region = cv_image[y:y2, x:x2]
    if region.size == 0:
        return (255, 255, 255)

    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    threshold = np.percentile(gray, 90)
    bg_mask = gray >= threshold
    if not np.any(bg_mask):
        return (255, 255, 255)
    return tuple(int(c) for c in region[bg_mask].mean(axis=0))
# ─────────────────────────────────────────────
# ERASE ORIGINAL TEXT
# ─────────────────────────────────────────────
def erase_bubble_text(cv_image, bubble_data,
                      bg_color=(255, 255, 255)):
    """
    Fill the bubble box with a solid colour, in place.

    The box is cropped to the image bounds first. A box that does not
    overlap the image leaves the image untouched.

    Args:
        cv_image    : image array indexed [row, col, channel]; modified
                      in place
        bubble_data : mapping with "x", "y", "w", "h" entries
        bg_color    : per-channel fill value, in the image's own
                      channel order

    Returns:
        None
    """
    img_h, img_w = cv_image.shape[:2]
    x = max(0, bubble_data["x"])
    y = max(0, bubble_data["y"])
    x2 = min(img_w, bubble_data["x"] + bubble_data["w"])
    y2 = min(img_h, bubble_data["y"] + bubble_data["h"])
    # A box entirely above/left of the image yields a negative far edge,
    # which a slice would interpret as "count from the end" and paint a
    # large unrelated area. Bail out on any empty intersection.
    if x2 <= x or y2 <= y:
        return
    cv_image[y:y2, x:x2] = list(bg_color)
# ─────────────────────────────────────────────
# LINE HEIGHT (tight)
#
# Uses actual ascender+descender of the font
# at the given size, with a minimal 1px gap.
# Much tighter than the old flat "+2" approach.
# ─────────────────────────────────────────────
def get_line_height(draw, font):
    """
    Return the line height in pixels for the given font.

    The height is the vertical extent of the bounding box that
    draw.textbbox reports for the sample string "Ay", plus one pixel
    of spacing.
    """
    _, top, _, bottom = draw.textbbox((0, 0), "Ay", font=font)
    glyph_height = bottom - top
    return glyph_height + 1
# ─────────────────────────────────────────────
# FIT FONT SIZE (dynamic ceiling, word-wrap)
#
# max_size is derived from the box itself:
# min(MAX_FONT_CAP, inner_h)
# so a tall box can use a large font and a
# small box won't waste iterations on huge sizes.
#
# Rejects a size if:
# • any single word is wider than inner_w, OR
# • total wrapped height exceeds inner_h
# ─────────────────────────────────────────────
MAX_FONT_CAP = 120  # absolute ceiling across all boxes


def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=7):
    """
    Pick the largest font size at which the word-wrapped text fits
    inside max_w x max_h without breaking any word.

    Sizes are tried from max(min(MAX_FONT_CAP, max_h), min_size) down
    to min_size, and the first size that fits wins. If none fits, the
    text is wrapped at min_size anyway and returned as a best effort.

    Args:
        draw      : ImageDraw instance used for measuring
        text      : full text string
        max_w     : available width in pixels
        max_h     : available height in pixels
        font_path : path to a font file, or None for the PIL default
        min_size  : smallest font size to try (default: 7)

    Returns:
        (font, lines)
    """
    def load_font(size):
        # Any failure while loading falls back to PIL's default font.
        try:
            if font_path:
                return ImageFont.truetype(font_path, size)
            return ImageFont.load_default()
        except Exception:
            return ImageFont.load_default()

    # No point trying a font taller than the box, but never start
    # below the minimum size.
    ceiling = max(min(MAX_FONT_CAP, max_h), min_size)

    for size in range(ceiling, min_size - 1, -1):
        candidate = load_font(size)
        wrapped, too_wide = wrap_text_words(draw, text, max_w, candidate)
        if too_wide:
            # A single word is wider than the box at this size.
            continue
        if get_line_height(draw, candidate) * len(wrapped) <= max_h:
            # Sizes are descending, so the first fit is the largest.
            return candidate, wrapped

    # Nothing fit: render at the minimum size regardless of overflow.
    fallback_font = load_font(min_size)
    wrapped, _ = wrap_text_words(draw, text, max_w, fallback_font)
    if not wrapped:
        wrapped = [text]
    return fallback_font, wrapped
# ─────────────────────────────────────────────
# RENDER TEXT INTO BUBBLE
#
# Text is centered both horizontally and
# vertically inside the padded bbox.
# Line height uses get_line_height() (tight).
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, bubble_data, text,
                          font_path, padding=6,
                          font_color=(0, 0, 0)):
    """
    Draw text centered inside a bubble's padded bounding box.

    The font size is chosen by fit_font_size and the text is wrapped at
    word boundaries only. The block of lines is centered vertically and
    each line is centered horizontally within the padded area.

    Args:
        pil_image   : PIL image to draw on (modified in place)
        bubble_data : mapping with "x", "y", "w", "h" entries
        text        : text to render
        font_path   : font file path forwarded to fit_font_size
        padding     : inset in pixels applied on every side
        font_color  : fill colour passed to ImageDraw.text
    """
    left = bubble_data["x"] + padding
    top = bubble_data["y"] + padding
    # Keep at least a 1x1 drawable area even when padding exceeds the box.
    inner_w = max(1, bubble_data["w"] - padding * 2)
    inner_h = max(1, bubble_data["h"] - padding * 2)

    draw = ImageDraw.Draw(pil_image)
    font, lines = fit_font_size(draw, text, inner_w, inner_h, font_path)
    line_h = get_line_height(draw, font)

    # Vertical centering of the whole block; never start above the top.
    block_h = line_h * len(lines)
    cursor_y = top + max(0, (inner_h - block_h) // 2)

    for row in lines:
        bbox = draw.textbbox((0, 0), row, font=font)
        row_w = bbox[2] - bbox[0]
        # Horizontal centering per line; never start left of the inset.
        cursor_x = left + max(0, (inner_w - row_w) // 2)
        draw.text((cursor_x, cursor_y), row, font=font, fill=font_color)
        cursor_y += line_h
# ─────────────────────────────────────────────
# RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Choose which font file to use.

    Returns font_path if it is set and exists on disk, otherwise
    fallback under the same condition, otherwise None (the caller then
    uses PIL's default font). The choice is printed.
    """
    def usable(path):
        return bool(path) and os.path.exists(path)

    if usable(font_path):
        print(f" ✅ Using font: {font_path}")
        return font_path

    if not usable(fallback):
        print(" ⚠️ No font found. Using PIL default.")
        return None

    print(f" ⚠️ Fallback: {fallback}")
    return fallback
# ─────────────────────────────────────────────
# MAIN RENDERER
# ─────────────────────────────────────────────
def render_translated_page(
    input_image       = INPUT_IMAGE,
    output_image      = OUTPUT_IMAGE,
    translations_file = TRANSLATIONS_FILE,
    bubbles_file      = BUBBLES_FILE,
    font_path         = FONT_PATH,
    font_fallback     = FONT_FALLBACK,
    font_color        = FONT_COLOR,
    text_padding      = 6,
    debug             = False,
):
    """
    Run the full render pipeline for one page.

    Steps, in order: parse translations, load bubble boxes, match ids,
    load the image, sample each bubble's background, erase each bubble,
    draw the translated text, optionally write a debug overlay, and
    save the result as PNG. Progress is printed throughout. The
    function returns None early (after printing a message) when there
    are no translations, no boxes, no matching ids, or the image
    cannot be loaded.

    Args:
        input_image       : path of the page image to read
        output_image      : path the rendered PNG is written to
        translations_file : path passed to parse_translations
        bubbles_file      : path passed to load_bubble_boxes
        font_path         : preferred font file
        font_fallback     : font file tried if font_path is missing
        font_color        : accepted but not used by this function;
                            ink is black or white depending on the
                            sampled background brightness
        text_padding      : inset in pixels inside each bubble box
        debug             : when True, also writes "debug_render.png"
                            with box outlines and ids
    """
    banner = "=" * 55
    print(banner)
    print(" MANGA TRANSLATOR — RENDERER")
    print(banner)

    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    print("\n📦 Loading bubble data...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble data. Re-run manga-translator.py.")
        return

    translate_ids = set(translations)
    box_ids = set(bubble_boxes)
    to_process = sorted(translate_ids & box_ids)
    untouched = sorted(box_ids - translate_ids)
    missing = sorted(translate_ids - box_ids)
    print(f"\n🔗 To process : {to_process}")
    print(f" Untouched : {untouched}")
    if missing:
        print(f" ⚠️ In output.txt but no box: {missing}")
    if not to_process:
        print("❌ No matching IDs. Aborting.")
        return

    print(f"\n🖼️ Loading: {input_image}")
    cv_image = cv2.imread(input_image)
    if cv_image is None:
        print(f"❌ Could not load: {input_image}")
        return
    img_rows, img_cols = cv_image.shape[0], cv_image.shape[1]
    print(f" {img_cols}×{img_rows}px")

    # Backgrounds must be sampled before anything is erased, otherwise
    # a later bubble could sample an already-painted area.
    print("\n🎨 Sampling backgrounds...")
    bg_colors = {}
    light_background = {}
    for bid in to_process:
        sampled = sample_bubble_background(cv_image, bubble_boxes[bid])
        bg_colors[bid] = sampled
        as_rgb = (sampled[2], sampled[1], sampled[0])
        # Mean channel value above 128 counts as a light background.
        light_background[bid] = sum(as_rgb) / 3 > 128
        if light_background[bid]:
            ink = "black"
        else:
            ink = "white"
        print(f" #{bid}: RGB{as_rgb} ink→{ink}")

    print("\n🧹 Erasing original text...")
    for bid in to_process:
        box = bubble_boxes[bid]
        erase_bubble_text(cv_image, box, bg_color=bg_colors[bid])
        print(f" ✅ #{bid} ({box['w']}×{box['h']}px)")

    # Text drawing is done with PIL, which expects RGB channel order.
    rgb_array = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_array)

    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    print("\n✍️ Rendering...")
    for bid in to_process:
        text = translations[bid]
        box = bubble_boxes[bid]
        if light_background[bid]:
            txt_color = (0, 0, 0)
        else:
            txt_color = (255, 255, 255)
        render_text_in_bubble(
            pil_image, box, text,
            font_path=resolved_font,
            padding=text_padding,
            font_color=txt_color,
        )
        print(f" ✅ #{bid}: '{text}' "
              f"({box['x']},{box['y']}) {box['w']}×{box['h']}px")

    if debug:
        overlay = pil_image.copy()
        overlay_draw = ImageDraw.Draw(overlay)
        for bid in sorted(bubble_boxes):
            box = bubble_boxes[bid]
            if bid in translate_ids:
                outline = (0, 200, 0)
            else:
                outline = (160, 160, 160)
            corners = [box["x"], box["y"],
                       box["x"] + box["w"], box["y"] + box["h"]]
            overlay_draw.rectangle(corners, outline=outline, width=2)
            overlay_draw.text((box["x"] + 3, box["y"] + 3),
                              f"#{bid}", fill=outline)
        overlay.save("debug_render.png")
        print("\n 🐛 debug_render.png saved "
              "(green=translated, grey=untouched)")

    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print(" ✅ Done!")
    print(banner)
# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
# Script entry point: render one page with explicit settings and the
# debug overlay enabled (also writes debug_render.png).
if __name__ == "__main__":
    # NOTE(review): font_color is passed here, but render_translated_page
    # as written picks black or white ink from the sampled background
    # brightness and does not read this argument.
    render_translated_page(
        input_image = "page.png",
        output_image = "page_translated.png",
        translations_file = "output.txt",
        bubbles_file = "bubbles.json",
        font_path = "fonts/ComicRelief-Regular.ttf",
        font_fallback = "/System/Library/Fonts/Helvetica.ttc",
        font_color = (0, 0, 0),
        text_padding = 6,
        debug = True,
    )