Files
manga-translator/manga-renderer.py
Guillem Hernandez Sola 555892348f Added new
2026-04-11 14:00:07 +02:00

352 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import json
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Default path of the source page image (default for `input_image`).
INPUT_IMAGE = "page.png"
# Default path the rendered page is written to (default for `output_image`).
OUTPUT_IMAGE = "page_translated.png"
# Default translations table read by parse_translations().
TRANSLATIONS_FILE = "output.txt"
# Default bubble bounding-box file read by load_bubble_boxes().
BUBBLES_FILE = "bubbles.json"
# Preferred font file; resolve_font() uses it only if it exists on disk.
FONT_PATH = "font.ttf"
# Font tried when FONT_PATH does not exist (looks like a macOS system
# path -- adjust on other platforms).
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"
# Default for the `font_color` argument of render_translated_page().
# NOTE(review): that function currently picks black/white ink from the
# sampled background brightness and does not read `font_color`.
FONT_COLOR = (0, 0, 0)
# ─────────────────────────────────────────────
# PARSE output.txt
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parses output.txt → {bubble_id: translated_text}.

    A line is a data row when it starts (after optional whitespace)
    with ``#<digits>``. Rows are split into columns on runs of two or
    more spaces; rows with fewer than three columns are ignored. The
    last column is the translated text. Rows whose translation starts
    with ``[`` are skipped (presumably placeholder markers written by
    the translator step -- confirm against the producer of the file).

    Only bubbles present in the file are returned.
    Absent IDs are left completely untouched on the page.

    Args:
        filepath : Path to the UTF-8 encoded translations file.

    Returns:
        Dict mapping int bubble ID → translated text. If an ID appears
        on several rows, the last row wins.
    """
    # Compiled once; the capture group is the numeric bubble ID.
    row_start = re.compile(r"^\s*#(\d+)")
    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\n")
            marker = row_start.match(line)
            if marker is None:
                continue
            parts = re.split(r" {2,}", line.strip())
            if len(parts) < 3:
                continue
            # Use only the digits of the leading "#N" marker. Stripping
            # every non-digit from the whole first column would fold
            # unrelated digits into the ID (e.g. "#2 v3" → 23).
            bubble_id = int(marker.group(1))
            translated = parts[-1].strip()
            if translated.startswith("["):
                continue
            translations[bubble_id] = translated
    print(f"{len(translations)} bubble(s) to translate: "
          f"{sorted(translations.keys())}")
    for bid, text in sorted(translations.items()):
        print(f" #{bid}: {text}")
    return translations
# ─────────────────────────────────────────────
# LOAD bubbles.json
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Reads the bubble bounding boxes from a JSON file.

    The top-level JSON object is keyed by bubble ID (as a string);
    keys are converted to int. Each value is expected to provide
    'x', 'y', 'w' and 'h', which are echoed to stdout.

    Args:
        filepath : Path to the UTF-8 encoded JSON file.

    Returns:
        Dict mapping int bubble ID → box dict as stored in the file.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    boxes = {}
    for key, box in payload.items():
        boxes[int(key)] = box
    print(f" ✅ Loaded {len(boxes)} bubble(s)")
    # Report in ascending ID order regardless of file order.
    for bubble_id in sorted(boxes):
        box = boxes[bubble_id]
        print(f" #{bubble_id}: ({box['x']},{box['y']}) "
              f"{box['w']}×{box['h']}px")
    return boxes
# ─────────────────────────────────────────────
# SAMPLE BACKGROUND COLOR
# ─────────────────────────────────────────────
def sample_bubble_background(cv_image, bubble_data):
    """
    Samples the background color inside the bbox by averaging the
    pixels whose gray level is at or above the 90th percentile of
    the region (i.e. roughly the brightest 10%).

    The box is clipped to the image. Its right/bottom edges are
    derived from the *unclamped* origin so that a box starting
    off-image is not shifted onto pixels it does not cover (this
    matches how erase_bubble_text clips the same box).

    Args:
        cv_image    : BGR numpy array (not modified)
        bubble_data : Dict with 'x','y','w','h'

    Returns:
        (B, G, R) tuple of ints; (255, 255, 255) when the clipped
        box is empty.
    """
    x = max(0, bubble_data["x"])
    y = max(0, bubble_data["y"])
    x2 = min(cv_image.shape[1], bubble_data["x"] + bubble_data["w"])
    y2 = min(cv_image.shape[0], bubble_data["y"] + bubble_data["h"])
    # Guard explicitly: a negative end index would otherwise be read
    # by numpy as "counted from the end" and select unrelated pixels.
    if x2 <= x or y2 <= y:
        return (255, 255, 255)
    region = cv_image[y:y2, x:x2]
    if region.size == 0:
        return (255, 255, 255)
    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    threshold = np.percentile(gray, 90)
    bg_mask = gray >= threshold
    if not np.any(bg_mask):
        return (255, 255, 255)
    return tuple(int(c) for c in region[bg_mask].mean(axis=0))
# ─────────────────────────────────────────────
# ERASE ORIGINAL TEXT
# Fills the tight OCR bbox with the sampled
# background color. No extra expansion —
# the bbox from bubbles.json is already the
# exact size of the red squares.
# ─────────────────────────────────────────────
def erase_bubble_text(cv_image, bubble_data,
                      bg_color=(255, 255, 255)):
    """
    Fills the bubble bounding box with bg_color.

    The box is clipped to the image bounds. A box that lies entirely
    outside the image (or has non-positive size after clipping) is a
    no-op.

    Args:
        cv_image    : BGR numpy array (modified in place)
        bubble_data : Dict with 'x','y','w','h'
        bg_color    : (B,G,R) fill color

    Returns:
        None
    """
    img_h, img_w = cv_image.shape[:2]
    x = max(0, bubble_data["x"])
    y = max(0, bubble_data["y"])
    x2 = min(img_w, bubble_data["x"] + bubble_data["w"])
    y2 = min(img_h, bubble_data["y"] + bubble_data["h"])
    # Without this guard a negative x2/y2 (box fully off the left/top
    # edge) would be interpreted by numpy as an index from the end and
    # paint a large, unrelated part of the page.
    if x2 <= x or y2 <= y:
        return
    cv_image[y:y2, x:x2] = list(bg_color)
# ─────────────────────────────────────────────
# FIT FONT SIZE
# ─────────────────────────────────────────────
def _wrap_words(draw, words, font, max_w):
    """Greedily packs words into lines; a line only exceeds max_w when a single word does."""
    lines, current = [], ""
    for word in words:
        candidate = (current + " " + word).strip()
        box = draw.textbbox((0, 0), candidate, font=font)
        if (box[2] - box[0]) <= max_w:
            current = candidate
        else:
            if current:
                lines.append(current)
            current = word
    if current:
        lines.append(current)
    return lines


def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=7, max_size=48):
    """
    Finds the largest font size where word-wrapped text
    fits inside (max_w × max_h).

    Sizes are tried from max_size down to min_size. A size is accepted
    only if every wrapped line is at most max_w wide AND the stacked
    lines (line height = height of "Ay" + 2px) are at most max_h tall.
    Checking width matters because a single word longer than max_w
    cannot be wrapped and would otherwise be accepted at a large size.

    Args:
        draw      : PIL ImageDraw used for text measurement
        text      : Text to lay out (split on whitespace)
        max_w     : Available width in pixels
        max_h     : Available height in pixels
        font_path : TrueType font path, or None for the PIL default
        min_size  : Smallest size tried
        max_size  : Largest size tried

    Returns:
        (font, lines). If no size fits, the smallest size tried is
        returned together with its wrapped lines, so the overflow is
        as small as possible. If the size range is empty, the PIL
        default font and [text] are returned.
    """
    words = text.split()
    smallest = None
    for size in range(max_size, min_size - 1, -1):
        try:
            font = (ImageFont.truetype(font_path, size)
                    if font_path else ImageFont.load_default())
        except (OSError, ValueError, ImportError):
            # Unreadable font file, bad size, or FreeType support
            # missing: keep going with the built-in bitmap font.
            font = ImageFont.load_default()
        lines = _wrap_words(draw, words, font, max_w)
        widest = 0
        for line in lines:
            box = draw.textbbox((0, 0), line, font=font)
            widest = max(widest, box[2] - box[0])
        lh = draw.textbbox((0, 0), "Ay", font=font)
        line_h = (lh[3] - lh[1]) + 2
        if widest <= max_w and line_h * len(lines) <= max_h:
            return font, lines
        smallest = (font, lines)
    if smallest is not None:
        return smallest
    return ImageFont.load_default(), [text]
# ─────────────────────────────────────────────
# RENDER TEXT INTO BUBBLE
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, bubble_data, text,
                          font_path, padding=8,
                          font_color=(0, 0, 0)):
    """
    Draws the translated text centered inside the bubble's bbox.

    The drawable area is the bbox shrunk by `padding` on every side
    (never smaller than 1×1). Font size and line breaks come from
    fit_font_size; lines are stacked with a fixed line height (height
    of "Ay" + 2px) and each line is centered horizontally.

    Args:
        pil_image   : PIL image (drawn on in place)
        bubble_data : Dict with 'x','y','w','h'
        text        : Translated text
        font_path   : TrueType font path, or None for the PIL default
        padding     : Inner margin in pixels
        font_color  : Fill color passed to ImageDraw.text
    """
    left, top = bubble_data["x"], bubble_data["y"]
    box_w, box_h = bubble_data["w"], bubble_data["h"]
    canvas = ImageDraw.Draw(pil_image)

    avail_w = max(1, box_w - padding * 2)
    avail_h = max(1, box_h - padding * 2)
    font, wrapped = fit_font_size(canvas, text, avail_w, avail_h,
                                  font_path)

    ref = canvas.textbbox((0, 0), "Ay", font=font)
    step = (ref[3] - ref[1]) + 2

    # Vertical centering: never start above the padded top edge.
    slack_y = (avail_h - step * len(wrapped)) // 2
    first_y = top + padding + max(0, slack_y)

    for row, content in enumerate(wrapped):
        extent = canvas.textbbox((0, 0), content, font=font)
        slack_x = (avail_w - (extent[2] - extent[0])) // 2
        origin = (left + padding + max(0, slack_x),
                  first_y + row * step)
        canvas.text(origin, content, font=font, fill=font_color)
# ─────────────────────────────────────────────
# RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Picks the first usable font file and reports the choice.

    `font_path` is preferred; `fallback` is used when `font_path` is
    empty or does not exist on disk.

    Returns:
        The chosen path, or None when neither exists (callers then
        use the PIL default font).
    """
    candidates = (
        (font_path, " ✅ Using font: "),
        (fallback, " ⚠️ Fallback: "),
    )
    for path, label in candidates:
        if not path:
            continue
        if os.path.exists(path):
            print(f"{label}{path}")
            return path
    print(" ⚠️ No font found. Using PIL default.")
    return None
# ─────────────────────────────────────────────
# MAIN RENDERER
# ─────────────────────────────────────────────
def render_translated_page(
    input_image=INPUT_IMAGE,
    output_image=OUTPUT_IMAGE,
    translations_file=TRANSLATIONS_FILE,
    bubbles_file=BUBBLES_FILE,
    font_path=FONT_PATH,
    font_fallback=FONT_FALLBACK,
    font_color=FONT_COLOR,
    text_padding=8,
    debug=False,
):
    """
    Runs the whole render pipeline and writes the translated page.

    Steps:
      1. Parse translations (only IDs present in the file are used)
      2. Load bubble boxes
      3. Intersect the two ID sets; bubbles without a translation
         are left untouched, translations without a box are reported
      4. Sample a background color for every bubble (before erasing)
      5. Fill each bubble's bbox with its sampled color
      6. Draw the translated text, black on light backgrounds
         (mean RGB > 128) and white otherwise
      7. Optionally save debug_render.png with box outlines
      8. Save the result as PNG

    Returns None; on any missing input it prints a message and
    returns early without writing output.

    NOTE(review): `font_color` is accepted but not read here; the ink
    color is always derived from the sampled background brightness.
    """

    def _ink_for(bgr):
        # Returns (rgb tuple, True when the background counts as light).
        rgb = (bgr[2], bgr[1], bgr[0])
        return rgb, (sum(rgb) / 3) > 128

    rule = "=" * 55
    print(rule)
    print(" MANGA TRANSLATOR — RENDERER")
    print(rule)

    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    print("\n📦 Loading bubble data...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble data. Re-run manga-translator.py.")
        return

    translate_ids = set(translations)
    box_ids = set(bubble_boxes)
    to_process = sorted(translate_ids.intersection(box_ids))
    untouched = sorted(box_ids.difference(translate_ids))
    missing = sorted(translate_ids.difference(box_ids))
    print(f"\n🔗 To process : {to_process}")
    print(f" Untouched : {untouched}")
    if missing:
        print(f" ⚠️ In output.txt but no box: {missing}")
    if not to_process:
        print("❌ No matching IDs. Aborting.")
        return

    print(f"\n🖼️ Loading: {input_image}")
    cv_image = cv2.imread(input_image)
    if cv_image is None:
        print(f"❌ Could not load: {input_image}")
        return
    page_h, page_w = cv_image.shape[0], cv_image.shape[1]
    print(f" {page_w}×{page_h}px")

    # Every background is sampled before anything is erased, so one
    # bubble's fill cannot influence another bubble's sample.
    print("\n🎨 Sampling backgrounds...")
    bg_colors = {}
    for bubble_id in to_process:
        sampled = sample_bubble_background(
            cv_image, bubble_boxes[bubble_id])
        bg_colors[bubble_id] = sampled
        as_rgb, is_light = _ink_for(sampled)
        ink_name = "black" if is_light else "white"
        print(f" #{bubble_id}: RGB{as_rgb} ink→{ink_name}")

    print("\n🧹 Erasing original text...")
    for bubble_id in to_process:
        box = bubble_boxes[bubble_id]
        erase_bubble_text(cv_image, box, bg_color=bg_colors[bubble_id])
        print(f" ✅ #{bubble_id} ({box['w']}×{box['h']}px)")

    # Text is drawn with PIL, which works on RGB images.
    pil_image = Image.fromarray(
        cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))

    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    print("\n✍️ Rendering...")
    for bubble_id in to_process:
        caption = translations[bubble_id]
        box = bubble_boxes[bubble_id]
        _, is_light = _ink_for(bg_colors[bubble_id])
        if is_light:
            ink = (0, 0, 0)
        else:
            ink = (255, 255, 255)
        render_text_in_bubble(
            pil_image, box, caption,
            font_path=resolved_font,
            padding=text_padding,
            font_color=ink,
        )
        print(f" ✅ #{bubble_id}: '{caption}' "
              f"({box['x']},{box['y']}) {box['w']}×{box['h']}px")

    if debug:
        # Outlines go on a copy so the saved page stays clean.
        overlay = pil_image.copy()
        pen = ImageDraw.Draw(overlay)
        for bubble_id, box in sorted(bubble_boxes.items()):
            if bubble_id in translate_ids:
                outline = (0, 200, 0)
            else:
                outline = (160, 160, 160)
            corners = [box["x"], box["y"],
                       box["x"] + box["w"], box["y"] + box["h"]]
            pen.rectangle(corners, outline=outline, width=2)
            pen.text((box["x"] + 3, box["y"] + 3),
                     f"#{bubble_id}", fill=outline)
        overlay.save("debug_render.png")
        print("\n 🐛 debug_render.png saved "
              "(green=translated, grey=untouched)")

    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print(" ✅ Done!")
    print(rule)
# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Script entry point: render with the CONFIG constants defined at
    # the top of this file (rather than repeating their literal
    # values) and with the debug overlay enabled.
    render_translated_page(
        input_image=INPUT_IMAGE,
        output_image=OUTPUT_IMAGE,
        translations_file=TRANSLATIONS_FILE,
        bubbles_file=BUBBLES_FILE,
        font_path=FONT_PATH,
        font_fallback=FONT_FALLBACK,
        font_color=FONT_COLOR,
        text_padding=8,
        debug=True,
    )