Inici del render

This commit is contained in:
Guillem Hernandez Sola
2026-04-10 18:14:54 +02:00
parent f92ea8410b
commit 458915278e
3 changed files with 484 additions and 57 deletions

381
manga-renderer.py Normal file
View File

@@ -0,0 +1,381 @@
import re
import json
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Default source page; used as the default `input_image` of render_translated_page.
INPUT_IMAGE = "page.png"
# Default destination for the rendered page (saved as PNG by render_translated_page).
OUTPUT_IMAGE = "page_translated.png"
# Default translations table read by parse_translations.
TRANSLATIONS_FILE = "output.txt"
# Default bubble bounding-box file read by load_bubble_boxes.
BUBBLES_FILE = "bubbles.json"
# Preferred font file; resolve_font falls back to FONT_FALLBACK when it does not exist.
FONT_PATH = "font.ttf"
# Fallback font path (looks like a macOS system font location); when this is
# missing as well, resolve_font returns None and PIL's default font is used.
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"
# Default text colour as an RGB tuple (black).
FONT_COLOR = (0, 0, 0)
# RGB white. NOTE(review): not referenced by the functions visible in this
# file — erase_bubble_rect hard-codes 255 instead; confirm whether it is still needed.
BUBBLE_FILL = (255, 255, 255)
# ─────────────────────────────────────────────
# STEP 1: PARSE output.txt
# Robust parser: columns are separated by runs of
# 2+ spaces; the LAST column is always the translation.
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parse output.txt and return {bubble_id: translated_text}.

    Only lines that (after stripping surrounding whitespace) start with
    ``#<digits>`` are considered. Each such line is split into columns on
    runs of two or more spaces; lines with fewer than three columns are
    skipped. The LAST column is always taken as the translation, so single
    spaces inside the original or translated text are harmless.

    The bubble id is read from the digits that directly follow the leading
    ``#``. Digits appearing later in the first column (e.g. ``#2 (panel 3)``)
    therefore cannot leak into the id.

    If the same id appears on several lines, the last one wins.

    Args:
        filepath : Path to output.txt (read as UTF-8)
    Returns:
        Dict {1: "LA NOIA ESTÀ IL·LESA!", ...}
    """
    # Compiled once, outside the per-line loop.
    id_pattern = re.compile(r"^#(\d+)")
    column_sep = re.compile(r" {2,}")

    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            row = line.strip()
            # Must start with #N
            id_match = id_pattern.match(row)
            if id_match is None:
                continue
            # Split on 2+ spaces → [bubble_id_col, original_col, ..., translated_col]
            parts = column_sep.split(row)
            if len(parts) < 3:
                continue
            # Use the matched digits only, never the whole first column.
            bubble_id = int(id_match.group(1))
            translations[bubble_id] = parts[-1].strip()  # always last column

    print(f" ✅ Parsed {len(translations)} translation(s) from {filepath}")
    for bid, text in sorted(translations.items()):
        print(f" #{bid}: {text}")
    return translations
# ─────────────────────────────────────────────
# STEP 2: LOAD BUBBLE BOXES from bubbles.json
# These were saved by manga-translator.py
# and are guaranteed to match the clusters.
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Read bubble bounding boxes from a JSON file.

    The file must hold a single JSON object whose keys are bubble ids
    (as strings) and whose values carry "x", "y", "w" and "h" entries:
        {
          "1": {"x": 120, "y": 45, "w": 180, "h": 210},
          "2": { ... },
          ...
        }
    A summary of the loaded boxes is printed, ordered by bubble id.

    Args:
        filepath : Path to bubbles.json (read as UTF-8)
    Returns:
        Dict {bubble_id (int): (x, y, w, h)}
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    # Keys arrive as strings from JSON; convert them to int ids.
    boxes = {
        int(raw_id): (rect["x"], rect["y"], rect["w"], rect["h"])
        for raw_id, rect in payload.items()
    }

    print(f" ✅ Loaded {len(boxes)} bubble box(es) from {filepath}")
    for bid in sorted(boxes):
        x, y, w, h = boxes[bid]
        print(f" #{bid}: ({x},{y}) {w}×{h}px")
    return boxes
# ─────────────────────────────────────────────
# STEP 3: ERASE BUBBLE CONTENT
# Fills a rectangular region with white.
# Uses a slightly inset rect to preserve
# the bubble border.
# ─────────────────────────────────────────────
def erase_bubble_rect(image, x, y, w, h, padding=6):
    """
    Paint the inside of a bounding box with the value 255 (white).

    The box is shrunk by `padding` pixels on every side before filling,
    and the shrunken rectangle is clipped to the image bounds. Nothing is
    written when the resulting rectangle is empty.

    Args:
        image   : numpy image array indexed [row, col(, channel)]; modified in place
        x,y,w,h : Bounding box (top-left corner, width, height)
        padding : Pixels to leave untouched along each edge (default: 6)
    """
    img_h, img_w = image.shape[0], image.shape[1]

    left = max(0, x + padding)
    top = max(0, y + padding)
    right = min(img_w, x + w - padding)
    bottom = min(img_h, y + h - padding)

    # Degenerate (empty or inverted) rectangle: leave the image alone.
    if right <= left or bottom <= top:
        return

    image[top:bottom, left:right] = 255
# ─────────────────────────────────────────────
# STEP 4: FIT FONT SIZE
# Finds the largest font size where the text
# fits inside (max_w × max_h) with word wrap.
# ─────────────────────────────────────────────
def _wrap_to_width(draw, text, font, max_w):
    """Greedily word-wrap `text` for `font`; return (lines, widest_line_px)."""
    lines = []
    current = ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        bbox = draw.textbbox((0, 0), candidate, font=font)
        if (bbox[2] - bbox[0]) <= max_w:
            current = candidate
            continue
        if current:
            lines.append(current)
        # A word that is too wide on its own still gets a line of its own;
        # the caller detects the overflow through the returned widest width.
        current = word
    if current:
        lines.append(current)

    widest = 0
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        widest = max(widest, bbox[2] - bbox[0])
    return lines, widest


def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=8, max_size=48):
    """
    Find the largest font size at which word-wrapped text fits the box.

    Sizes are tried one by one from `max_size` down to `min_size`. At each
    size the text is greedily wrapped to `max_w`; the size is accepted when
    every wrapped line is at most `max_w` wide AND the stacked lines
    (line height = height of "Ay" + 3px spacing) are at most `max_h` tall.
    The width check matters for single words longer than `max_w`, which
    wrapping alone cannot shorten.

    If no size fits, the smallest size tried is returned together with its
    wrapped lines, so the caller still gets sensibly wrapped text.

    Args:
        draw      : PIL ImageDraw instance (used only for measuring)
        text      : Text string to fit
        max_w     : Available width in pixels
        max_h     : Available height in pixels
        font_path : Path to .ttf font (or None for PIL's default font)
        min_size  : Smallest font size to try (default: 8)
        max_size  : Largest font size to try (default: 48)
    Returns:
        (font, list_of_wrapped_lines)
    """
    smallest_attempt = None
    for size in range(max_size, min_size - 1, -1):
        try:
            font = ImageFont.truetype(font_path, size) if font_path else ImageFont.load_default()
        except Exception:
            # Deliberate best-effort: any font loading problem degrades to
            # PIL's built-in font instead of aborting the render.
            font = ImageFont.load_default()

        lines, widest = _wrap_to_width(draw, text, font, max_w)

        # Measure total block height
        lh_bbox = draw.textbbox((0, 0), "Ay", font=font)
        line_h = (lh_bbox[3] - lh_bbox[1]) + 3
        total_h = line_h * len(lines)

        if widest <= max_w and total_h <= max_h:
            return font, lines

        # Sizes descend, so the last recorded attempt is the smallest one.
        smallest_attempt = (font, lines)

    if smallest_attempt is not None:
        return smallest_attempt

    # Empty size range (max_size < min_size): nothing was tried at all.
    return ImageFont.load_default(), [text]
# ─────────────────────────────────────────────
# STEP 5: RENDER TEXT INTO BUBBLE
# Draws translated text centered inside
# the bubble bounding box.
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, x, y, w, h, text,
                          font_path, padding=12,
                          font_color=(0, 0, 0)):
    """
    Draw `text` centered, both horizontally and vertically, inside a
    bubble's bounding box.

    The usable area is the box shrunk by `padding` on each side (never
    smaller than 1×1). Font and line breaks come from fit_font_size; each
    line is then centered on its own within the usable width.

    Args:
        pil_image  : PIL Image (modified in place)
        x,y,w,h    : Bubble bounding box
        text       : Translated text to render
        font_path  : Path to .ttf font (or None)
        padding    : Inner padding in pixels (default: 12)
        font_color : RGB color tuple (default: black)
    """
    canvas = ImageDraw.Draw(pil_image)

    avail_w = max(1, w - padding * 2)
    avail_h = max(1, h - padding * 2)
    font, wrapped = fit_font_size(canvas, text, avail_w, avail_h, font_path)

    # Row height uses the same "Ay" + 3px rule as fit_font_size.
    ref_box = canvas.textbbox((0, 0), "Ay", font=font)
    row_h = (ref_box[3] - ref_box[1]) + 3
    block_h = row_h * len(wrapped)

    # Never start above the padded area, even if the block is too tall.
    cursor_y = y + padding + max(0, (avail_h - block_h) // 2)
    for row in wrapped:
        row_box = canvas.textbbox((0, 0), row, font=font)
        row_w = row_box[2] - row_box[0]
        cursor_x = x + padding + max(0, (avail_w - row_w) // 2)
        canvas.text((cursor_x, cursor_y), row, font=font, fill=font_color)
        cursor_y += row_h
# ─────────────────────────────────────────────
# RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Choose which font file to use and report the choice on stdout.

    Args:
        font_path : Preferred font path (may be None or empty)
        fallback  : Path used when the preferred one does not exist
    Returns:
        `font_path` if it exists, else `fallback` if it exists, else None
        (meaning: use PIL's default font).
    """
    primary_ok = bool(font_path) and os.path.exists(font_path)
    if not primary_ok:
        fallback_ok = bool(fallback) and os.path.exists(fallback)
        if not fallback_ok:
            print(" ⚠️ No font found. Using PIL default.")
            return None
        print(f" ⚠️ '{font_path}' not found → fallback: {fallback}")
        return fallback

    print(f" ✅ Using font: {font_path}")
    return font_path
# ─────────────────────────────────────────────
# MAIN RENDERER
# ─────────────────────────────────────────────
def render_translated_page(
    input_image=INPUT_IMAGE,
    output_image=OUTPUT_IMAGE,
    translations_file=TRANSLATIONS_FILE,
    bubbles_file=BUBBLES_FILE,
    font_path=FONT_PATH,
    font_fallback=FONT_FALLBACK,
    font_color=FONT_COLOR,
    erase_padding=6,
    text_padding=12,
    debug=False,
):
    """
    Run the whole render pipeline for one page.

    Steps, in order:
      1. Parse translations (parse_translations)
      2. Load bubble boxes (load_bubble_boxes)
      3. Load the source page with OpenCV
      4. Blank out every translated bubble that has a box
      5. Draw the translated text into those bubbles with PIL
      6. Optionally write debug_render.png with all boxes outlined
      7. Save the result as PNG

    The function returns None. It stops early, after printing a message,
    when there are no translations, no bubble boxes, or the image cannot
    be read. Translations without a matching box are reported and skipped.

    Args:
        input_image       : Source manga page
        output_image      : Output path
        translations_file : Path to the translations table
        bubbles_file      : Path to the bubble boxes JSON
        font_path         : Primary .ttf font path
        font_fallback     : Fallback font path
        font_color        : RGB text color
        erase_padding     : Border px kept when erasing (default: 6)
        text_padding      : Inner padding for text (default: 12)
        debug             : Also save debug_render.png (default: False)
    """
    banner = "=" * 55
    print(banner)
    print(" MANGA TRANSLATOR — RENDERER")
    print(banner)

    # ── Translations ──────────────────────────────────────────────────────────
    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    # ── Bubble boxes ──────────────────────────────────────────────────────────
    print(f"\n📦 Loading bubble boxes from {bubbles_file}...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble boxes found. Re-run manga-translator.py first.")
        return

    # ── Source image ──────────────────────────────────────────────────────────
    print(f"\n🖼️ Loading image: {input_image}")
    page = cv2.imread(input_image)
    if page is None:
        print(f"❌ Could not load: {input_image}")
        return
    print(f" Image size: {page.shape[1]}×{page.shape[0]}px")

    ordered_ids = sorted(translations)

    # ── Erase pass (on the OpenCV array, before converting to PIL) ────────────
    print("\n🧹 Erasing original bubble text...")
    for bubble_id in ordered_ids:
        box = bubble_boxes.get(bubble_id)
        if box is None:
            print(f" ⚠️ #{bubble_id}: no box in bubbles.json, skipping")
            continue
        x, y, w, h = box
        erase_bubble_rect(page, x, y, w, h, padding=erase_padding)
        print(f" Erased #{bubble_id} at ({x},{y}) {w}×{h}px")

    # ── Hand over to PIL (OpenCV stores BGR, PIL expects RGB) ─────────────────
    pil_image = Image.fromarray(cv2.cvtColor(page, cv2.COLOR_BGR2RGB))

    # ── Font ──────────────────────────────────────────────────────────────────
    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    # ── Text pass ─────────────────────────────────────────────────────────────
    print("\n✍️ Rendering translated text...")
    for bubble_id in ordered_ids:
        if bubble_id not in bubble_boxes:
            continue
        text = translations[bubble_id]
        x, y, w, h = bubble_boxes[bubble_id]
        render_text_in_bubble(
            pil_image, x, y, w, h, text,
            font_path=resolved_font,
            padding=text_padding,
            font_color=font_color,
        )
        print(f" #{bubble_id}: '{text}' → ({x},{y}) {w}×{h}px")

    # ── Optional debug overlay (drawn on a copy, output stays clean) ──────────
    if debug:
        overlay = pil_image.copy()
        pen = ImageDraw.Draw(overlay)
        for bubble_id in sorted(bubble_boxes):
            x, y, w, h = bubble_boxes[bubble_id]
            pen.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=2)
            pen.text((x + 4, y + 4), f"#{bubble_id}", fill=(255, 0, 0))
        overlay.save("debug_render.png")
        print("\n 🐛 Debug render saved → debug_render.png")

    # ── Save ──────────────────────────────────────────────────────────────────
    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print(f" ✅ Done! Open: {output_image}")
    print(banner)
# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Settings used when the file is executed as a script; debug output is
    # switched on here, unlike the function's own default.
    cli_settings = {
        "input_image": "page.png",
        "output_image": "page_translated.png",
        "translations_file": "output.txt",
        "bubbles_file": "bubbles.json",
        "font_path": "font.ttf",
        "font_fallback": "/System/Library/Fonts/Helvetica.ttc",
        "font_color": (0, 0, 0),
        "erase_padding": 6,
        "text_padding": 12,
        "debug": True,
    }
    render_translated_page(**cli_settings)