Added new rendered
This commit is contained in:
@@ -3,13 +3,16 @@
|
|||||||
"""
|
"""
|
||||||
manga-renderer.py
|
manga-renderer.py
|
||||||
|
|
||||||
Inputs: 001.jpg + bubbles.json + output.txt
|
Inputs: 001.jpg + bubbles.json + output_001.txt
|
||||||
Output: translated_page.png
|
Output: translated_page_001.png
|
||||||
|
|
||||||
Strategy:
|
Strategy:
|
||||||
1. For every bubble, white-fill all its OCR quads (erases original text cleanly)
|
1. For every bubble that has a translation and is not in SKIP_BUBBLE_IDS, white-fill all its OCR quads (erases original text cleanly).
|
||||||
2. Render the translated text centered inside the bubble bounding box
|
2. Detect the original font size from the OCR bounding boxes.
|
||||||
3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
|
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
|
||||||
|
4. Render the translated text centered inside the bubble bounding box.
|
||||||
|
5. Uses uniform line heights to prevent accent collisions.
|
||||||
|
6. Adds a white stroke to the text to cover any residual original characters.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@@ -17,43 +20,40 @@ import textwrap
|
|||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
from typing import Dict, List, Tuple, Optional, Set
|
from typing import Dict, List, Tuple, Optional, Set, Any
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# CONFIG — edit these paths to match your setup
|
# CONFIG — edit these paths to match your setup
|
||||||
# ============================================================
|
# ============================================================
|
||||||
IMAGE_PATH = "004.png"
|
IMAGE_PATH = "003.jpg"
|
||||||
BUBBLES_PATH = "bubbles.json"
|
BUBBLES_PATH = "bubbles.json"
|
||||||
TRANSLATIONS_PATH = "output_004.txt"
|
TRANSLATIONS_PATH = "output_003.txt"
|
||||||
OUTPUT_PATH = "translated_page_004.png"
|
OUTPUT_PATH = "translated_page_003.png"
|
||||||
|
|
||||||
# Font candidates — first one that loads wins
|
# Font candidates — first one that loads wins
|
||||||
FONT_CANDIDATES = [
|
FONT_CANDIDATES = [
|
||||||
"fonts/ComicNeue-Bold.ttf",
|
"fonts/ComicNeue-Bold.ttf",
|
||||||
|
# Mac fallbacks
|
||||||
|
"/System/Library/Fonts/Supplemental/Comic Sans MS Bold.ttf",
|
||||||
|
"/System/Library/Fonts/Supplemental/Arial Bold.ttf",
|
||||||
|
# Windows fallbacks
|
||||||
|
"C:\\Windows\\Fonts\\comicbd.ttf",
|
||||||
|
"C:\\Windows\\Fonts\\arialbd.ttf",
|
||||||
|
# Linux fallbacks
|
||||||
|
"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
|
||||||
]
|
]
|
||||||
|
|
||||||
FONT_SIZE = 24
|
DEFAULT_FONT_SIZE = 24
|
||||||
MIN_FONT_SIZE = 18
|
MIN_FONT_SIZE = 12
|
||||||
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
|
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# SKIP LIST
|
# SKIP LIST
|
||||||
# ── Add any bubble IDs you do NOT want rendered here.
|
|
||||||
# ── The quads will still be erased (white-filled) but no
|
|
||||||
# ── translated text will be drawn inside them.
|
|
||||||
# ──
|
|
||||||
# ── Examples of why you'd skip a bubble:
|
|
||||||
# ── • Sound effects (BURP, BAM, POW …)
|
|
||||||
# ── • Untranslatable single characters
|
|
||||||
# ── • Bubbles with bad OCR you want to fix manually later
|
|
||||||
# ── • Narrator boxes you want to leave in the source language
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
SKIP_BUBBLE_IDS: Set[int] = {
|
SKIP_BUBBLE_IDS: Set[int] = {
|
||||||
# 8, # BURP BURP — sound effect
|
# Add any bubble IDs you do NOT want rendered here.
|
||||||
# 2, # example: bad OCR, fix manually
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# FONT LOADER
|
# FONT LOADER
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -69,20 +69,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
|
|||||||
continue
|
continue
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def resolve_font_path() -> str:
|
||||||
def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
|
"""Return the path for the first working candidate."""
|
||||||
"""Return (path, font) for the first working candidate."""
|
|
||||||
for candidate in FONT_CANDIDATES:
|
for candidate in FONT_CANDIDATES:
|
||||||
font = load_font(candidate, FONT_SIZE)
|
if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
|
||||||
if font is not None:
|
|
||||||
print(f" ✅ Font: {candidate}")
|
print(f" ✅ Font: {candidate}")
|
||||||
return candidate, font
|
return candidate
|
||||||
print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
|
print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
|
||||||
return "", ImageFont.load_default()
|
return ""
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# PARSE output.txt → {bid: translated_string}
|
# PARSERS
|
||||||
# ============================================================
|
# ============================================================
|
||||||
def parse_translations(filepath: str) -> Dict[int, str]:
|
def parse_translations(filepath: str) -> Dict[int, str]:
|
||||||
"""
|
"""
|
||||||
@@ -107,41 +104,21 @@ def parse_translations(filepath: str) -> Dict[int, str]:
|
|||||||
continue
|
continue
|
||||||
return translations
|
return translations
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
|
||||||
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
|
|
||||||
# ============================================================
|
|
||||||
def parse_bubbles(filepath: str):
|
def parse_bubbles(filepath: str):
|
||||||
"""
|
"""
|
||||||
Returns:
|
Returns the full JSON data.
|
||||||
bubble_boxes : {bid: (x1, y1, x2, y2)}
|
|
||||||
quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
|
|
||||||
"""
|
"""
|
||||||
with open(filepath, "r", encoding="utf-8") as f:
|
with open(filepath, "r", encoding="utf-8") as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
return data
|
||||||
bubble_boxes = {}
|
|
||||||
quads_per_bubble = {}
|
|
||||||
|
|
||||||
for key, val in data.items():
|
|
||||||
bid = int(key)
|
|
||||||
|
|
||||||
x1 = val["x"]; y1 = val["y"]
|
|
||||||
x2 = x1 + val["w"]; y2 = y1 + val["h"]
|
|
||||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
|
||||||
|
|
||||||
quads_per_bubble[bid] = val.get("quads", [])
|
|
||||||
|
|
||||||
return bubble_boxes, quads_per_bubble
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# ERASE — white-fill every OCR quad (with small padding)
|
# ERASE — white-fill every OCR quad (with small padding)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
def erase_quads(
|
def erase_quads(
|
||||||
image_bgr,
|
image_bgr,
|
||||||
quads_per_bubble: Dict[int, List],
|
bubbles_data: Dict[str, dict],
|
||||||
translations: Dict[int, str], # ← NEW: only erase what we'll render
|
translations: Dict[int, str],
|
||||||
skip_ids: Set[int],
|
skip_ids: Set[int],
|
||||||
pad: int = QUAD_PAD
|
pad: int = QUAD_PAD
|
||||||
):
|
):
|
||||||
@@ -149,7 +126,6 @@ def erase_quads(
|
|||||||
White-fills OCR quads ONLY for bubbles that:
|
White-fills OCR quads ONLY for bubbles that:
|
||||||
- have a translation in output.txt AND
|
- have a translation in output.txt AND
|
||||||
- are NOT in SKIP_BUBBLE_IDS
|
- are NOT in SKIP_BUBBLE_IDS
|
||||||
Everything else is left completely untouched.
|
|
||||||
"""
|
"""
|
||||||
ih, iw = image_bgr.shape[:2]
|
ih, iw = image_bgr.shape[:2]
|
||||||
result = image_bgr.copy()
|
result = image_bgr.copy()
|
||||||
@@ -157,15 +133,11 @@ def erase_quads(
|
|||||||
erased_count = 0
|
erased_count = 0
|
||||||
skipped_count = 0
|
skipped_count = 0
|
||||||
|
|
||||||
for bid, quads in quads_per_bubble.items():
|
for bid_str, val in bubbles_data.items():
|
||||||
|
bid = int(bid_str)
|
||||||
|
quads = val.get("quads", [])
|
||||||
|
|
||||||
# ignore if explicitly skipped
|
if bid in skip_ids or bid not in translations:
|
||||||
if bid in skip_ids:
|
|
||||||
skipped_count += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# ignore if no translation exists (deleted from output.txt)
|
|
||||||
if bid not in translations:
|
|
||||||
skipped_count += 1
|
skipped_count += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -186,227 +158,206 @@ def erase_quads(
|
|||||||
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
|
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# DYNAMIC TEXT FITTING
|
||||||
|
# ============================================================
|
||||||
|
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
|
||||||
|
"""Calculates the original font size based on the OCR bounding boxes."""
|
||||||
|
line_bboxes = bubble_data.get("line_bboxes", [])
|
||||||
|
if not line_bboxes:
|
||||||
|
return fallback_size
|
||||||
|
|
||||||
# ============================================================
|
heights = [box["h"] for box in line_bboxes]
|
||||||
# FONT SIZING + TEXT WRAP
|
median_h = int(np.median(heights))
|
||||||
# ============================================================
|
|
||||||
def fit_text(
|
estimated_size = int(median_h * 0.85)
|
||||||
|
return max(MIN_FONT_SIZE, min(estimated_size, 60))
|
||||||
|
|
||||||
|
def fit_text_dynamically(
|
||||||
text: str,
|
text: str,
|
||||||
box_w: int,
|
|
||||||
box_h: int,
|
|
||||||
font_path: str,
|
font_path: str,
|
||||||
max_size: int = FONT_SIZE,
|
max_w: int,
|
||||||
min_size: int = MIN_FONT_SIZE
|
max_h: int,
|
||||||
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
|
target_font_size: int
|
||||||
|
) -> Tuple[List[str], Any, int, int]:
|
||||||
"""
|
"""
|
||||||
Returns (fitted_size, font, wrapped_lines) — largest size where
|
Wraps text and scales down font size if it exceeds the bubble dimensions.
|
||||||
the text block fits inside box_w × box_h.
|
Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
|
||||||
"""
|
"""
|
||||||
for size in range(max_size, min_size - 1, -1):
|
font_size = target_font_size
|
||||||
font = load_font(font_path, size) if font_path else None
|
|
||||||
if font is None:
|
|
||||||
return min_size, ImageFont.load_default(), [text]
|
|
||||||
|
|
||||||
chars_per_line = max(1, int(box_w / (size * 0.62)))
|
if not font_path:
|
||||||
wrapped = textwrap.fill(text, width=chars_per_line)
|
font = ImageFont.load_default()
|
||||||
lines = wrapped.split("\n")
|
char_w = 6
|
||||||
total_h = (size + 8) * len(lines)
|
chars_per_line = max(1, int(max_w / char_w))
|
||||||
|
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
|
||||||
|
return wrapped_lines, font, 4, 10
|
||||||
|
|
||||||
if total_h <= box_h - 8:
|
while font_size >= MIN_FONT_SIZE:
|
||||||
return size, font, lines
|
font = load_font(font_path, font_size)
|
||||||
|
|
||||||
# Nothing fit — use minimum size
|
|
||||||
font = load_font(font_path, min_size) if font_path else None
|
|
||||||
if font is None:
|
if font is None:
|
||||||
font = ImageFont.load_default()
|
font = ImageFont.load_default()
|
||||||
chars_per_line = max(1, int(box_w / (min_size * 0.62)))
|
return [text], font, 4, 10
|
||||||
lines = textwrap.fill(text, width=chars_per_line).split("\n")
|
|
||||||
return min_size, font, lines
|
|
||||||
|
|
||||||
|
char_bbox = font.getbbox("A")
|
||||||
|
char_w = (char_bbox[2] - char_bbox[0]) or 10
|
||||||
|
chars_per_line = max(1, int((max_w * 0.95) / char_w))
|
||||||
|
|
||||||
# ============================================================
|
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
|
||||||
# COLOR HELPERS
|
|
||||||
# ============================================================
|
|
||||||
def sample_bg_color(
|
|
||||||
image_bgr,
|
|
||||||
x1: int, y1: int,
|
|
||||||
x2: int, y2: int
|
|
||||||
) -> Tuple[int, int, int]:
|
|
||||||
"""Sample four corners of a bubble to estimate background color (R, G, B)."""
|
|
||||||
ih, iw = image_bgr.shape[:2]
|
|
||||||
samples = []
|
|
||||||
for sx, sy in [(x1+4, y1+4), (x2-4, y1+4), (x1+4, y2-4), (x2-4, y2-4)]:
|
|
||||||
sx = max(0, min(iw-1, sx)); sy = max(0, min(ih-1, sy))
|
|
||||||
b, g, r = image_bgr[sy, sx]
|
|
||||||
samples.append((int(r), int(g), int(b)))
|
|
||||||
return (
|
|
||||||
int(np.median([s[0] for s in samples])),
|
|
||||||
int(np.median([s[1] for s in samples])),
|
|
||||||
int(np.median([s[2] for s in samples])),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# Use uniform font metrics for height instead of per-line bounding boxes
|
||||||
|
line_spacing = max(2, int(font_size * 0.15))
|
||||||
|
if hasattr(font, 'getmetrics'):
|
||||||
|
ascent, descent = font.getmetrics()
|
||||||
|
line_h = ascent + descent
|
||||||
|
else:
|
||||||
|
line_h = font_size
|
||||||
|
|
||||||
def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
|
total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
|
||||||
lum = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2]
|
|
||||||
return (0, 0, 0) if lum > 128 else (255, 255, 255)
|
|
||||||
|
|
||||||
|
max_line_w = 0
|
||||||
|
for line in wrapped_lines:
|
||||||
|
bbox = font.getbbox(line)
|
||||||
|
lw = bbox[2] - bbox[0]
|
||||||
|
max_line_w = max(max_line_w, lw)
|
||||||
|
|
||||||
def safe_textbbox(
|
if max_line_w <= max_w and total_h <= max_h:
|
||||||
draw, pos, text, font
|
return wrapped_lines, font, line_spacing, font_size
|
||||||
) -> Tuple[int, int, int, int]:
|
|
||||||
try:
|
|
||||||
return draw.textbbox(pos, text, font=font)
|
|
||||||
except Exception:
|
|
||||||
size = getattr(font, "size", 12)
|
|
||||||
return (
|
|
||||||
pos[0], pos[1],
|
|
||||||
pos[0] + int(len(text) * size * 0.6),
|
|
||||||
pos[1] + int(size * 1.2)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
font_size -= 2
|
||||||
|
|
||||||
|
font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
|
||||||
|
char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10)
|
||||||
|
char_w = (char_bbox[2] - char_bbox[0]) or 6
|
||||||
|
chars_per_line = max(1, int(max_w / char_w))
|
||||||
|
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
|
||||||
|
|
||||||
|
return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# RENDER
|
# RENDER
|
||||||
# ============================================================
|
# ============================================================
|
||||||
def render_translations(
|
def render_text(
|
||||||
image_bgr,
|
image_bgr,
|
||||||
bubble_boxes: Dict[int, Tuple],
|
bubbles_data: Dict[str, dict],
|
||||||
translations: Dict[int, str],
|
translations: Dict[int, str],
|
||||||
skip_ids: Set[int],
|
|
||||||
font_path: str,
|
font_path: str,
|
||||||
font_size: int = FONT_SIZE,
|
skip_ids: Set[int]
|
||||||
bold_outline: bool = True,
|
|
||||||
auto_color: bool = True,
|
|
||||||
output_path: str = OUTPUT_PATH
|
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Draws the translated text centered in the line_union_bbox of each bubble.
|
||||||
|
Adds a white stroke (outline) to cover any residual original characters.
|
||||||
|
"""
|
||||||
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
|
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
|
||||||
pil_img = Image.fromarray(image_rgb)
|
pil_img = Image.fromarray(image_rgb)
|
||||||
draw = ImageDraw.Draw(pil_img)
|
draw = ImageDraw.Draw(pil_img)
|
||||||
|
|
||||||
rendered = 0
|
rendered_count = 0
|
||||||
skipped = 0
|
|
||||||
missing = 0
|
|
||||||
|
|
||||||
for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
|
for bid_str, val in bubbles_data.items():
|
||||||
|
bid = int(bid_str)
|
||||||
|
|
||||||
# ── skip list check ────────────────────────────────────────
|
if bid in skip_ids or bid not in translations:
|
||||||
if bid in skip_ids:
|
|
||||||
print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
|
|
||||||
skipped += 1
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
text = translations.get(bid, "").strip()
|
text = translations[bid]
|
||||||
if not text:
|
|
||||||
print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank")
|
union_box = val.get("line_union_bbox")
|
||||||
missing += 1
|
if not union_box:
|
||||||
|
union_box = val.get("text_bbox")
|
||||||
|
if not union_box:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
box_w = x2 - x1
|
bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
|
||||||
box_h = y2 - y1
|
|
||||||
if box_w < 10 or box_h < 10:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# ── fit font + wrap ────────────────────────────────────────
|
pad_x = int(bw * 0.1)
|
||||||
size, font, lines = fit_text(
|
pad_y = int(bh * 0.1)
|
||||||
text, box_w, box_h, font_path, max_size=font_size
|
bx -= pad_x // 2
|
||||||
|
by -= pad_y // 2
|
||||||
|
bw += pad_x
|
||||||
|
bh += pad_y
|
||||||
|
|
||||||
|
target_size = get_original_font_size(val)
|
||||||
|
wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
|
||||||
|
|
||||||
|
# Use uniform typographic line height for rendering
|
||||||
|
if hasattr(font, 'getmetrics'):
|
||||||
|
ascent, descent = font.getmetrics()
|
||||||
|
line_h = ascent + descent
|
||||||
|
else:
|
||||||
|
line_h = final_size
|
||||||
|
|
||||||
|
total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
|
||||||
|
|
||||||
|
current_y = by + (bh - total_text_height) // 2
|
||||||
|
outline_thickness = max(2, int(final_size * 0.10))
|
||||||
|
|
||||||
|
for i, line in enumerate(wrapped_lines):
|
||||||
|
if hasattr(font, 'getbbox'):
|
||||||
|
bbox = font.getbbox(line)
|
||||||
|
lw = bbox[2] - bbox[0]
|
||||||
|
else:
|
||||||
|
lw = len(line) * 6
|
||||||
|
|
||||||
|
current_x = bx + (bw - lw) // 2
|
||||||
|
|
||||||
|
draw.text(
|
||||||
|
(current_x, current_y),
|
||||||
|
line,
|
||||||
|
fill=(0, 0, 0),
|
||||||
|
font=font,
|
||||||
|
stroke_width=outline_thickness,
|
||||||
|
stroke_fill=(255, 255, 255)
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── colors ─────────────────────────────────────────────────
|
# Advance Y by the uniform line height + spacing
|
||||||
if auto_color:
|
current_y += line_h + line_spacing
|
||||||
bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
|
|
||||||
fg = pick_fg_color(bg)
|
|
||||||
ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
|
|
||||||
else:
|
|
||||||
fg, ol = (0, 0, 0), (255, 255, 255)
|
|
||||||
|
|
||||||
# ── vertical center ────────────────────────────────────────
|
rendered_count += 1
|
||||||
line_h = size + 8
|
|
||||||
total_h = line_h * len(lines)
|
|
||||||
y_cur = y1 + max(4, (box_h - total_h) // 2)
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
bb = safe_textbbox(draw, (0, 0), line, font)
|
|
||||||
line_w = bb[2] - bb[0]
|
|
||||||
x_cur = x1 + max(2, (box_w - line_w) // 2)
|
|
||||||
|
|
||||||
if bold_outline:
|
|
||||||
for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
|
|
||||||
try:
|
|
||||||
draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
draw.text((x_cur, y_cur), line, font=font, fill=fg)
|
|
||||||
except Exception as e:
|
|
||||||
print(f" ❌ Draw error bubble #{bid}: {e}")
|
|
||||||
|
|
||||||
y_cur += line_h
|
|
||||||
|
|
||||||
print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
|
|
||||||
rendered += 1
|
|
||||||
|
|
||||||
pil_img.save(output_path)
|
|
||||||
|
|
||||||
print()
|
|
||||||
print(f"{'─'*50}")
|
|
||||||
print(f" Rendered : {rendered}")
|
|
||||||
print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
|
|
||||||
print(f" No text : {missing} (not in output.txt)")
|
|
||||||
print(f"{'─'*50}")
|
|
||||||
print(f"✅ Saved → {output_path}")
|
|
||||||
|
|
||||||
return pil_img
|
|
||||||
|
|
||||||
|
print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
|
||||||
|
return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# MAIN
|
# MAIN
|
||||||
# ============================================================
|
# ============================================================
|
||||||
def main():
|
def main():
|
||||||
print(f"📖 Loading image : {IMAGE_PATH}")
|
print(f"Loading image: {IMAGE_PATH}")
|
||||||
image = cv2.imread(IMAGE_PATH)
|
image_bgr = cv2.imread(IMAGE_PATH)
|
||||||
if image is None:
|
if image_bgr is None:
|
||||||
print(f"❌ Cannot load: {IMAGE_PATH}"); return
|
print(f"❌ Error: Could not load {IMAGE_PATH}")
|
||||||
|
return
|
||||||
|
|
||||||
print(f"📦 Loading bubbles : {BUBBLES_PATH}")
|
print(f"Loading translations: {TRANSLATIONS_PATH}")
|
||||||
bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
|
|
||||||
print(f" {len(bubble_boxes)} bubbles | "
|
|
||||||
f"{sum(len(v) for v in quads_per_bubble.values())} quads total")
|
|
||||||
|
|
||||||
print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
|
|
||||||
translations = parse_translations(TRANSLATIONS_PATH)
|
translations = parse_translations(TRANSLATIONS_PATH)
|
||||||
print(f" {len(translations)} translations found")
|
|
||||||
|
|
||||||
if SKIP_BUBBLE_IDS:
|
print(f"Loading bubble data: {BUBBLES_PATH}")
|
||||||
print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}")
|
bubbles_data = parse_bubbles(BUBBLES_PATH)
|
||||||
else:
|
|
||||||
print(f"⏭️ Skip list : (empty — all bubbles will be rendered)")
|
|
||||||
|
|
||||||
print("🔤 Resolving font...")
|
print("Resolving font...")
|
||||||
font_path, _ = resolve_font()
|
font_path = resolve_font_path()
|
||||||
|
|
||||||
print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
|
print("\n--- Step 1: Erasing original text ---")
|
||||||
clean_image = erase_quads(
|
erased_bgr = erase_quads(
|
||||||
image,
|
image_bgr=image_bgr,
|
||||||
quads_per_bubble,
|
bubbles_data=bubbles_data,
|
||||||
translations = translations, # ← pass translations here
|
translations=translations,
|
||||||
skip_ids = SKIP_BUBBLE_IDS,
|
skip_ids=SKIP_BUBBLE_IDS,
|
||||||
pad = QUAD_PAD
|
pad=QUAD_PAD
|
||||||
)
|
)
|
||||||
|
|
||||||
print("✍️ Rendering translated text...")
|
print("\n--- Step 2: Rendering translated text ---")
|
||||||
render_translations(
|
final_bgr = render_text(
|
||||||
image_bgr = clean_image,
|
image_bgr=erased_bgr,
|
||||||
bubble_boxes = bubble_boxes,
|
bubbles_data=bubbles_data,
|
||||||
translations = translations,
|
translations=translations,
|
||||||
skip_ids = SKIP_BUBBLE_IDS,
|
font_path=font_path,
|
||||||
font_path = font_path,
|
skip_ids=SKIP_BUBBLE_IDS
|
||||||
font_size = FONT_SIZE,
|
|
||||||
bold_outline = True,
|
|
||||||
auto_color = True,
|
|
||||||
output_path = OUTPUT_PATH
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
print(f"\nSaving final image to: {OUTPUT_PATH}")
|
||||||
|
cv2.imwrite(OUTPUT_PATH, final_bgr)
|
||||||
|
print("✅ Done!")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
@@ -301,6 +301,54 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def split_bubble_if_multiple_rows(indices, ocr, bid=None):
    """
    Split a bubble into a top and a bottom group at its largest vertical gap.

    A large vertical gap between text boxes suggests that two separate
    bubbles were merged into one.

    Args:
        indices: Sized sequence of indices into ``ocr`` belonging to the bubble.
        ocr: Indexable OCR results; ``ocr[i][0]`` is the quad passed to
            ``quad_bbox``.
        bid: Bubble id. Accepted for signature parity with the other split
            helpers; not used by this function.

    Returns:
        ``(top_indices, bottom_indices)`` when the largest gap exceeds both
        2.5x the median box height and 40 px, otherwise ``None``. Both lists
        are ordered top to bottom and are never empty.
    """
    if len(indices) < 2:
        return None

    # NOTE(review): the indexing below treats quad_bbox's result as
    # (x1, y1, x2, y2) -- confirm against quad_bbox's definition.
    boxes = [quad_bbox(ocr[i][0]) for i in indices]

    # Sort by top edge (top to bottom).
    sorted_items = sorted(zip(indices, boxes), key=lambda item: item[1][1])

    # For each box after the first, measure the distance between its top edge
    # and the lowest bottom edge seen so far. Tracking the running maximum
    # keeps a tall earlier box from producing a false gap.
    gaps = []
    current_max_y = sorted_items[0][1][3]
    for pos, (_, box) in enumerate(sorted_items[1:], start=1):
        gaps.append((pos, box[1] - current_max_y))
        current_max_y = max(current_max_y, box[3])

    # At least two items are guaranteed above, so `gaps` is never empty.
    # max() returns the first maximum, i.e. the topmost of equally large gaps.
    split_idx, max_gap_size = max(gaps, key=lambda entry: entry[1])

    # The median box height defines what counts as a "large" gap.
    med_h = float(np.median([box[3] - box[1] for box in boxes]))
    threshold = med_h * 2.5
    min_gap = 40.0  # Absolute minimum pixel gap to prevent micro-splits

    if max_gap_size > threshold and max_gap_size > min_gap:
        # split_idx is in 1..len-1, so both halves hold at least one index.
        # No minimum group size beyond that is enforced here.
        top_indices = [item[0] for item in sorted_items[:split_idx]]
        bottom_indices = [item[0] for item in sorted_items[split_idx:]]
        return top_indices, bottom_indices

    return None
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# OCR ENGINES (Apple Native Vision)
|
# OCR ENGINES (Apple Native Vision)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -886,6 +934,7 @@ def translate_manga_text(
|
|||||||
box = bubble_boxes[bid]
|
box = bubble_boxes[bid]
|
||||||
bubble_split = None
|
bubble_split = None
|
||||||
|
|
||||||
|
# 1. Panel border split
|
||||||
split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
|
split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
|
||||||
if split_result:
|
if split_result:
|
||||||
box_left, box_right, split_x = split_result
|
box_left, box_right, split_x = split_result
|
||||||
@@ -909,27 +958,37 @@ def translate_manga_text(
|
|||||||
bubble_split = (left_idxs, right_idxs)
|
bubble_split = (left_idxs, right_idxs)
|
||||||
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||||
|
|
||||||
|
# 2. Check for vertical columns (left/right split)
|
||||||
if bubble_split is None:
|
if bubble_split is None:
|
||||||
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
|
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
|
||||||
if col_split:
|
if col_split:
|
||||||
left_idxs, right_idxs = col_split
|
left_idxs, right_idxs = col_split
|
||||||
if left_idxs and right_idxs:
|
if left_idxs and right_idxs:
|
||||||
bubble_split = (left_idxs, right_idxs)
|
bubble_split = (left_idxs, right_idxs)
|
||||||
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(left_idxs)} | {len(right_idxs)} quads)")
|
||||||
|
|
||||||
|
# 3. Check for horizontal rows (top/bottom split)
|
||||||
|
if bubble_split is None:
|
||||||
|
row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
|
||||||
|
if row_split:
|
||||||
|
top_idxs, bottom_idxs = row_split
|
||||||
|
if top_idxs and bottom_idxs:
|
||||||
|
bubble_split = (top_idxs, bottom_idxs)
|
||||||
|
splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(top_idxs)} | {len(bottom_idxs)} quads)")
|
||||||
|
|
||||||
if bubble_split:
|
if bubble_split:
|
||||||
left_idxs, right_idxs = bubble_split
|
part1_idxs, part2_idxs = bubble_split
|
||||||
new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
|
new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered)
|
||||||
ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
|
ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs])
|
||||||
new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
|
new_bubble_boxes[bid] = (max(0, ub_1[0]-3), max(0, ub_1[1]-3), min(iw-1, ub_1[2]+3), min(ih-1, ub_1[3]+3))
|
||||||
new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
|
new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs]
|
||||||
new_bubble_indices[bid] = left_idxs
|
new_bubble_indices[bid] = part1_idxs
|
||||||
|
|
||||||
new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
|
new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered)
|
||||||
ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
|
ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs])
|
||||||
new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
|
new_bubble_boxes[next_bid] = (max(0, ub_2[0]-3), max(0, ub_2[1]-3), min(iw-1, ub_2[2]+3), min(ih-1, ub_2[3]+3))
|
||||||
new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs]
|
new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs]
|
||||||
new_bubble_indices[next_bid] = right_idxs
|
new_bubble_indices[next_bid] = part2_idxs
|
||||||
next_bid += 1
|
next_bid += 1
|
||||||
else:
|
else:
|
||||||
new_bubbles[bid] = bubbles[bid]
|
new_bubbles[bid] = bubbles[bid]
|
||||||
@@ -938,7 +997,7 @@ def translate_manga_text(
|
|||||||
new_bubble_indices[bid] = bubble_indices[bid]
|
new_bubble_indices[bid] = bubble_indices[bid]
|
||||||
|
|
||||||
if splits_performed:
|
if splits_performed:
|
||||||
print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
|
print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}")
|
||||||
for split_info in splits_performed:
|
for split_info in splits_performed:
|
||||||
print(f" ✓ Split {split_info}")
|
print(f" ✓ Split {split_info}")
|
||||||
|
|
||||||
@@ -1049,8 +1108,8 @@ def translate_manga_text(
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
translate_manga_text(
|
translate_manga_text(
|
||||||
image_path="004.png",
|
image_path="003.jpg",
|
||||||
source_lang="en",
|
source_lang="es",
|
||||||
target_lang="ca",
|
target_lang="ca",
|
||||||
confidence_threshold=0.05,
|
confidence_threshold=0.05,
|
||||||
min_text_length=1,
|
min_text_length=1,
|
||||||
|
|||||||
Reference in New Issue
Block a user