Added new

This commit is contained in:
Guillem Hernandez Sola
2026-04-11 14:00:07 +02:00
parent 458915278e
commit 555892348f
3 changed files with 836 additions and 399 deletions

View File

@@ -13,218 +13,172 @@ INPUT_IMAGE = "page.png"
# File name of the rendered (translated) page image.
OUTPUT_IMAGE = "page_translated.png"
# Text file with one "#N  original  translation" row per bubble.
TRANSLATIONS_FILE = "output.txt"
# JSON file mapping bubble id → {"x", "y", "w", "h"} bounding box.
BUBBLES_FILE = "bubbles.json"
# Preferred TrueType font, and the font tried when it is unavailable.
# NOTE(review): the fallback is a macOS system path; it will not exist
# on other platforms — confirm this is intended.
FONT_PATH = "font.ttf"
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"
# Default text color (black). Presumably RGB, as it is handed to PIL.
FONT_COLOR = (0, 0, 0)
# White fill color for bubbles.
# NOTE(review): no use of this constant is visible here — confirm it is
# still needed.
BUBBLE_FILL = (255, 255, 255)
# ─────────────────────────────────────────────
# PARSE output.txt
# Robust parser: columns are separated by runs
# of 2+ spaces, and the LAST column is always
# taken as the translation.
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parses output.txt → {bubble_id: translated_text}.

    A data row starts (after optional leading whitespace) with "#N" and
    has at least three columns separated by runs of 2+ spaces. The LAST
    column is always taken as the translation, so single spaces inside
    the original or translated text are harmless.

    Only bubbles present in the file are returned; rows whose
    translation starts with "[" are skipped (presumably placeholder
    markers — verify against the writer of output.txt).

    Args:
        filepath : Path to output.txt (read as UTF-8)

    Returns:
        Dict {1: "LA NOIA ESTÀ IL·LESA!", ...}
    """
    row_start = re.compile(r"^\s*#(\d+)")
    column_gap = re.compile(r" {2,}")

    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            id_match = row_start.match(line)
            if id_match is None:
                continue

            parts = column_gap.split(line.strip())
            if len(parts) < 3:
                continue

            translated = parts[-1].strip()
            if translated.startswith("["):
                continue

            # Take the id from the "#N" prefix only. Stripping every
            # non-digit from the first column would glue unrelated
            # digits onto the id ("#5 TAKE 2" would become 52).
            translations[int(id_match.group(1))] = translated

    print(f"{len(translations)} bubble(s) to translate: "
          f"{sorted(translations.keys())}")
    for bid, text in sorted(translations.items()):
        print(f" #{bid}: {text}")
    return translations
# ─────────────────────────────────────────────
# LOAD bubbles.json
# The bubble boxes were saved by
# manga-translator.py and are guaranteed to
# match the clusters.
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Loads bubble bounding boxes from bubbles.json.

    Expected format:
        {
            "1": {"x": 120, "y": 45, "w": 180, "h": 210},
            "2": { ... },
            ...
        }

    The JSON keys are converted to int; each value is returned as the
    dict stored in the file, because the consumers in this module read
    the box through its 'x', 'y', 'w' and 'h' keys.

    Args:
        filepath : Path to bubbles.json (read as UTF-8)

    Returns:
        Dict {bubble_id (int): {"x": ..., "y": ..., "w": ..., "h": ...}}

    Raises:
        KeyError : If an entry lacks one of 'x', 'y', 'w', 'h'
                   (raised while printing the summary).
    """
    with open(filepath, "r", encoding="utf-8") as f:
        raw = json.load(f)

    boxes = {int(key): box for key, box in raw.items()}

    print(f" ✅ Loaded {len(boxes)} bubble(s)")
    for bid, val in sorted(boxes.items()):
        print(f" #{bid}: ({val['x']},{val['y']}) "
              f"{val['w']}×{val['h']}px")
    return boxes
# ─────────────────────────────────────────────
# STEP 3: ERASE BUBBLE CONTENT
# Fills a rectangular region with white.
# Uses a slightly inset rect to preserve
# the bubble border.
# SAMPLE BACKGROUND COLOR
# ─────────────────────────────────────────────
def sample_bubble_background(cv_image, bubble_data):
    """
    Samples the dominant background color inside the bubble bbox.

    The bbox is clipped to the image, converted to grayscale, and the
    pixels at or above the 90th brightness percentile are averaged
    per channel.

    Args:
        cv_image    : BGR numpy array (not modified)
        bubble_data : Dict with 'x','y','w','h'

    Returns:
        (B, G, R) tuple of ints. White (255, 255, 255) is returned when
        the clipped bbox is empty.
    """
    img_h, img_w = cv_image.shape[:2]

    # Clip against the image. The far edges are derived from the
    # UNCLAMPED origin so the sampled area is the same rectangle that
    # erase_bubble_text fills. They are also floored at the near edge:
    # a negative slice end would wrap around and sample an unrelated
    # part of the page when the box lies off the top/left border.
    left = max(0, bubble_data["x"])
    top = max(0, bubble_data["y"])
    right = max(left, min(img_w, bubble_data["x"] + bubble_data["w"]))
    bottom = max(top, min(img_h, bubble_data["y"] + bubble_data["h"]))

    region = cv_image[top:bottom, left:right]
    if region.size == 0:
        return (255, 255, 255)

    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    threshold = np.percentile(gray, 90)
    bg_mask = gray >= threshold
    if not np.any(bg_mask):
        return (255, 255, 255)

    # Boolean-mask indexing yields an (N, 3) array; average per channel.
    return tuple(int(c) for c in region[bg_mask].mean(axis=0))
# ─────────────────────────────────────────────
# ERASE ORIGINAL TEXT
# Fills the tight OCR bbox with the sampled
# background color. No extra expansion —
# the bbox from bubbles.json is already the
# exact size of the red squares.
# ─────────────────────────────────────────────
def erase_bubble_text(cv_image, bubble_data,
                      bg_color=(255, 255, 255)):
    """
    Fills the bubble bounding box with bg_color.

    The box is clipped to the image bounds first; a box that does not
    overlap the image at all leaves the image untouched.

    Args:
        cv_image    : BGR numpy array (modified in place)
        bubble_data : Dict with 'x','y','w','h'
        bg_color    : (B,G,R) fill color
    """
    img_h, img_w = cv_image.shape[:2]
    left = max(0, bubble_data["x"])
    top = max(0, bubble_data["y"])
    right = min(img_w, bubble_data["x"] + bubble_data["w"])
    bottom = min(img_h, bubble_data["y"] + bubble_data["h"])

    # Nothing to erase. Without this guard a negative right/bottom edge
    # (box entirely off the top/left border) would be read by numpy as
    # "counted from the end" and paint a large unrelated area.
    if right <= left or bottom <= top:
        return

    cv_image[top:bottom, left:right] = list(bg_color)
# ─────────────────────────────────────────────
# FIT FONT SIZE
# Finds the largest font size where the text
# fits inside (max_w × max_h) with word wrap.
# ─────────────────────────────────────────────
def _wrap_words(draw, words, font, max_w):
    """Greedily wraps words to max_w; returns (lines, widest_line_px)."""
    lines, current = [], ""
    for word in words:
        candidate = f"{current} {word}".strip()
        box = draw.textbbox((0, 0), candidate, font=font)
        if (box[2] - box[0]) <= max_w:
            current = candidate
        else:
            if current:
                lines.append(current)
            # A word wider than max_w still gets a line of its own;
            # the caller rejects the size via the returned width.
            current = word
    if current:
        lines.append(current)

    widest = 0
    for line in lines:
        box = draw.textbbox((0, 0), line, font=font)
        widest = max(widest, box[2] - box[0])
    return lines, widest


def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=7, max_size=48):
    """
    Finds the largest font size where word-wrapped text
    fits inside (max_w × max_h).

    Sizes are tried from max_size down to min_size (linear scan). A
    size is accepted only when every wrapped line is at most max_w wide
    AND the stacked lines are at most max_h tall, so a single long word
    shrinks the font instead of overflowing the box. If no size fits,
    the smallest attempted font is returned with its wrapped lines.

    Args:
        draw      : PIL ImageDraw instance (used for measuring only)
        text      : Text string to fit
        max_w     : Available width in pixels
        max_h     : Available height in pixels
        font_path : Path to .ttf font (or None for the PIL default)
        min_size  : Smallest font size to try (default: 7)
        max_size  : Largest font size to try (default: 48)

    Returns:
        (font, list_of_wrapped_lines)
    """
    words = text.split()
    smallest_tried = None

    for size in range(max_size, min_size - 1, -1):
        scalable = bool(font_path)
        try:
            font = (ImageFont.truetype(font_path, size)
                    if font_path else ImageFont.load_default())
        except (OSError, ValueError):
            # Font file unreadable or size invalid → PIL default.
            font = ImageFont.load_default()
            scalable = False

        lines, widest = _wrap_words(draw, words, font, max_w)

        # Line height = height of a reference string + 2px gap, the
        # same formula render_text_in_bubble uses to lay the lines out.
        ref = draw.textbbox((0, 0), "Ay", font=font)
        line_h = (ref[3] - ref[1]) + 2

        if widest <= max_w and line_h * len(lines) <= max_h:
            return font, lines

        smallest_tried = (font, lines)
        if not scalable:
            # The default font is loaded without a size, so retrying
            # at other sizes would only repeat this measurement.
            break

    if smallest_tried is None:
        # Empty size range (min_size > max_size): nothing was tried.
        return ImageFont.load_default(), [text]
    return smallest_tried
# ─────────────────────────────────────────────
# STEP 5: RENDER TEXT INTO BUBBLE
# Draws translated text centered inside
# the bubble bounding box.
# RENDER TEXT INTO BUBBLE
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, x, y, w, h, text,
font_path, padding=12,
def render_text_in_bubble(pil_image, bubble_data, text,
font_path, padding=8,
font_color=(0, 0, 0)):
"""
Renders text centered (horizontally + vertically)
inside a bubble bounding box.
Args:
pil_image : PIL Image (modified in place)
x,y,w,h : Bubble bounding box
text : Translated text to render
font_path : Path to .ttf font (or None)
padding : Inner padding in pixels (default: 12)
font_color : RGB color tuple (default: black)
Renders translated text centered inside the tight bbox.
Font auto-sizes to fill the same w×h the original occupied.
"""
x, y = bubble_data["x"], bubble_data["y"]
w, h = bubble_data["w"], bubble_data["h"]
draw = ImageDraw.Draw(pil_image)
inner_w = max(1, w - padding * 2)
inner_h = max(1, h - padding * 2)
font, lines = fit_font_size(draw, text, inner_w, inner_h, font_path)
lh_bbox = draw.textbbox((0, 0), "Ay", font=font)
line_h = (lh_bbox[3] - lh_bbox[1]) + 3
font, lines = fit_font_size(draw, text, inner_w, inner_h,
font_path)
lh_bb = draw.textbbox((0, 0), "Ay", font=font)
line_h = (lh_bb[3] - lh_bb[1]) + 2
total_h = line_h * len(lines)
start_y = y + padding + max(0, (inner_h - total_h) // 2)
@@ -232,7 +186,8 @@ def render_text_in_bubble(pil_image, x, y, w, h, text,
lb = draw.textbbox((0, 0), line, font=font)
line_w = lb[2] - lb[0]
start_x = x + padding + max(0, (inner_w - line_w) // 2)
draw.text((start_x, start_y), line, font=font, fill=font_color)
draw.text((start_x, start_y), line,
font=font, fill=font_color)
start_y += line_h
@@ -244,7 +199,7 @@ def resolve_font(font_path, fallback):
print(f" ✅ Using font: {font_path}")
return font_path
if fallback and os.path.exists(fallback):
print(f" ⚠️ '{font_path}' not found → fallback: {fallback}")
print(f" ⚠️ Fallback: {fallback}")
return fallback
print(" ⚠️ No font found. Using PIL default.")
return None
@@ -261,104 +216,122 @@ def render_translated_page(
font_path = FONT_PATH,
font_fallback = FONT_FALLBACK,
font_color = FONT_COLOR,
erase_padding = 6,
text_padding = 12,
text_padding = 8,
debug = False,
):
"""
Full rendering pipeline:
1. Parse translations from output.txt
Pipeline:
1. Parse translations (only present IDs processed)
2. Load bubble boxes from bubbles.json
3. Load original manga page
4. Erase original text from each bubble
5. Render translated text into each bubble
6. Save output image
Args:
input_image : Source manga page (default: 'page.png')
output_image : Output path (default: 'page_translated.png')
translations_file : Path to output.txt (default: 'output.txt')
bubbles_file : Path to bubbles.json (default: 'bubbles.json')
font_path : Primary .ttf font path
font_fallback : Fallback font path
font_color : RGB text color (default: black)
erase_padding : Border px when erasing (default: 6)
text_padding : Inner padding for text (default: 12)
debug : Save debug_render.png (default: False)
3. Cross-check IDs — absent ones left untouched
4. Sample background color per bubble
5. Erase original text (fill tight bbox)
6. Render translated text sized to fit the bbox
7. Save output
"""
print("=" * 55)
print(" MANGA TRANSLATOR — RENDERER")
print("=" * 55)
# ── 1. Parse translations ─────────────────────────────────────────────────
print("\n📄 Parsing translations...")
translations = parse_translations(translations_file)
if not translations:
print("❌ No translations found. Aborting.")
return
# ── 2. Load bubble boxes ──────────────────────────────────────────────────
print(f"\n📦 Loading bubble boxes from {bubbles_file}...")
print(f"\n📦 Loading bubble data...")
bubble_boxes = load_bubble_boxes(bubbles_file)
if not bubble_boxes:
print("❌ No bubble boxes found. Re-run manga-translator.py first.")
print("❌ No bubble data. Re-run manga-translator.py.")
return
# ── 3. Load image ─────────────────────────────────────────────────────────
print(f"\n🖼️ Loading image: {input_image}")
translate_ids = set(translations.keys())
box_ids = set(bubble_boxes.keys())
to_process = sorted(translate_ids & box_ids)
untouched = sorted(box_ids - translate_ids)
missing = sorted(translate_ids - box_ids)
print(f"\n🔗 To process : {to_process}")
print(f" Untouched : {untouched}")
if missing:
print(f" ⚠️ In output.txt but no box: {missing}")
if not to_process:
print("❌ No matching IDs. Aborting.")
return
print(f"\n🖼️ Loading: {input_image}")
cv_image = cv2.imread(input_image)
if cv_image is None:
print(f"❌ Could not load: {input_image}")
return
print(f" Image size: {cv_image.shape[1]}×{cv_image.shape[0]}px")
print(f" {cv_image.shape[1]}×{cv_image.shape[0]}px")
# ── 4. Erase original text ────────────────────────────────────────────────
print("\n🧹 Erasing original bubble text...")
for bubble_id in sorted(translations.keys()):
if bubble_id not in bubble_boxes:
print(f" ⚠️ #{bubble_id}: no box in bubbles.json, skipping")
continue
x, y, w, h = bubble_boxes[bubble_id]
erase_bubble_rect(cv_image, x, y, w, h, padding=erase_padding)
print(f" Erased #{bubble_id} at ({x},{y}) {w}×{h}px")
# Sample backgrounds BEFORE erasing
print("\n🎨 Sampling backgrounds...")
bg_colors = {}
for bid in to_process:
bg_bgr = sample_bubble_background(
cv_image, bubble_boxes[bid])
bg_colors[bid] = bg_bgr
bg_rgb = (bg_bgr[2], bg_bgr[1], bg_bgr[0])
brightness = sum(bg_rgb) / 3
ink = "black" if brightness > 128 else "white"
print(f" #{bid}: RGB{bg_rgb} ink→{ink}")
# ── 5. Convert to PIL ─────────────────────────────────────────────────────
pil_image = Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# Erase
print("\n🧹 Erasing original text...")
for bid in to_process:
bd = bubble_boxes[bid]
erase_bubble_text(cv_image, bd, bg_color=bg_colors[bid])
print(f" ✅ #{bid} ({bd['w']}×{bd['h']}px)")
pil_image = Image.fromarray(
cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
# ── 6. Resolve font ───────────────────────────────────────────────────────
print("\n🔤 Resolving font...")
resolved_font = resolve_font(font_path, font_fallback)
# ── 7. Render translated text ─────────────────────────────────────────────
print("\n✍️ Rendering translated text...")
for bubble_id, text in sorted(translations.items()):
if bubble_id not in bubble_boxes:
continue
x, y, w, h = bubble_boxes[bubble_id]
# Render
print("\n✍️ Rendering...")
for bid in to_process:
text = translations[bid]
bd = bubble_boxes[bid]
bg_rgb = (bg_colors[bid][2],
bg_colors[bid][1],
bg_colors[bid][0])
brightness = sum(bg_rgb) / 3
txt_color = (0, 0, 0) if brightness > 128 \
else (255, 255, 255)
render_text_in_bubble(
pil_image, x, y, w, h, text,
pil_image, bd, text,
font_path = resolved_font,
padding = text_padding,
font_color = font_color,
font_color = txt_color,
)
print(f" #{bubble_id}: '{text}' → ({x},{y}) {w}×{h}px")
print(f" #{bid}: '{text}' "
f"({bd['x']},{bd['y']}) {bd['w']}×{bd['h']}px")
# ── 8. Debug overlay ──────────────────────────────────────────────────────
if debug:
dbg = pil_image.copy()
dbg = pil_image.copy()
dbg_draw = ImageDraw.Draw(dbg)
for bubble_id, (x, y, w, h) in sorted(bubble_boxes.items()):
dbg_draw.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=2)
dbg_draw.text((x + 4, y + 4), f"#{bubble_id}", fill=(255, 0, 0))
for bid, bd in sorted(bubble_boxes.items()):
color = (0, 200, 0) if bid in translate_ids \
else (160, 160, 160)
dbg_draw.rectangle(
[bd["x"], bd["y"],
bd["x"] + bd["w"], bd["y"] + bd["h"]],
outline=color, width=2)
dbg_draw.text((bd["x"] + 3, bd["y"] + 3),
f"#{bid}", fill=color)
dbg.save("debug_render.png")
print("\n 🐛 Debug render saved → debug_render.png")
print("\n 🐛 debug_render.png saved "
"(green=translated, grey=untouched)")
# ── 9. Save output ────────────────────────────────────────────────────────
print(f"\n💾 Saving → {output_image}")
pil_image.save(output_image, "PNG")
print(f" ✅ Done! Open: {output_image}")
print(" ✅ Done!")
print("=" * 55)
@@ -366,7 +339,6 @@ def render_translated_page(
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
render_translated_page(
input_image = "page.png",
output_image = "page_translated.png",
@@ -375,7 +347,6 @@ if __name__ == "__main__":
font_path = "font.ttf",
font_fallback = "/System/Library/Fonts/Helvetica.ttc",
font_color = (0, 0, 0),
erase_padding = 6,
text_padding = 12,
text_padding = 8,
debug = True,
)