Compare commits

...

27 Commits

Author SHA1 Message Date
Guillem Hernandez Sola
037dadd920 Added fixes 2026-04-22 18:01:29 +02:00
Guillem Hernandez Sola
285e9ca393 Cleaning 2026-04-22 16:28:10 +02:00
Guillem Hernandez Sola
d77db83cfe Everything 2026-04-22 16:27:56 +02:00
Guillem Hernandez Sola
b730037a06 Added big stuff 2026-04-22 16:18:59 +02:00
Guillem Hernandez Sola
7837aeaa9b Added fixes 2026-04-22 14:05:25 +02:00
Guillem Hernandez Sola
455b4ad82c starting point 2026-04-22 11:49:25 +02:00
Guillem Hernandez Sola
b6b0df4774 Added stuff 2026-04-22 10:51:57 +02:00
Guillem Hernandez Sola
512bb32f66 Added all 2026-04-21 23:27:56 +02:00
Guillem Hernandez Sola
494631c967 Some fixes running 2026-04-21 23:03:17 +02:00
Guillem Hernandez Sola
27a3e6f98a Added some changes2 2026-04-21 22:43:17 +02:00
Guillem Hernandez Sola
f00647e668 Added new styles 2026-04-21 21:45:46 +02:00
Guillem Hernandez Sola
a5c81f4ff0 Added new styles 2026-04-21 21:27:22 +02:00
Guillem Hernandez Sola
f56ee49abf Added all 2026-04-21 21:03:35 +02:00
Guillem Hernandez Sola
ba5f001e75 ADded pipeline-translator.py 2026-04-21 20:09:11 +02:00
Guillem Hernandez Sola
2fb5e9eb7b Added improvements 2026-04-21 19:51:59 +02:00
Guillem Hernandez Sola
dfa52f54eb Added new rendered 2026-04-21 18:53:34 +02:00
Guillem Hernandez Sola
bd475d8f01 Added all 2026-04-21 17:34:10 +02:00
Guillem Hernandez Sola
f753a78ba4 Split 2026-04-21 17:12:32 +02:00
Guillem Hernandez Sola
3800f6cf3f Added bubble split 2026-04-20 19:02:24 +02:00
Guillem Hernandez Sola
beb8557e19 Started pipelines, render not working 2026-04-16 21:17:00 +02:00
Guillem Hernandez Sola
39765a6cf1 Added pipeline 2026-04-16 19:58:05 +02:00
Guillem Hernandez Sola
5aa79d986a First beta 2 2026-04-15 21:41:01 +02:00
Guillem Hernandez Sola
dd1cf54f86 First beta 2026-04-15 21:12:41 +02:00
Guillem Hernandez Sola
5ef8c39f69 Added hybrid 2026-04-15 16:22:35 +02:00
Guillem Hernandez Sola
eadc28154a Improving white coloring 2026-04-14 20:38:05 +02:00
Guillem Hernandez Sola
f95b7d32d4 Added some fixes 2026-04-14 20:08:51 +02:00
Guillem Hernandez Sola
0069da706b stable version 2026-04-14 19:25:22 +02:00
27 changed files with 4532 additions and 1831 deletions

8
.gitignore vendored
View File

@@ -7,6 +7,13 @@
.AppleDouble
.LSOverride
.venv311/
#Folders to test
Spy_x_Family_076/
Dandadan_059/
Lv999/
# Icon must end with two \r
Icon
@@ -21,6 +28,7 @@ Icon
*.jpg
*.jpeg
*.json
*.webp
# Files that might appear in the root of a volume
.DocumentRevisions-V100

Binary file not shown.

Before

Width:  |  Height:  |  Size: 257 KiB

View File

@@ -0,0 +1,53 @@
# Manga Translator OCR Pipeline
A robust manga/comic OCR + translation pipeline with:
- EasyOCR (default, reliable on macOS M1)
- Optional PaddleOCR (auto-fallback if unavailable)
- Bubble clustering and line-level boxes
- Robust reread pass (multi-preprocessing + slight rotation)
- Translation export + debug overlays
---
## ✨ Features
- OCR from raw manga pages
- Noise filtering (`BOX` debug artifacts, tiny garbage tokens, symbols)
- Speech bubble grouping
- Reading order estimation (`ltr` / `rtl`)
- Translation output (`output.txt`)
- Structured bubble metadata (`bubbles.json`)
- Visual debug output (`debug_clusters.png`)
---
## 🧰 Requirements
- macOS (Apple Silicon supported)
- Python **3.11** recommended
- Homebrew (for Python install)
---
## 🚀 Setup (Python 3.11 venv)
```bash
cd /path/to/manga-translator
# 1) Create venv with 3.11
/opt/homebrew/bin/python3.11 -m venv venv
# 2) Activate
source venv/bin/activate
# 3) Verify interpreter
python -V
# expected: Python 3.11.x
# 4) Install dependencies
python -m pip install --upgrade pip setuptools wheel
python -m pip install -r requirements.txt
# Optional Paddle runtime
python -m pip install paddlepaddle || true

BIN
fonts/ComicNeue-Bold.ttf Executable file

Binary file not shown.

Binary file not shown.

BIN
fonts/Komika.ttf Normal file

Binary file not shown.

Binary file not shown.

BIN
fonts/animeace2_bld.ttf Normal file

Binary file not shown.

BIN
fonts/animeace2_ital.ttf Normal file

Binary file not shown.

BIN
fonts/animeace2_reg.ttf Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env python3
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
# Import functions from manga-translator.py
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)
image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
# Load current bubbles to see what box 5 contains
with open('bubbles.json') as f:
bubbles_data = json.load(f)
box5_data = bubbles_data['5']
box5_bounds = (box5_data['x'], box5_data['y'], box5_data['x'] + box5_data['w'], box5_data['y'] + box5_data['h'])
print(f'Box 5 bounds (xyxy): {box5_bounds}')
print()
# Print all detections sorted by position
print('All raw detections:')
for i, (bbox, text, conf) in enumerate(sorted(raw, key=lambda x: (mt.quad_bbox(x[0])[1], mt.quad_bbox(x[0])[0]))):
b = mt.quad_bbox(bbox)
t_norm = mt.normalize_text(text)
print(f'{i:2d}. [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] conf={conf:.2f} text="{t_norm}"')
# Check if this overlaps with box 5
b5_x1, b5_y1, b5_x2, b5_y2 = box5_bounds
if not (b[2] < b5_x1 or b[0] > b5_x2 or b[3] < b5_y1 or b[1] > b5_y2):
print(f' ^ OVERLAPS with Box 5!')

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)
image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
t = mt.normalize_text(text)
qb = mt.quad_bbox(bbox)
if conf < 0.12:
continue
if len(t) < 1:
continue
if mt.is_noise_text(t):
continue
if mt.is_sound_effect(t):
continue
if mt.is_title_text(t):
continue
filtered.append((bbox, t, conf))
# Run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
filtered, image.shape, gap_px=18, bbox_padding=3
)
print("=== BUBBLE 7 & 8 ANALYSIS ===\n")
print("Current Bubble 7 (right side content):")
for bid in [7]:
if bid in bubble_indices:
box = bubble_boxes[bid]
print(f" Box: {box}")
print(f" Indices: {bubble_indices[bid]}")
indices = bubble_indices[bid]
boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
min_x = min(b[0] for b in boxes)
max_x = max(b[2] for b in boxes)
print(f" X range: {min_x} - {max_x}")
for idx in indices:
b = mt.quad_bbox(filtered[idx][0])
print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")
print("\nCurrent Bubble 8 (left side content):")
for bid in [8]:
if bid in bubble_indices:
box = bubble_boxes[bid]
print(f" Box: {box}")
print(f" Indices: {bubble_indices[bid]}")
indices = bubble_indices[bid]
boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
min_x = min(b[0] for b in boxes)
max_x = max(b[2] for b in boxes)
print(f" X range: {min_x} - {max_x}")
for idx in indices:
b = mt.quad_bbox(filtered[idx][0])
print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")
# Check the horizontal gap between them
print("\n=== GAP ANALYSIS ===")
if 7 in bubble_indices and 8 in bubble_indices:
boxes7 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[7]]
boxes8 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[8]]
max_x7 = max(b[2] for b in boxes7)
min_x8 = min(b[0] for b in boxes8)
print(f"Bubble 7 max X: {max_x7}")
print(f"Bubble 8 min X: {min_x8}")
print(f"Horizontal gap: {min_x8 - max_x7}")
# Check Y overlap
min_y7 = min(b[1] for b in boxes7)
max_y7 = max(b[3] for b in boxes7)
min_y8 = min(b[1] for b in boxes8)
max_y8 = max(b[3] for b in boxes8)
print(f"\nBubble 7 Y range: {min_y7} - {max_y7}")
print(f"Bubble 8 Y range: {min_y8} - {max_y8}")
print(f"Y overlap: {max(0, min(max_y7, max_y8) - max(min_y7, min_y8))} pixels")

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)
image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
t = mt.normalize_text(text)
qb = mt.quad_bbox(bbox)
if conf < 0.12:
continue
if len(t) < 1:
continue
if mt.is_noise_text(t):
continue
if mt.is_sound_effect(t):
continue
if mt.is_title_text(t):
continue
filtered.append((bbox, t, conf))
print(f"Filtered {len(filtered)} detections")
# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
filtered, image.shape, gap_px=18, bbox_padding=3
)
# Find which bubble contains box 5
box5_region = (378, 570, 536, 753)
print("\n=== BUBBLES ===")
for bid, box in bubble_boxes.items():
print(f"Bubble {bid}: {box}")
print(f" Indices: {bubble_indices[bid]}")
print(f" Detections:")
for idx in bubble_indices[bid]:
b = mt.quad_bbox(filtered[idx][0])
print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")

77
older-code/check_box7.py Normal file
View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)
image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
t = mt.normalize_text(text)
qb = mt.quad_bbox(bbox)
if conf < 0.12:
continue
if len(t) < 1:
continue
if mt.is_noise_text(t):
continue
if mt.is_sound_effect(t):
continue
if mt.is_title_text(t):
continue
filtered.append((bbox, t, conf))
# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
filtered, image.shape, gap_px=18, bbox_padding=3
)
# Check current bubbles.json for reference
with open('bubbles.json') as f:
old_bubbles = json.load(f)
print("=== BOX 5 ===")
print(f"Old bounds (from bubbles.json): x={old_bubbles['5']['x']}, y={old_bubbles['5']['y']}, w={old_bubbles['5']['w']}, h={old_bubbles['5']['h']}")
print(f" (xyxy): ({old_bubbles['5']['x']}, {old_bubbles['5']['y']}, {old_bubbles['5']['x'] + old_bubbles['5']['w']}, {old_bubbles['5']['y'] + old_bubbles['5']['h']})")
# Find bubble at that location in current grouping
for bid, box in bubble_boxes.items():
if box[0] == 371 and box[1] == 563: # New box 5 location
print(f"Current bubble {bid}: {box}")
print(f" Detections: {bubble_indices[bid]}")
for idx in bubble_indices[bid]:
b = mt.quad_bbox(filtered[idx][0])
print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
print("\n=== BOX 7 ===")
print(f"Old bounds (from bubbles.json): x={old_bubbles['7']['x']}, y={old_bubbles['7']['y']}, w={old_bubbles['7']['w']}, h={old_bubbles['7']['h']}")
print(f" (xyxy): ({old_bubbles['7']['x']}, {old_bubbles['7']['y']}, {old_bubbles['7']['x'] + old_bubbles['7']['w']}, {old_bubbles['7']['y'] + old_bubbles['7']['h']})")
# Find corresponding bubble
for bid, box in bubble_boxes.items():
x1, y1, x2, y2 = box
# Check if this overlaps with old box 7
old_x1, old_y1 = old_bubbles['7']['x'], old_bubbles['7']['y']
old_x2 = old_x1 + old_bubbles['7']['w']
old_y2 = old_y1 + old_bubbles['7']['h']
if not (x2 < old_x1 or x1 > old_x2 or y2 < old_y1 or y1 > old_y2):
print(f"Current bubble {bid}: {box}")
print(f" Detections: {bubble_indices[bid]}")
for idx in bubble_indices[bid]:
b = mt.quad_bbox(filtered[idx][0])
print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import numpy as np
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)
image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
# Filter
filtered = []
for bbox, text, conf in raw:
t = mt.normalize_text(text)
if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
continue
filtered.append((bbox, t, conf))
# Get the indices we're interested in (left and right bubbles)
left_indices = [41, 42, 43, 44, 45, 46] # LET, GO, OFF, ME, AL-, REA-
right_indices = [47, 48, 49, 50, 51, 52, 53, 54] # DON'T, WORRY!, HARUKO, ...
print("=== CHECKING GROUPING CONDITIONS ===\n")
# Check if they would be united in group_tokens
boxes_left = [mt.quad_bbox(filtered[i][0]) for i in left_indices]
boxes_right = [mt.quad_bbox(filtered[i][0]) for i in right_indices]
# Check overlap_or_near
print("Checking overlap_or_near with gap=18:")
for li, bi in enumerate(left_indices):
for ri, bj in enumerate(right_indices):
b_left = boxes_left[li]
b_right = boxes_right[ri]
gap_x = max(0, max(b_left[0], b_right[0]) - min(b_left[2], b_right[2]))
gap_y = max(0, max(b_left[1], b_right[1]) - min(b_left[3], b_right[3]))
overlaps = gap_x <= 18 and gap_y <= 18
if overlaps:
print(f" {bi} and {bj} overlap/near: gap_x={gap_x}, gap_y={gap_y}")
# Check distance check
hs = [max(1.0, b[3] - b[1]) for b in [*boxes_left, *boxes_right]]
med_h = float(np.median(hs)) if hs else 12.0
dist_thresh = max(20.0, med_h * 2.2)
print(f"\nMedian height: {med_h}")
print(f"Distance threshold: {dist_thresh}")
print("\nChecking distance check:")
for li, bi in enumerate(left_indices[:1]): # Just check first from each
for ri, bj in enumerate(right_indices[:1]):
b_left = boxes_left[li]
b_right = boxes_right[ri]
cx_left = (b_left[0] + b_left[2]) / 2.0
cy_left = (b_left[1] + b_left[3]) / 2.0
cx_right = (b_right[0] + b_right[2]) / 2.0
cy_right = (b_right[1] + b_right[3]) / 2.0
d = ((cx_left - cx_right) ** 2 + (cy_left - cy_right) ** 2) ** 0.5
within_dist = d <= dist_thresh
within_y = abs(cy_left - cy_right) <= med_h * 3.0
print(f" {bi} to {bj}: distance={d:.1f}, within_dist={within_dist}, within_y_tol={within_y}")

View File

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
# Debug script to see what bubbles are produced after splitting
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import json
import numpy as np
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)
image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
# Full filtering as pipeline does
filtered = []
skipped = 0
ih, iw = image.shape[:2]
for bbox, text, conf in raw:
t = mt.normalize_text(text)
qb = mt.quad_bbox(bbox)
if conf < 0.12:
skipped += 1
continue
if len(t) < 1:
skipped += 1
continue
if mt.is_noise_text(t):
skipped += 1
continue
if mt.is_sound_effect(t):
skipped += 1
continue
if mt.is_title_text(t):
skipped += 1
continue
if qb[1] < int(ih * mt.TOP_BAND_RATIO):
if conf < 0.70 and len(t) >= 5:
skipped += 1
continue
filtered.append((bbox, t, conf))
resolved_gap = mt.auto_gap(image_path)
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
)
print("=== AFTER GROUPING ===")
print(f"Bubbles dict keys: {sorted(bubbles.keys())}")
for bid in [7, 8]:
if bid in bubbles:
print(f"\nBubble {bid}:")
print(f" Box: {bubble_boxes[bid]}")
print(f" Indices ({len(bubble_indices[bid])}): {bubble_indices[bid]}")
print(f" Quads ({len(bubble_quads[bid])})")
# Now simulate the split logic
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
splits_performed = []
for bid in list(bubbles.keys()):
box = bubble_boxes[bid]
bubble_split = None
# Try split
split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
if split_result:
box_left, box_right, split_x = split_result
# ... split logic ...
bubble_split = "panel_split"
if bubble_split is None:
col_split = mt.split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
if col_split:
bubble_split = "column_split"
if bubble_split:
splits_performed.append(f"Bubble {bid}: {bubble_split}")
# Don't actually split here, just mark it
else:
# No split
new_bubbles[bid] = bubbles[bid]
new_bubble_boxes[bid] = bubble_boxes[bid]
new_bubble_quads[bid] = bubble_quads[bid]
new_bubble_indices[bid] = bubble_indices[bid]
print("\n=== AFTER SPLIT LOGIC ===")
print(f"Splits detected: {len(splits_performed)}")
for s in splits_performed:
print(f" {s}")
print(f"\nBubbles dict keys: {sorted(new_bubbles.keys())}")
for bid in [7, 8]:
if bid in new_bubbles:
print(f"\nBubble {bid}:")
print(f" Box: {new_bubble_boxes[bid]}")
print(f" Indices ({len(new_bubble_indices[bid])}): {new_bubble_indices[bid][:3]}...")

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from pathlib import Path
TARGET = Path("manga-translator.py")
def cut_after_first_entrypoint(text: str) -> str:
    """
    Keep only first full __main__ block and remove duplicated tail if present.
    """
    match = re.search(r'(?m)^if __name__ == "__main__":\s*$', text)
    if match is None:
        # No entrypoint at all: nothing to trim.
        return text
    start = match.start()
    # Walk the lines after the entrypoint header; keep blank lines and
    # indented lines (the guarded body), stop at the first dedented statement
    # — everything from there on is the duplicated tail.
    block_lines = text[start:].splitlines(True)
    kept = [block_lines[0]]  # the `if __name__ ...` line itself
    for line in block_lines[1:]:
        if line.strip() == "":
            kept.append(line)
            continue
        if not line.startswith((" ", "\t")):
            break
        kept.append(line)
    return text[:start] + "".join(kept)
def replace_bad_vars(text: str) -> str:
    """Rewrite stale call sites so they use the pipeline's current variable names."""
    replacements = (
        (
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr, image_bgr)",
            "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)",
        ),
        (
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr)",
            "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered)",
        ),
    )
    for stale, current in replacements:
        text = text.replace(stale, current)
    return text
def ensure_autofix_chain(text: str) -> str:
# Replace the single-call auto-fix block with a two-step chain:
# auto_fix_bubble_detection first, then merge_micro_boxes_relaxed.
# If `old` is not present verbatim, str.replace is a no-op, so the patch
# is safe to re-run.
# NOTE(review): the leading spaces inside these literals look collapsed by
# the diff viewer — confirm the exact indentation against the real file.
old = (
" # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
" if auto_fix_bubbles:\n"
" bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
)
new = (
" # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
" if auto_fix_bubbles:\n"
" bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection(\n"
" bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image)\n"
" bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(\n"
" bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
)
return text.replace(old, new)
def ensure_split_commit(text: str) -> str:
# Inject the "commit the split results" assignments just before the
# nested/duplicate-box removal section. Idempotent: bails out when the
# marker is absent or the assignments are already present.
# NOTE(review): leading spaces inside these string literals appear collapsed
# by the diff viewer — verify against the real file.
marker = " # ── Remove nested / duplicate boxes ──────────────────────────────────\n"
if marker not in text:
return text
if "bubbles = new_bubbles" in text:
# Already patched — do not inject twice.
return text
inject = (
" bubbles = new_bubbles\n"
" bubble_boxes = new_bubble_boxes\n"
" bubble_quads = new_bubble_quads\n"
" bubble_indices = new_bubble_indices\n\n"
)
return text.replace(marker, inject + marker)
def ensure_rescue_pipeline(text: str) -> str:
# Append the confidence-floor + token-rescue steps right after the
# "Kept/Skipped" summary print. Idempotent: skips when the anchor is
# missing or the rescue call already exists.
# NOTE(review): leading spaces inside these string literals appear collapsed
# by the diff viewer — verify against the real file.
anchor = ' print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
if anchor not in text:
return text
if "rescue_name_and_short_tokens(raw" in text:
# Already patched — do not inject twice.
return text
block = (
' print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
' # Protect short dialogue tokens confidence\n'
' tmp = []\n'
' for bbox, t, conf in filtered:\n'
' tmp.append((bbox, t, maybe_conf_floor_for_protected(t, conf, floor=0.40)))\n'
' filtered = tmp\n'
' # Rescue names/short tokens dropped by strict filters\n'
' rescued = rescue_name_and_short_tokens(raw, min_conf=0.20)\n'
' filtered = merge_rescued_items(filtered, rescued, iou_threshold=0.55)\n'
)
return text.replace(anchor, block)
def main():
    """Apply every patch pass to manga-translator.py in place."""
    if not TARGET.exists():
        raise FileNotFoundError(f"Not found: {TARGET}")
    out = TARGET.read_text(encoding="utf-8")
    # Order matters: trim the duplicated tail first, then rewrite call
    # sites, then inject the auto-fix / split-commit / rescue blocks.
    for patch_pass in (
        cut_after_first_entrypoint,
        replace_bad_vars,
        ensure_autofix_chain,
        ensure_split_commit,
        ensure_rescue_pipeline,
    ):
        out = patch_pass(out)
    TARGET.write_text(out, encoding="utf-8")
    print("✅ Patched manga-translator.py")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Regenerate debug_clusters.png with the new split bubbles.json
"""
import json
import cv2
import numpy as np
def quad_bbox(quad):
    """Return the axis-aligned (x1, y1, x2, y2) box enclosing a quad's points."""
    x_min = min(pt[0] for pt in quad)
    y_min = min(pt[1] for pt in quad)
    x_max = max(pt[0] for pt in quad)
    y_max = max(pt[1] for pt in quad)
    return (x_min, y_min, x_max, y_max)
# Renders bubbles.json on top of the page image: whites out every OCR quad,
# then draws labelled green boxes per bubble, and writes the overlay PNG.
# NOTE(review): indentation was lost in this diff view; code kept verbatim —
# restore the original indentation before running.
def save_debug_clusters_from_json(
image_path="004.png",
bubbles_path="bubbles.json",
out_path="debug_clusters.png"
):
img = cv2.imread(image_path)
if img is None:
print(f"❌ Cannot load image: {image_path}")
return
# Load bubbles.json
with open(bubbles_path, "r", encoding="utf-8") as f:
bubbles_data = json.load(f)
# Draw all quad polygons in white (erasing original text)
for bid_str, bubble_info in bubbles_data.items():
for quad in bubble_info.get("quads", []):
pts = np.array(quad, dtype=np.int32)
cv2.fillPoly(img, [pts], (255, 255, 255))
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
# Draw bounding boxes with labels
for bid_str, bubble_info in bubbles_data.items():
bid = int(bid_str)
x = bubble_info["x"]
y = bubble_info["y"]
w = bubble_info["w"]
h = bubble_info["h"]
x2 = x + w
y2 = y + h
cv2.rectangle(img, (x, y), (x2, y2), (0, 220, 0), 2)
# max(15, ...) keeps the label visible for boxes touching the top edge
cv2.putText(img, f"BOX#{bid}", (x + 2, max(15, y + 16)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
cv2.imwrite(out_path, img)
print(f"✅ Saved: {out_path}")
if __name__ == "__main__":
save_debug_clusters_from_json()

183
older-code/split_bubbles.py Normal file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Helper script to split bubbles with multiple separate text regions.
Run this to manually split Box 2 and Box 7 from debug_clusters.png
"""
import json
import numpy as np
from typing import List, Tuple, Dict
def quad_bbox(quad):
    """Collapse a 4-point quad to its axis-aligned bounding box (x1, y1, x2, y2)."""
    left = min(p[0] for p in quad)
    top = min(p[1] for p in quad)
    right = max(p[0] for p in quad)
    bottom = max(p[1] for p in quad)
    return (left, top, right, bottom)
def boxes_union_xyxy(boxes):
    """Smallest xyxy box covering every non-None box; None when nothing to merge."""
    found = False
    x1 = y1 = x2 = y2 = 0
    for box in boxes:
        if box is None:
            continue
        if not found:
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
            found = True
        else:
            x1 = min(x1, box[0])
            y1 = min(y1, box[1])
            x2 = max(x2, box[2])
            y2 = max(y2, box[3])
    if not found:
        return None
    return (int(x1), int(y1), int(x2), int(y2))
def xyxy_to_xywh(bbox):
    """Convert an (x1, y1, x2, y2) box to an x/y/width/height dict (None passes through)."""
    if bbox is None:
        return None
    x1, y1, x2, y2 = bbox
    # width/height are computed before truncation, matching int(x2 - x1)
    return {
        "x": int(x1),
        "y": int(y1),
        "w": int(x2 - x1),
        "h": int(y2 - y1),
    }
def bbox_area_xyxy(b):
    """Area of a bounding box in xyxy format; None counts as zero."""
    if b is None:
        return 0
    x1, y1, x2, y2 = b
    width = x2 - x1
    height = y2 - y1
    return width * height
def split_bubble_by_vertical_gap(bubble_id: int, bubble_data: Dict, filtered_indices_map: Dict):
    """
    Attempt to split a bubble by detecting a significant vertical gap between columns of text.
    Returns: (left_indices, right_indices, gap_size) or None if no split
    """
    quad_bboxes = bubble_data['quad_bboxes']  # fetched for parity; split logic uses quads
    quads = bubble_data['quads']
    if len(quads) < 2:
        return None
    # Tag each quad with its original position and horizontal center.
    centers = []
    for idx, quad in enumerate(quads):
        bb = quad_bbox(quad)
        centers.append((idx, (bb[0] + bb[2]) / 2.0, bb))
    ordered = sorted(centers, key=lambda item: item[1])
    # Locate the widest gap between neighbouring centers.
    widest = 0
    cut_at = -1
    for pos in range(len(ordered) - 1):
        spread = ordered[pos + 1][1] - ordered[pos][1]
        if spread > widest:
            widest = spread
            cut_at = pos
    min_gap_threshold = 80  # pixels
    if cut_at == -1 or widest <= min_gap_threshold:
        return None
    # Map back to ORIGINAL quad indices on each side of the cut.
    left = [ordered[j][0] for j in range(cut_at + 1)]
    right = [ordered[j][0] for j in range(cut_at + 1, len(ordered))]
    return (left, right, widest)
# Reads a bubbles JSON, splits the requested bubble IDs at their widest
# vertical text gap, and writes the result. The left half keeps the original
# ID; the right half gets a fresh ID.
# NOTE(review): indentation was lost in this diff view; code kept verbatim.
# NOTE(review): bubble_ids_to_split=[2, 7] is a mutable default argument —
# it is never mutated here, but a tuple would be safer.
def split_bubbles_in_json(input_file="bubbles.json", output_file="bubbles_split.json", bubble_ids_to_split=[2, 7]):
"""Split specified bubbles in the JSON file"""
with open(input_file, "r", encoding="utf-8") as f:
data = json.load(f)
new_data = {}
next_bid = max(int(k) for k in data.keys()) + 1
for bid_str, bubble_data in data.items():
bid = int(bid_str)
if bid not in bubble_ids_to_split:
# Keep original
new_data[bid_str] = bubble_data
continue
# Try to split
split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})
if split_result:
left_indices, right_indices, gap_size = split_result
print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
print(f" Left indices: {left_indices}")
print(f" Right indices: {right_indices}")
# Create left bubble - keep the original bubble ID
left_quads = [bubble_data['quads'][i] for i in left_indices]
left_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in left_indices]
left_bbox = boxes_union_xyxy([quad_bbox(q) for q in left_quads])
# Pad by 3 px, clamped at the image origin
left_bbox_padded = (
max(0, left_bbox[0] - 3),
max(0, left_bbox[1] - 3),
left_bbox[2] + 3,
left_bbox[3] + 3
)
print(f" Left bbox: {left_bbox} -> padded: {left_bbox_padded}")
new_data[str(bid)] = {
"x": left_bbox_padded[0],
"y": left_bbox_padded[1],
"w": left_bbox_padded[2] - left_bbox_padded[0],
"h": left_bbox_padded[3] - left_bbox_padded[1],
"reading_order": bubble_data.get("reading_order", bid),
"quad_bboxes": left_quad_bboxes,
"quads": left_quads,
"text_bbox": xyxy_to_xywh(left_bbox),
"line_bboxes": [],
"line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads])),
"line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads]))),
}
# Create right bubble - with new ID
right_quads = [bubble_data['quads'][i] for i in right_indices]
right_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in right_indices]
right_bbox = boxes_union_xyxy([quad_bbox(q) for q in right_quads])
right_bbox_padded = (
max(0, right_bbox[0] - 3),
max(0, right_bbox[1] - 3),
right_bbox[2] + 3,
right_bbox[3] + 3
)
print(f" Right bbox: {right_bbox} -> padded: {right_bbox_padded}")
new_data[str(next_bid)] = {
"x": right_bbox_padded[0],
"y": right_bbox_padded[1],
"w": right_bbox_padded[2] - right_bbox_padded[0],
"h": right_bbox_padded[3] - right_bbox_padded[1],
"reading_order": bubble_data.get("reading_order", next_bid),
"quad_bboxes": right_quad_bboxes,
"quads": right_quads,
"text_bbox": xyxy_to_xywh(right_bbox),
"line_bboxes": [],
"line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads])),
"line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads]))),
}
next_bid += 1
else:
# No split needed
new_data[bid_str] = bubble_data
# Write output
with open(output_file, "w", encoding="utf-8") as f:
json.dump(new_data, f, indent=2, ensure_ascii=False)
print(f"\n✅ Saved to {output_file}")
if __name__ == "__main__":
split_bubbles_in_json(
input_file="bubbles_original.json", # Always read from original
output_file="bubbles_split.json",
bubble_ids_to_split=[2, 7]
)

154
older-code/split_final.py Normal file
View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON
"""
import json
import copy
def quad_bbox(quad):
    """Bounding box (min-x, min-y, max-x, max-y) over a quad's points."""
    xs = tuple(point[0] for point in quad)
    ys = tuple(point[1] for point in quad)
    return (min(xs), min(ys), max(xs), max(ys))
def boxes_union_xyxy(boxes):
    """Union (xyxy, int) of all non-None boxes; None when there is nothing to merge."""
    present = [box for box in boxes if box is not None]
    if not present:
        return None
    left = min(box[0] for box in present)
    top = min(box[1] for box in present)
    right = max(box[2] for box in present)
    bottom = max(box[3] for box in present)
    return (int(left), int(top), int(right), int(bottom))
def xyxy_to_xywh(bbox):
    """Translate corner-form (x1, y1, x2, y2) into an x/y/w/h dict; None passes through."""
    if bbox is None:
        return None
    x1, y1, x2, y2 = bbox
    # Note: width/height are truncated after subtraction, matching int(x2 - x1).
    return dict(x=int(x1), y=int(y1), w=int(x2 - x1), h=int(y2 - y1))
def bbox_area_xyxy(b):
    """Area of an xyxy box; None counts as zero."""
    if b is None:
        return 0
    return (b[2] - b[0]) * (b[3] - b[1])
# Load original
with open("bubbles_original.json", "r", encoding="utf-8") as f:
original = json.load(f)
new_data = {}
# Copy all non-split bubbles
for bid_str, bubble_data in original.items():
bid = int(bid_str)
if bid not in [2, 7]:
new_data[bid_str] = copy.deepcopy(bubble_data)
# Split Box 2
print("🔀 Splitting Box 2...")
box2_data = original["2"]
left_indices_2 = [10, 1, 2, 4, 8, 0, 3, 6, 11, 12]
right_indices_2 = [5, 7, 9]
# Left part keeps ID 2
left_quads_2 = [box2_data['quads'][i] for i in left_indices_2]
left_quad_bboxes_2 = [box2_data['quad_bboxes'][i] for i in left_indices_2]
left_bbox_2 = boxes_union_xyxy([quad_bbox(q) for q in left_quads_2])
left_bbox_2_padded = (max(0, left_bbox_2[0]-3), max(0, left_bbox_2[1]-3), left_bbox_2[2]+3, left_bbox_2[3]+3)
new_data["2"] = {
"x": left_bbox_2_padded[0],
"y": left_bbox_2_padded[1],
"w": left_bbox_2_padded[2] - left_bbox_2_padded[0],
"h": left_bbox_2_padded[3] - left_bbox_2_padded[1],
"reading_order": box2_data.get("reading_order", 2),
"quad_bboxes": left_quad_bboxes_2,
"quads": [[list(p) for p in quad] for quad in left_quads_2], # Explicit list conversion
"text_bbox": xyxy_to_xywh(left_bbox_2),
"line_bboxes": [],
"line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads_2])),
"line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads_2]))),
}
print(f" Left: y={new_data['2']['y']}, h={new_data['2']['h']}, quads={len(left_quads_2)}")
# Right part gets new ID 8
right_quads_2 = [box2_data['quads'][i] for i in right_indices_2]
right_quad_bboxes_2 = [box2_data['quad_bboxes'][i] for i in right_indices_2]
right_bbox_2 = boxes_union_xyxy([quad_bbox(q) for q in right_quads_2])
right_bbox_2_padded = (max(0, right_bbox_2[0]-3), max(0, right_bbox_2[1]-3), right_bbox_2[2]+3, right_bbox_2[3]+3)
new_data["8"] = {
"x": right_bbox_2_padded[0],
"y": right_bbox_2_padded[1],
"w": right_bbox_2_padded[2] - right_bbox_2_padded[0],
"h": right_bbox_2_padded[3] - right_bbox_2_padded[1],
"reading_order": box2_data.get("reading_order", 8),
"quad_bboxes": right_quad_bboxes_2,
"quads": [[list(p) for p in quad] for quad in right_quads_2], # Explicit list conversion
"text_bbox": xyxy_to_xywh(right_bbox_2),
"line_bboxes": [],
"line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads_2])),
"line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads_2]))),
}
print(f" Right: y={new_data['8']['y']}, h={new_data['8']['h']}, quads={len(right_quads_2)}")
# Split Box 7
print("\n🔀 Splitting Box 7...")
box7_data = original["7"]
left_indices_7 = [8, 13, 4, 11, 2, 6]
right_indices_7 = [0, 5, 1, 3, 7, 10, 12, 9]
# Left part keeps ID 7
left_quads_7 = [box7_data['quads'][i] for i in left_indices_7]
left_quad_bboxes_7 = [box7_data['quad_bboxes'][i] for i in left_indices_7]
left_bbox_7 = boxes_union_xyxy([quad_bbox(q) for q in left_quads_7])
left_bbox_7_padded = (max(0, left_bbox_7[0]-3), max(0, left_bbox_7[1]-3), left_bbox_7[2]+3, left_bbox_7[3]+3)
new_data["7"] = {
"x": left_bbox_7_padded[0],
"y": left_bbox_7_padded[1],
"w": left_bbox_7_padded[2] - left_bbox_7_padded[0],
"h": left_bbox_7_padded[3] - left_bbox_7_padded[1],
"reading_order": box7_data.get("reading_order", 7),
"quad_bboxes": left_quad_bboxes_7,
"quads": [[list(p) for p in quad] for quad in left_quads_7], # Explicit list conversion
"text_bbox": xyxy_to_xywh(left_bbox_7),
"line_bboxes": [],
"line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads_7])),
"line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads_7]))),
}
print(f" Left: y={new_data['7']['y']}, h={new_data['7']['h']}, quads={len(left_quads_7)}")
# Right part gets new ID 9
right_quads_7 = [box7_data['quads'][i] for i in right_indices_7]
right_quad_bboxes_7 = [box7_data['quad_bboxes'][i] for i in right_indices_7]
right_bbox_7 = boxes_union_xyxy([quad_bbox(q) for q in right_quads_7])
right_bbox_7_padded = (max(0, right_bbox_7[0]-3), max(0, right_bbox_7[1]-3), right_bbox_7[2]+3, right_bbox_7[3]+3)
new_data["9"] = {
"x": right_bbox_7_padded[0],
"y": right_bbox_7_padded[1],
"w": right_bbox_7_padded[2] - right_bbox_7_padded[0],
"h": right_bbox_7_padded[3] - right_bbox_7_padded[1],
"reading_order": box7_data.get("reading_order", 9),
"quad_bboxes": right_quad_bboxes_7,
"quads": [[list(p) for p in quad] for quad in right_quads_7], # Explicit list conversion
"text_bbox": xyxy_to_xywh(right_bbox_7),
"line_bboxes": [],
"line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads_7])),
"line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads_7]))),
}
print(f" Right: y={new_data['9']['y']}, h={new_data['9']['h']}, quads={len(right_quads_7)}")
# Sort by ID for output
new_data_sorted = {}
for bid in sorted([int(k) for k in new_data.keys()]):
new_data_sorted[str(bid)] = new_data[str(bid)]
with open("bubbles.json", "w", encoding="utf-8") as f:
json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)
print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Debug harness: exercise split_panel_box() on bubble 7 of a single page.

Runs the OCR → filter → group steps from manga-translator.py on one image
and reports whether the panel-split heuristic fires for bubble id 7, and
how the token indices would be divided between the two halves.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
import cv2
import numpy as np  # kept: imported by the original script (currently unused)
import importlib.util

# manga-translator.py has a dash in its filename, so a normal import cannot
# reach it — load it dynamically by path instead.
spec = importlib.util.spec_from_file_location(
    "manga_translator",
    "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py",
)
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

# Generalized: allow the page to be given on the command line, falling back
# to the original hard-coded '004.png' (behavior unchanged with no args).
image_path = sys.argv[1] if len(sys.argv) > 1 else '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
# FIX: fail fast with a clear message instead of crashing on image.shape
# when the file is missing/unreadable (cv2.imread returns None silently).
if image is None:
    sys.exit(f"Could not load image: {image_path}")

# Full filtering (mirrors the production pipeline's token filter)
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)
    # Drop low-confidence, empty, noise, sound-effect and title tokens.
    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    # Tokens in the top band of the page need higher confidence unless short.
    if qb[1] < int(image.shape[0] * mt.TOP_BAND_RATIO):
        if conf < 0.70 and len(t) >= 5:
            continue
    filtered.append((bbox, t, conf))

# Get grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=mt.auto_gap(image_path), bbox_padding=3
)

print("=== TESTING PANEL SPLIT ON BUBBLE 7 ===\n")
bid = 7
box = bubble_boxes[bid]
print(f"Bubble {bid} box: {box}")
print(f"Bubble {bid} quads: {len(bubble_quads[bid])}")
print(f"Bubble {bid} indices: {len(bubble_indices[bid])}")

# Test split_panel_box
split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
if split_result:
    box_left, box_right, split_x = split_result
    print(f"\n✓ Panel split detected!")
    print(f" Split X: {split_x}")
    print(f" Left box: {box_left}")
    print(f" Right box: {box_right}")
    # Simulate index split: each token goes to the side its centre falls on.
    left_idxs, right_idxs = [], []
    for idx in bubble_indices[bid]:
        cx, cy = mt.quad_center(filtered[idx][0])
        if cx < split_x:
            left_idxs.append(idx)
        else:
            right_idxs.append(idx)
    print(f"\n Left indices ({len(left_idxs)}): {left_idxs}")
    print(f" Right indices ({len(right_idxs)}): {right_idxs}")
    if left_idxs and right_idxs:
        print(f"\n✓ Split is valid (both sides have content)")
    else:
        print(f"\n✗ Split is invalid (one side is empty)")
else:
    print(f"\n✗ No panel split detected")
    print(f" Threshold would be: quads >= 10? {len(bubble_quads[bid]) >= 10}")
    print(f" Width >= 50? {box[2] - box[0] >= 50}")
    print(f" Height >= 50? {box[3] - box[1] >= 50}")

159
pipeline-render.py Normal file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
pipeline-render.py
───────────────────────────────────────────────────────────────
Standalone Rendering Pipeline
Usage:
python pipeline-render.py /path/to/chapter/folder
"""
import os
import sys
import argparse
import zipfile
import importlib.util
from pathlib import Path
import cv2 # ✅ Added OpenCV to load the image
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
DEFAULT_FONT_PATH = "fonts/ComicNeue-Regular.ttf"
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Import the Python file at *filepath* as a module named *name*.

    Bypasses sys.path entirely (needed because the project's files contain
    dashes in their names). Raises FileNotFoundError when no import spec or
    loader can be built for the path.
    """
    module_spec = importlib.util.spec_from_file_location(name, filepath)
    if module_spec is None or module_spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """Return page-image files in *chapter_dir* (non-recursive), sorted by stem."""
    image_suffixes = {".jpg", ".jpeg", ".png", ".webp"}
    found = []
    for entry in Path(chapter_dir).iterdir():
        if entry.is_file() and entry.suffix.lower() in image_suffixes:
            found.append(entry)
    found.sort(key=lambda entry: entry.stem)
    return found
def pack_rendered_cbz(chapter_dir, output_cbz, rendered_files):
    """Zip the rendered page images into a CBZ archive (stored, uncompressed).

    Each file is archived under its bare filename. Does nothing (beyond a
    warning) when *rendered_files* is empty. *chapter_dir* is accepted for
    interface stability but not read here.
    """
    if not rendered_files:
        print("⚠️ No rendered pages found — CBZ not created.")
        return
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as archive:
        for rendered_page in rendered_files:
            archive.write(rendered_page, rendered_page.name)
    print(f"\n✅ Rendered CBZ saved → {output_cbz}")
    print(f"📦 Contains: {len(rendered_files)} translated pages ready to read.")
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_render(page_path, workdir, renderer_module, font_path):
    """Render the translated text of one page.

    Expects *workdir* to already hold output.txt and bubbles.json from the
    OCR/translation pipeline. Writes the rendered image into *workdir* under
    the page's filename and returns its Path, or None when prerequisites are
    missing, the image cannot be read, or rendering raises.
    """
    # FIX: the banner was printed as '' * 70 (an empty string, so nothing
    # was shown); restore the intended 70-character rule.
    print(f"\n{'─' * 70}")
    print(f"🎨 RENDERING: {page_path.name}")
    print(f"{'─' * 70}")
    txt_path = workdir / "output.txt"
    json_path = workdir / "bubbles.json"
    out_img = workdir / page_path.name
    if not txt_path.exists() or not json_path.exists():
        print(" ⚠️ Missing output.txt or bubbles.json. Did you run the OCR pipeline first?")
        return None
    # Load the image into memory (NumPy array) before handing it over —
    # render_translations expects pixel data as its first argument.
    img_array = cv2.imread(str(page_path.resolve()))
    if img_array is None:
        print(f" ❌ Failed to load image: {page_path.name}")
        return None
    orig_dir = os.getcwd()
    try:
        # Run inside the page workdir so any relative paths the renderer
        # uses land next to the page's other artifacts.
        os.chdir(workdir)
        renderer_module.render_translations(
            img_array,                  # image data (NumPy array)
            str(out_img.resolve()),     # output image path
            str(txt_path.resolve()),    # translations text
            str(json_path.resolve()),   # bubbles JSON
            font_path                   # font path
        )
        print(" ✅ Render complete")
        return out_img
    except Exception as e:
        # Best-effort per page: report and let the batch continue.
        print(f" ❌ Failed: {e}")
        return None
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """CLI entry point: render every page of a chapter, then pack a CBZ."""
    parser = argparse.ArgumentParser(description="Manga Rendering Pipeline")
    parser.add_argument("chapter_dir", help="Path to the folder containing original manga pages")
    args = parser.parse_args()
    chapter_dir = Path(args.chapter_dir).resolve()
    output_cbz = chapter_dir.parent / f"{chapter_dir.name}_rendered.cbz"
    script_dir = Path(__file__).parent
    # Resolve the font relative to this script so the pipeline works from any CWD.
    absolute_font_path = str((script_dir / DEFAULT_FONT_PATH).resolve())
    print("Loading renderer module...")
    try:
        renderer = load_module("manga_renderer", str(script_dir / "manga-renderer.py"))
    except Exception as e:
        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)
    pages = sorted_pages(chapter_dir)
    if not pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)
    print(f"\n📖 Chapter : {chapter_dir}")
    print(f" Pages : {len(pages)}\n")
    succeeded, failed = [], []
    rendered_files = []
    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] Checking data for {page_path.name}...")
        workdir = Path(chapter_dir) / "translated" / page_path.stem
        out_file = process_render(page_path, workdir, renderer, absolute_font_path)
        if out_file:
            succeeded.append(page_path.name)
            rendered_files.append(out_file)
        else:
            failed.append(page_path.name)
    # FIX: separators below were printed as '' * 70 (nothing); restore the
    # intended 70-character rule.
    print(f"\n{'─' * 70}")
    print("RENDER PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) rendered successfully")
    if failed:
        print(f"{len(failed)} page(s) skipped or failed:")
        for f in failed:
            print(f"{f}")
    print(f"{'─' * 70}\n")
    print("Packing final CBZ...")
    pack_rendered_cbz(chapter_dir, output_cbz, rendered_files)


if __name__ == "__main__":
    main()

282
pipeline-translator.py Normal file
View File

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
"""
pipeline-translator.py
───────────────────────────────────────────────────────────────
Translation OCR pipeline (Batch Processing Only)
Usage:
python pipeline-translator.py /path/to/chapter/folder
"""
import os
import sys
import argparse
import importlib.util
from pathlib import Path
# ─────────────────────────────────────────────────────────────
# PIPELINE CONFIGURATION
# Maps to the process_manga_page() signature in manga-translator.py
# ─────────────────────────────────────────────────────────────
# Every key here must correspond to a keyword parameter of
# process_manga_page(); verify_translator_api() enforces this at startup.
PIPELINE_CONFIG = dict(
    source_lang = "en",  # language OCR'd from the pages
    target_lang = "ca",  # language to translate into
)
# ─────────────────────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────────────────────
def load_module(name: str, filepath: str):
    """Load a Python source file as a module, independent of sys.path.

    Raises:
        FileNotFoundError: if no import spec/loader can be created for *filepath*.
    """
    file_spec = importlib.util.spec_from_file_location(name, filepath)
    if file_spec is None or file_spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    fresh_module = importlib.util.module_from_spec(file_spec)
    file_spec.loader.exec_module(fresh_module)
    return fresh_module
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
def sorted_pages(chapter_dir: Path):
    """List the page images (jpg/jpeg/png/webp) in *chapter_dir*, sorted by stem."""
    allowed_suffixes = {".jpg", ".jpeg", ".png", ".webp"}
    pages = []
    for entry in chapter_dir.iterdir():
        if not entry.is_file():
            continue
        if entry.suffix.lower() not in allowed_suffixes:
            continue
        pages.append(entry)
    pages.sort(key=lambda entry: entry.stem)
    return pages
def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
    """Ensure and return <chapter_dir>/translated/<page_stem> (idempotent)."""
    page_dir = chapter_dir / "translated" / page_stem
    page_dir.mkdir(parents=True, exist_ok=True)
    return page_dir
def verify_translator_api(module) -> bool:
    """
    Confirm *module* exposes process_manga_page() accepting every key in
    PIPELINE_CONFIG. A warning is printed for each missing parameter so a
    config/API mismatch surfaces immediately instead of silently falling
    back to the translator's defaults. Returns True only when everything
    matches.
    """
    import inspect
    fn = getattr(module, "process_manga_page", None)
    if fn is None:
        print("❌ manga-translator.py does not expose process_manga_page()")
        return False
    accepted = set(inspect.signature(fn).parameters)
    missing = [key for key in PIPELINE_CONFIG if key not in accepted]
    for key in missing:
        print(
            f"⚠️ PIPELINE_CONFIG key '{key}' not found in "
            f"process_manga_page() — update pipeline or translator."
        )
    return not missing
# ─────────────────────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────────────────────
def _write_debug_visualisation(translator_module, page_path, results, debug_path):
    """Best-effort: redraw cluster boxes/lines from *results* onto the page image."""
    import cv2
    image_bgr = cv2.imread(str(page_path.resolve()))
    if image_bgr is None:
        return
    # Reconstruct vis_boxes / vis_lines from the results dict
    vis_boxes = {}
    vis_lines = {}
    vis_indices = {}
    for bid_str, data in results.items():
        bid = int(bid_str)
        xywh = data["box"]
        vis_boxes[bid] = (
            xywh["x"],
            xywh["y"],
            xywh["x"] + xywh["w"],
            xywh["y"] + xywh["h"],
        )
        vis_lines[bid] = data.get("lines", [])
        vis_indices[bid] = []
    translator_module.draw_debug_clusters(
        image_bgr = image_bgr,
        out_boxes = vis_boxes,
        out_lines = vis_lines,
        out_indices = vis_indices,
        ocr = [],
        save_path = debug_path,
    )


def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
    """Run OCR + translation for a single page; all outputs land in *workdir*.

    Returns True on success, False on failure. Errors are printed, never
    raised, so the batch keeps going; the CWD is always restored.
    """
    # FIX: the banner was printed as '' * 70 (an empty string); restore the
    # intended 70-character rule.
    print(f"\n{'─' * 70}")
    print(f" PAGE : {page_path.name}")
    print(f"{'─' * 70}")
    orig_dir = os.getcwd()
    try:
        # Run inside the page's own workdir so debug images and
        # output files land there automatically.
        os.chdir(workdir)
        output_json = str(workdir / "bubbles.json")
        output_txt = str(workdir / "output.txt")
        debug_path = str(workdir / "debug_clusters.png")
        print(" ⏳ Extracting text and translating...")
        results = translator_module.process_manga_page(
            image_path = str(page_path.resolve()),
            output_json = output_json,
            output_txt = output_txt,
            **PIPELINE_CONFIG,
        )
        # ── Optional debug visualisation (non-fatal) ──────────
        if results:
            try:
                _write_debug_visualisation(translator_module, page_path, results, debug_path)
            except Exception as e:
                print(f" ⚠️ Debug visualisation failed (non-fatal): {e}")
        # ── Sanity-check outputs ──────────────────────────────
        for fname in ("output.txt", "bubbles.json"):
            fpath = workdir / fname
            if not fpath.exists() or fpath.stat().st_size == 0:
                print(f" ⚠️ {fname} is missing or empty after processing.")
        if not results:
            print(" ⚠️ process_manga_page() returned no results.")
            return False
        print(f" ✅ Done — {len(results)} box(es) processed.")
        return True
    except Exception as e:
        import traceback
        print(f" ❌ Failed: {e}")
        traceback.print_exc()
        return False
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────────────────────
def main():
    """CLI entry point: batch-translate every page image in a chapter folder."""
    parser = argparse.ArgumentParser(
        description="Manga Translation OCR Batch Pipeline"
    )
    parser.add_argument(
        "chapter_dir",
        help="Path to the folder containing manga page images"
    )
    parser.add_argument(
        "--start", type=int, default=1,
        help="Start from this page number (1-based, default: 1)"
    )
    parser.add_argument(
        "--end", type=int, default=None,
        help="Stop after this page number inclusive (default: all)"
    )
    parser.add_argument(
        "--source", "-s", default=None,
        help=f"Override source language (default: {PIPELINE_CONFIG['source_lang']})"
    )
    parser.add_argument(
        "--target", "-t", default=None,
        help=f"Override target language (default: {PIPELINE_CONFIG['target_lang']})"
    )
    args = parser.parse_args()
    # Allow CLI overrides of source/target without touching PIPELINE_CONFIG
    config = dict(PIPELINE_CONFIG)
    if args.source:
        config["source_lang"] = args.source
    if args.target:
        config["target_lang"] = args.target
    # Patch PIPELINE_CONFIG in-place so process_page() picks up overrides
    PIPELINE_CONFIG.update(config)
    chapter_dir = Path(args.chapter_dir).resolve()
    if not chapter_dir.is_dir():
        print(f"❌ Not a directory: {chapter_dir}")
        sys.exit(1)
    # ── Load translator module ────────────────────────────────
    script_dir = Path(__file__).parent
    module_path = script_dir / "manga-translator.py"
    if not module_path.exists():
        print(f"❌ manga-translator.py not found in {script_dir}")
        sys.exit(1)
    print(f"📦 Loading translator from: {module_path}")
    try:
        translator = load_module("manga_translator", str(module_path))
    except Exception as e:
        print(f"❌ Could not load manga-translator.py: {e}")
        sys.exit(1)
    # ── API compatibility check ───────────────────────────────
    if not verify_translator_api(translator):
        print("❌ Aborting — fix the parameter mismatch above first.")
        sys.exit(1)
    # ── Discover pages ────────────────────────────────────────
    all_pages = sorted_pages(chapter_dir)
    if not all_pages:
        print(f"❌ No images found in: {chapter_dir}")
        sys.exit(1)
    # Apply --start / --end slice (1-based, inclusive)
    start_idx = max(0, args.start - 1)
    end_idx = args.end if args.end is not None else len(all_pages)
    pages = all_pages[start_idx:end_idx]
    if not pages:
        print(f"❌ No pages in range [{args.start}, {args.end}]")
        sys.exit(1)
    # ── Summary header ────────────────────────────────────────
    # FIX: separators were printed as '' * 70 (nothing) and the range/lang
    # arrows were missing; restored the 70-char rule and the "→" arrows.
    print(f"\n{'─' * 70}")
    print(f" 📖 Chapter : {chapter_dir.name}")
    print(f" 📄 Pages : {len(pages)} "
          f"(of {len(all_pages)} total, "
          f"range {args.start}→{end_idx})")
    print(f" 🌐 Lang : {PIPELINE_CONFIG['source_lang']}"
          f" → {PIPELINE_CONFIG['target_lang']}")
    print(f"{'─' * 70}\n")
    succeeded, failed = [], []
    for i, page_path in enumerate(pages, start=1):
        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(chapter_dir, page_path.stem)
        if process_page(page_path, workdir, translator):
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)
    # ── Final report ──────────────────────────────────────────
    print(f"\n{'─' * 70}")
    print(" PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) succeeded")
    if failed:
        print(f"{len(failed)} page(s) failed:")
        for name in failed:
            print(f"{name}")
    print(f"{'─' * 70}\n")


if __name__ == "__main__":
    main()

View File

@@ -1,222 +0,0 @@
#!/usr/bin/env python3
"""
pipeline.py
───────────────────────────────────────────────────────────────
Translation-only pipeline for Dandadan_059_2022_Digital
Flow per page:
1. Run translate_manga_text() → output.txt + bubbles.json
2. Copy original image to workdir for reference
Folder structure produced:
Dandadan_059_2022_Digital_1r0n/
└── translated/
├── 00/
│ ├── output.txt ← translations to review
│ ├── bubbles.json ← bubble boxes
│ └── debug_clusters.png ← cluster debug (if DEBUG=True)
├── 01/
│ └── ...
└── ...
Dandadan_059_translated.cbz ← original pages + translations
zipped for reference
"""
import os
import sys
import shutil
import zipfile
import importlib.util
from pathlib import Path
# ─────────────────────────────────────────────
# CONFIG — edit these as needed
# ─────────────────────────────────────────────
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
SOURCE_LANG = "en"
TARGET_LANG = "ca"
# manga-translator.py settings
CONFIDENCE_THRESHOLD = 0.10
MIN_TEXT_LENGTH = 2
CLUSTER_EPS = "auto"
PROXIMITY_PX = 80
FILTER_SFX = True
QUALITY_THRESHOLD = 0.5
UPSCALE_FACTOR = 2.5
BBOX_PADDING = 5
DEBUG = True
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Import *filepath* as a module named *name*, bypassing sys.path.

    FIX: guard against spec_from_file_location() returning None (or a spec
    without a loader) for a missing/unreadable file. The original crashed
    with AttributeError on None in that case, while main() only catches
    FileNotFoundError — so a missing manga-translator.py was never reported
    cleanly. Raising FileNotFoundError restores the intended error path.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """Return page-image files in *chapter_dir*, sorted by filename stem.

    FIX: also require p.is_file(), matching the behavior needed downstream —
    without it a stray directory whose name ends in an image suffix would be
    returned and later break per-page processing.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=lambda p: p.stem)
def make_page_workdir(chapter_dir, page_stem):
    """Create (if needed) and return the per-page folder <chapter_dir>/translated/<page_stem>."""
    page_dir = Path(chapter_dir) / "translated" / page_stem
    page_dir.mkdir(parents=True, exist_ok=True)
    return page_dir
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """
    Pack into a CBZ:
      - every original page image from chapter_dir's root, under pages/
      - every per-page output.txt found in translated_dir, under translations/
    Both groups are sorted by page stem so reading order is preserved.
    Nothing is written when no original pages exist.
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = sorted(
        (p for p in Path(chapter_dir).iterdir() if p.suffix.lower() in exts),
        key=lambda p: p.stem,
    )
    txts = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name,
    )
    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return
    with zipfile.ZipFile(output_cbz, "w",
                         compression=zipfile.ZIP_STORED) as zf:
        # Original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")
        # Translation text files
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")
    print(f"\n✅ CBZ saved → {output_cbz} "
          f"({len(pages)} page(s), {len(txts)} translation(s))")
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir, translator_module):
    """
    Runs translator for a single page.
    All output files land in workdir.
    Returns True on success, False on failure.
    """
    # FIX: the banner was printed as '' * 60 (an empty string, so nothing
    # appeared); restore the intended 60-character rule.
    print(f"\n{'─'*60}")
    print(f" PAGE: {page_path.name}")
    print(f"{'─'*60}")
    orig_dir = os.getcwd()
    try:
        # chdir into workdir so debug_clusters.png,
        # temp files etc. all land there
        os.chdir(workdir)
        translator_module.translate_manga_text(
            image_path           = str(page_path.resolve()),
            source_lang          = SOURCE_LANG,
            target_lang          = TARGET_LANG,
            confidence_threshold = CONFIDENCE_THRESHOLD,
            export_to_file       = "output.txt",
            export_bubbles_to    = "bubbles.json",
            min_text_length      = MIN_TEXT_LENGTH,
            cluster_eps          = CLUSTER_EPS,
            proximity_px         = PROXIMITY_PX,
            filter_sound_effects = FILTER_SFX,
            quality_threshold    = QUALITY_THRESHOLD,
            upscale_factor       = UPSCALE_FACTOR,
            bbox_padding         = BBOX_PADDING,
            debug                = DEBUG,
        )
        print(f" ✅ Translated → {workdir}")
        return True
    except Exception as e:
        # Best-effort per page: report and let the batch continue.
        print(f" ❌ Failed: {e}")
        return False
    finally:
        os.chdir(orig_dir)
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def main():
    """Translate every page of CHAPTER_DIR, then pack originals + texts into a CBZ."""
    # ── Load translator module ────────────────────────────────────
    # NOTE(review): the module is loaded by a relative path, so this script
    # must be run from the directory containing manga-translator.py.
    print("Loading manga-translator.py...")
    try:
        translator = load_module(
            "manga_translator", "manga-translator.py")
    except FileNotFoundError as e:
        print(f"❌ Could not load module: {e}")
        sys.exit(1)
    # ── Discover pages ────────────────────────────────────────────
    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
        sys.exit(1)
    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f" Pages : {len(pages)}")
    print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
    # ── Process each page ─────────────────────────────────────────
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
    failed = []
    for i, page_path in enumerate(pages, start=1):
        print(f"\n[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
        ok = process_page(page_path, workdir, translator)
        if ok:
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)
    # ── Summary ───────────────────────────────────────────────────
    # FIX: separators were printed as '' * 60 (nothing); restore the
    # intended 60-character rule.
    print(f"\n{'─'*60}")
    print(f" PIPELINE COMPLETE")
    print(f"{len(succeeded)} page(s) succeeded")
    if failed:
        print(f"{len(failed)} page(s) failed:")
        for f in failed:
            print(f"{f}")
    print(f"{'─'*60}\n")
    # ── Pack CBZ ──────────────────────────────────────────────────
    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)


if __name__ == "__main__":
    main()

View File

@@ -1,19 +0,0 @@
# ─────────────────────────────────────────────
# manga-translator + manga-renderer
# Python >= 3.9 recommended
# ─────────────────────────────────────────────
# Computer vision + image processing
opencv-python>=4.8.0
numpy>=1.24.0
Pillow>=10.0.0
# OCR engine (manga-translator)
manga-ocr>=0.1.8
# Translation (manga-translator)
deep-translator>=1.11.0
# HTTP / file handling used internally by manga-ocr
requests>=2.31.0