Files
manga-translator/manga-translator.py
Guillem Hernandez Sola f95b7d32d4 Added some fixes
2026-04-14 20:08:51 +02:00

869 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import os
import re
import tempfile

import cv2
import easyocr
import numpy as np
from deep_translator import GoogleTranslator
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Canonical spellings for proper nouns. The values equal the keys, but
# because matching is case-insensitive on word boundaries (see
# apply_glossary), these identity entries pin the upper-case spelling
# of character names after OCR and after translation.
GLOSSARY = {
    "ANYA": "ANYA",
    "STARLIGHT ANYA": "STARLIGHT ANYA",
    "MR. HENDERSON": "MR. HENDERSON",
    "HENDERSON": "HENDERSON",
    "STELLA STAR": "STELLA STAR",
}
# Onomatopoeia dropped when filter_sound_effects is on; each pattern is
# matched against the letters-only, lower-cased token (see is_sound_effect).
SOUND_EFFECT_PATTERNS = [
    r"^b+i+p+$", r"^sha+$", r"^ha+$", r"^ah+$", r"^oh+$",
    r"^ugh+$", r"^bam+$", r"^pow+$", r"^boom+$", r"^bang+$",
    r"^crash+$", r"^thud+$", r"^zip+$", r"^swoosh+$", r"^chirp+$"
]
# Chapter/series headings that should be skipped, not translated.
TITLE_PATTERNS = [
    r"^(mission|chapter|episode|vol\.?|volume)\s*\d+$",
    r"^(spy|family|spy.family)$",
    r"^by\s+.+$",
]
# Detections that are pure symbols or page-furniture labels (e.g. "BOX 3").
NOISE_PATTERNS = [
    r"^[^a-zA-Z0-9\?!.]+$",
    r"^BOX[0-9A-Z#\s]*$",
]
# Fraction of the page height treated as the "top band", where stricter
# confidence filtering applies (titles/credits usually live there).
TOP_BAND_RATIO = 0.08
# ─────────────────────────────────────────────
# TEXT HELPERS
# ─────────────────────────────────────────────
def normalize_text(text: str) -> str:
    """Uppercase, trim and canonicalize OCR/translated text.

    Typographic quotes and the ellipsis character are mapped to their
    ASCII equivalents, whitespace runs collapse to a single space, and
    stray spaces before punctuation / inside parentheses are removed.

    FIX: the replace() calls previously had an EMPTY search string (the
    curly-quote characters were lost), and str.replace("", x) inserts x
    between every character, mangling all text. The intended Unicode
    characters are restored here as explicit escapes.
    """
    t = (text or "").strip().upper()
    # Map typographic quotes / ellipsis to ASCII so downstream regexes
    # and the translator see plain punctuation.
    t = t.replace("\u201c", "\"").replace("\u201d", "\"")
    t = t.replace("\u2018", "'").replace("\u2019", "'")
    t = t.replace("\u2026", "...")
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"\s+([,.;:!?])", r"\1", t)
    t = re.sub(r"\(\s+", "(", t)
    t = re.sub(r"\s+\)", ")", t)
    t = re.sub(r"\.{4,}", "...", t)
    t = re.sub(r",\?", "?", t)
    return t.strip()
def apply_glossary(text: str) -> str:
    """Rewrite glossary terms to their canonical forms.

    Longer keys are substituted first so multi-word entries win over
    their substrings; matching is case-insensitive on word boundaries.
    """
    result = text or ""
    for term, canonical in sorted(GLOSSARY.items(), key=lambda kv: len(kv[0]), reverse=True):
        pattern = rf"\b{re.escape(term)}\b"
        result = re.sub(pattern, canonical, result, flags=re.IGNORECASE)
    return result
def postprocess_translation_general(text: str) -> str:
    """Tidy a translated string: normalize, collapse double spaces, cap
    repeated !/? at two, and clamp long ellipses to three dots."""
    cleaned = normalize_text(text)
    cleaned = re.sub(r"\s{2,}", " ", cleaned).strip()
    cleaned = re.sub(r"([!?]){3,}", r"\1\1", cleaned)
    return re.sub(r"\.{4,}", "...", cleaned)
# ─────────────────────────────────────────────
# FILTERS
# ─────────────────────────────────────────────
def is_sound_effect(text: str) -> bool:
    """True when the text is pure onomatopoeia (e.g. 'BAM!!', 'swoosh').

    Non-letters are stripped first so punctuation doesn't hide a match.
    """
    letters_only = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    for pattern in SOUND_EFFECT_PATTERNS:
        if re.fullmatch(pattern, letters_only, re.IGNORECASE):
            return True
    return False
def is_title_text(text: str) -> bool:
    """Detect chapter/series headings (e.g. 'MISSION 3') that should be
    skipped rather than translated."""
    candidate = (text or "").strip().lower()
    for pattern in TITLE_PATTERNS:
        if re.fullmatch(pattern, candidate, re.IGNORECASE):
            return True
    return False
def is_noise_text(text: str) -> bool:
    """True for detections that are pure symbols or page-furniture labels."""
    candidate = (text or "").strip()
    for pattern in NOISE_PATTERNS:
        if re.fullmatch(pattern, candidate):
            return True
    return False
# ─────────────────────────────────────────────
# GEOMETRY
# ─────────────────────────────────────────────
def quad_bbox(quad):
    """Return the axis-aligned integer bbox (x1, y1, x2, y2) of a quad."""
    xs, ys = zip(*[(pt[0], pt[1]) for pt in quad])
    return (int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys)))
def quad_center(quad):
    """Center point (cx, cy) of the quad's axis-aligned bounding box."""
    left, top, right, bottom = quad_bbox(quad)
    return ((left + right) / 2.0, (top + bottom) / 2.0)
def boxes_union_xyxy(boxes):
    """Union of a list of xyxy boxes; None entries are ignored.

    Returns None when no valid box remains.
    """
    valid = [box for box in boxes if box is not None]
    if not valid:
        return None
    x1s, y1s, x2s, y2s = zip(*valid)
    return (int(min(x1s)), int(min(y1s)), int(max(x2s)), int(max(y2s)))
def bbox_area_xyxy(b):
    """Area of an xyxy box; 0 for None or degenerate/inverted boxes."""
    if b is None:
        return 0
    width = max(0, b[2] - b[0])
    height = max(0, b[3] - b[1])
    return int(width * height)
def xyxy_to_xywh(b):
    """Convert (x1, y1, x2, y2) into an {'x','y','w','h'} dict.

    None passes through; negative extents clamp to zero.
    """
    if b is None:
        return None
    left, top, right, bottom = b
    return {
        "x": int(left),
        "y": int(top),
        "w": int(max(0, right - left)),
        "h": int(max(0, bottom - top)),
    }
def overlap_or_near(a, b, gap=0):
    """True if two xyxy boxes overlap, or are within `gap` px on BOTH axes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    # Separation per axis; clamped to 0 when the intervals overlap.
    dx = max(0, max(ax1, bx1) - min(ax2, bx2))
    dy = max(0, max(ay1, by1) - min(ay2, by2))
    return dx <= gap and dy <= gap
# ─────────────────────────────────────────────
# QUALITY / SCORING
# ─────────────────────────────────────────────
def ocr_quality_score(text: str) -> float:
    """Crude 0..1 readability score for a recognized string.

    Mostly the alphabetic-character ratio, penalized for symbol clusters
    and doubled commas, with a small bonus for sentence-final punctuation.
    """
    if not text or len(text) < 2:
        return 0.0
    letters = sum(1 for c in text if c.isalpha())
    alpha_ratio = letters / max(1, len(text))
    penalty = 0.0
    if re.search(r"[^\w\s\'\!\?\.,\-]{2,}", text):
        penalty += 0.2  # two+ consecutive unexpected symbols
    if re.search(r",,", text):
        penalty += 0.2  # doubled commas are a typical OCR artifact
    bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
    score = alpha_ratio - penalty + bonus
    return min(1.0, max(0.0, score))
def ocr_candidate_score(text: str) -> float:
    """Score (0..1) how plausible an OCR candidate string is.

    Weighted blend of letter / space / expected-punctuation ratios, with
    a strong weight against unexpected symbols and small penalties for
    lone capitals, digit runs, and stuttered character pairs.
    """
    if not text:
        return 0.0
    stripped = text.strip()
    length = len(stripped)
    if length == 0:
        return 0.0
    letter_ratio = sum(1 for ch in stripped if ch.isalpha()) / length
    space_ratio = sum(1 for ch in stripped if ch.isspace()) / length
    expected_punct = ".,!?'-:;()[]\""
    punct_ratio = sum(1 for ch in stripped if ch in expected_punct) / length
    junk_ratio = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", stripped)) / length
    penalty = 0.0
    if re.search(r"\b[A-Z]\b", stripped):
        penalty += 0.05  # lone capital letters are usually fragments
    if re.search(r"[0-9]{2,}", stripped):
        penalty += 0.08  # digit runs are rare in dialogue
    if re.search(r"(..)\1\1", stripped):
        penalty += 0.08  # same 2-char sequence three times = OCR stutter
    raw_score = (
        0.62 * letter_ratio
        + 0.10 * space_ratio
        + 0.20 * punct_ratio
        - 0.45 * junk_ratio
        - penalty
    )
    return max(0.0, min(1.0, raw_score))
# ─────────────────────────────────────────────
# OCR MULTI-PASS
# ─────────────────────────────────────────────
def preprocess_variant(crop_bgr, mode):
    """Produce one grayscale preprocessing variant of a crop for OCR retries.

    Modes: "clahe" (local contrast), "adaptive" (blur + adaptive
    threshold), "otsu" (blur + global Otsu threshold), "invert"
    (negative), anything else (including "raw") is plain grayscale.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
    if mode == "clahe":
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(gray)
    if mode == "adaptive":
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
        return cv2.adaptiveThreshold(
            blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 35, 11
        )
    if mode == "otsu":
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
        _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary
    if mode == "invert":
        return 255 - gray
    # "raw" and any unknown mode fall back to plain grayscale.
    return gray
def rotate_image_keep_bounds(img, angle_deg):
    """Rotate `img` by `angle_deg` degrees, expanding the canvas so no
    corner is clipped.

    The output size is the bounding box of the rotated frame; the affine
    matrix is translated so the image stays centered. Revealed border
    pixels are filled with 255 (white) to resemble page background.
    """
    h, w = img.shape[:2]
    c = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(c, angle_deg, 1.0)
    # |cos| / |sin| of the rotation, read back from the matrix itself.
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])
    # Size of the axis-aligned box that contains the rotated rectangle.
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))
    # Shift so the old center maps to the center of the new canvas.
    M[0, 2] += (new_w / 2) - c[0]
    M[1, 2] += (new_h / 2) - c[1]
    return cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderValue=255)
def run_ocr_on_array(reader, arr):
    """OCR a numpy image array with EasyOCR by round-tripping through a
    temporary PNG file.

    FIX: the previous fixed "_tmp_ocr.png" name was race-prone — two
    concurrent runs in the same directory would clobber each other's
    file. tempfile.mkstemp gives each call a unique path. (PNG is
    lossless, so the round-trip does not alter pixel data.)

    Returns EasyOCR's raw detection list (bbox, text, confidence).
    """
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # cv2.imwrite opens the path itself; release the handle
    try:
        cv2.imwrite(tmp_path, arr)
        return reader.readtext(tmp_path, paragraph=False)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def rebuild_text_from_ocr_result(res):
    """Reassemble a single text string from raw EasyOCR detections.

    Tokens are clustered into rows by vertical center (tolerance scaled
    from the median token height), rows are read top-to-bottom and each
    row left-to-right, then everything is joined and normalized.
    Returns "" when there is nothing usable.
    """
    if not res:
        return ""
    norm = []
    for item in res:
        # Defensive: EasyOCR entries should be (bbox, text, conf) triples.
        if len(item) != 3:
            continue
        bbox, txt, conf = item
        if not txt or not txt.strip():
            continue
        b = quad_bbox(bbox)
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        norm.append((b, txt, conf, xc, yc, h))
    if not norm:
        return ""
    # Row tolerance scales with the typical glyph height on this crop.
    med_h = float(np.median([x[5] for x in norm]))
    row_tol = max(6.0, med_h * 0.75)
    norm.sort(key=lambda z: z[4])  # sort by vertical center (y)
    rows = []
    for it in norm:
        placed = False
        for r in rows:
            if abs(it[4] - r["yc"]) <= row_tol:
                r["m"].append(it)
                # Running mean keeps the row anchor centered as it grows.
                r["yc"] = float(np.mean([k[4] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it[4], "m": [it]})
    rows.sort(key=lambda r: r["yc"])
    lines = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z[3])  # left-to-right (x)
        line = normalize_text(" ".join(x[1] for x in mem))
        if line:
            lines.append(line)
    return normalize_text(" ".join(lines))
def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22):
    """Re-OCR a bubble crop with multiple preprocessing/rotation variants
    and return the best (text, score) pair.

    The bbox is padded by `pad` px (clamped to the frame), upscaled by
    `upscale`, then every combination of 5 preprocessing modes x 3 small
    rotations is OCR'd and ranked with ocr_candidate_score.

    Returns (None, 0.0) when the crop is empty or nothing was read.
    """
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
    # Expand the box by `pad`, clamped to the image frame.
    x1 = max(0, int(x1 - pad))
    y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad))
    y2 = min(ih, int(y2 + pad))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None, 0.0
    # Upscale before OCR: small manga lettering reads better enlarged.
    up = cv2.resize(
        crop,
        (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)),
        interpolation=cv2.INTER_CUBIC
    )
    modes = ["raw", "clahe", "adaptive", "otsu", "invert"]
    angles = [0.0, 1.5, -1.5]  # small skews catch slightly tilted lettering
    best_text, best_score = "", 0.0
    for mode in modes:
        proc = preprocess_variant(up, mode)
        # Normalize to 3 channels so rotation handles borders uniformly.
        if len(proc.shape) == 2:
            proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR)
        else:
            proc3 = proc
        for a in angles:
            rot = rotate_image_keep_bounds(proc3, a)
            # Feed OCR a single-channel image.
            if len(rot.shape) == 3:
                rot_in = cv2.cvtColor(rot, cv2.COLOR_BGR2GRAY)
            else:
                rot_in = rot
            res = run_ocr_on_array(reader, rot_in)
            txt = rebuild_text_from_ocr_result(res)
            sc = ocr_candidate_score(txt)
            if sc > best_score:
                best_text, best_score = txt, sc
    if not best_text:
        return None, 0.0
    return best_text, best_score
# ─────────────────────────────────────────────
# LINES + YELLOW BOXES
# ─────────────────────────────────────────────
def build_lines_from_indices(indices, ocr):
    """Build normalized text lines for one bubble from its token indices.

    Same row-clustering scheme as rebuild_text_from_ocr_result: group by
    vertical center with a tolerance derived from the median token
    height, then read rows top-to-bottom, tokens left-to-right.
    Returns a list of line strings (may contain empty strings).
    """
    if not indices:
        return []
    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        items.append((i, b, xc, yc, h))
    med_h = float(np.median([it[4] for it in items])) if items else 10.0
    row_tol = max(6.0, med_h * 0.75)
    items.sort(key=lambda x: x[3])  # sort by vertical center
    rows = []
    for it in items:
        i, b, xc, yc, h = it
        placed = False
        for r in rows:
            if abs(yc - r["yc"]) <= row_tol:
                r["m"].append((i, b, xc, yc))
                # Running mean re-centers the row anchor as members join.
                r["yc"] = float(np.mean([k[3] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": yc, "m": [(i, b, xc, yc)]})
    rows.sort(key=lambda r: r["yc"])
    lines = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z[2])  # left-to-right
        txt = normalize_text(" ".join(ocr[i][1] for i, _, _, _ in mem))
        lines.append(txt)
    return lines
def build_line_boxes_from_indices(indices, ocr):
    """
    Robust yellow-box generation with punctuation attachment:
    - row grouping
    - chunking by x gap
    - attach tiny punctuation/special tokens to nearest chunk
    - token coverage guarantee

    Returns a list of padded (x1, y1, x2, y2) boxes, sorted top-left
    first, with heavily overlapping boxes merged.
    """
    if not indices:
        return []
    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        txt = normalize_text(ocr[i][1])
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        w = max(1.0, b[2] - b[0])
        h = max(1.0, b[3] - b[1])
        items.append({
            "i": i, "b": b, "txt": txt,
            "xc": xc, "yc": yc, "w": w, "h": h
        })
    # All tolerances scale with the median glyph height of this bubble.
    med_h = float(np.median([it["h"] for it in items])) if items else 10.0
    row_tol = max(6.0, med_h * 0.90)
    gap_x_tol = max(8.0, med_h * 1.25)
    pad = max(1, int(round(med_h * 0.12)))
    def is_punct_like(t):
        # Tokens that are empty, very short, or mostly non-alphanumeric
        # are treated as punctuation fragments to be attached later.
        raw = (t or "").strip()
        if raw == "":
            return True
        punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
        return punct_ratio >= 0.5 or len(raw) <= 2
    # 1) row grouping by vertical center (running-mean anchor per row)
    items_sorted = sorted(items, key=lambda x: x["yc"])
    rows = []
    for it in items_sorted:
        placed = False
        for r in rows:
            if abs(it["yc"] - r["yc"]) <= row_tol:
                r["m"].append(it)
                r["yc"] = float(np.mean([k["yc"] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it["yc"], "m": [it]})
    rows.sort(key=lambda r: r["yc"])
    out_boxes = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z["xc"])
        normal = [t for t in mem if not is_punct_like(t["txt"])]
        punct = [t for t in mem if is_punct_like(t["txt"])]
        # A row of pure punctuation is kept as-is rather than attached.
        if not normal:
            normal = mem
            punct = []
        # 2) chunk normal tokens by horizontal gap
        chunks = []
        cur = [normal[0]]
        for t in normal[1:]:
            prev = cur[-1]["b"]
            b = t["b"]
            gap = b[0] - prev[2]
            if gap <= gap_x_tol:
                cur.append(t)
            else:
                chunks.append(cur)
                cur = [t]
        chunks.append(cur)
        # 3) attach punct tokens to the nearest chunk (weighted distance;
        # near/overlapping chunks get a strong bonus)
        for p in punct:
            pb = p["b"]
            pxc, pyc = p["xc"], p["yc"]
            best_k = -1
            best_score = 1e18
            for k, ch in enumerate(chunks):
                ub = boxes_union_xyxy([x["b"] for x in ch])
                cx = (ub[0] + ub[2]) / 2.0
                cy = (ub[1] + ub[3]) / 2.0
                dx = abs(pxc - cx)
                dy = abs(pyc - cy)
                # Vertical distance weighs heavier: punctuation belongs
                # to its own line far more often than to a neighbor row.
                score = dx + 1.8 * dy
                near = overlap_or_near(pb, ub, gap=int(med_h * 0.9))
                if near:
                    score -= med_h * 2.0
                if score < best_score:
                    best_score = score
                    best_k = k
            if best_k >= 0:
                chunks[best_k].append(p)
            else:
                chunks.append([p])
        # 4) emit one padded box per chunk
        for ch in chunks:
            ub = boxes_union_xyxy([x["b"] for x in ch])
            if ub:
                x1, y1, x2, y2 = ub
                out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
    # 5) guarantee every token is inside some yellow box
    token_boxes = [it["b"] for it in items]
    def inside(tb, lb):
        return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
    for tb in token_boxes:
        if not any(inside(tb, lb) for lb in out_boxes):
            x1, y1, x2, y2 = tb
            out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
    # 6) merge boxes with heavy overlap (IoU > 0.72) into their union
    merged = []
    for b in out_boxes:
        merged_into = False
        for i, m in enumerate(merged):
            ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
            ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
            inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
            a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
            a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
            iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
            if iou > 0.72:
                merged[i] = boxes_union_xyxy([b, m])
                merged_into = True
                break
        if not merged_into:
            merged.append(b)
    merged.sort(key=lambda z: (z[1], z[0]))
    return merged
# ─────────────────────────────────────────────
# GROUPING
# ─────────────────────────────────────────────
def auto_gap(image_path, base=18, ref_w=750):
    """Scale the token-grouping gap with image width relative to `ref_w`.

    Falls back to `base` unchanged when the image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        return base
    width = img.shape[1]
    return base * (width / ref_w)
def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
    """Cluster OCR tokens into speech-bubble groups via union-find.

    Two tokens are united when their boxes overlap or sit within
    `gap_px` of each other, or when their centers are close relative to
    the median token height. Groups are numbered top-to-bottom,
    left-to-right.

    Returns four dicts keyed by 1-based bubble id:
    (text lines, padded bbox (x1,y1,x2,y2), raw quads, token indices).
    """
    n = len(ocr)
    if n == 0:
        return {}, {}, {}, {}
    boxes = [quad_bbox(r[0]) for r in ocr]
    centers = [quad_center(r[0]) for r in ocr]
    hs = [max(1.0, b[3] - b[1]) for b in boxes]
    med_h = float(np.median(hs)) if hs else 12.0
    # Center-distance threshold scales with typical glyph height.
    dist_thresh = max(20.0, med_h * 2.2)
    # Union-find with path halving.
    p = list(range(n))
    def find(x):
        while p[x] != x:
            p[x] = p[p[x]]
            x = p[x]
        return x
    def unite(a, b):
        p[find(a)] = find(b)
    for i in range(n):
        for j in range(i + 1, n):
            # Near/overlapping boxes always join.
            if overlap_or_near(boxes[i], boxes[j], gap=gap_px):
                unite(i, j)
                continue
            # Otherwise join on center proximity, but cap the vertical
            # spread so columns of separate bubbles don't fuse.
            cx1, cy1 = centers[i]
            cx2, cy2 = centers[j]
            d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5
            if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 3.0:
                unite(i, j)
    groups = {}
    for i in range(n):
        groups.setdefault(find(i), []).append(i)
    # Number bubbles by their top-left corner (reading-friendly order).
    sorted_groups = sorted(
        groups.values(),
        key=lambda idxs: (
            min(boxes[i][1] for i in idxs),
            min(boxes[i][0] for i in idxs)
        )
    )
    bubbles = {}
    bubble_boxes = {}
    bubble_quads = {}
    bubble_indices = {}
    ih, iw = image_shape[:2]
    for bid, idxs in enumerate(sorted_groups, start=1):
        idxs = sorted(idxs, key=lambda k: boxes[k][1])
        lines = build_lines_from_indices(idxs, ocr)
        quads = [ocr[k][0] for k in idxs]
        ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
        if ub is None:
            continue
        # Pad the union box, clamped to the image frame.
        x1, y1, x2, y2 = ub
        x1 = max(0, x1 - bbox_padding)
        y1 = max(0, y1 - bbox_padding)
        x2 = min(iw, x2 + bbox_padding)
        y2 = min(ih, y2 + bbox_padding)
        bubbles[bid] = lines
        bubble_boxes[bid] = (x1, y1, x2, y2)
        bubble_quads[bid] = quads
        bubble_indices[bid] = idxs
    return bubbles, bubble_boxes, bubble_quads, bubble_indices
# ─────────────────────────────────────────────
# DEBUG
# ─────────────────────────────────────────────
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
    """Render a debug overlay onto the page image and write it to `out_path`.

    Gray outlines = raw OCR token quads; green rectangles = bubble boxes
    labeled "BOX#<id>"; yellow rectangles = per-line boxes within each
    bubble. Silently does nothing when the image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        return
    # OCR token quads (light gray outlines)
    for bbox, txt, conf in ocr:
        pts = np.array(bbox, dtype=np.int32)
        cv2.polylines(img, [pts], True, (180, 180, 180), 1)
    # Bubble boxes (green) + per-line boxes (yellow)
    for bid, bb in bubble_boxes.items():
        x1, y1, x2, y2 = bb
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
        cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
        idxs = bubble_indices.get(bid, [])
        line_boxes = build_line_boxes_from_indices(idxs, ocr)
        for lb in line_boxes:
            lx1, ly1, lx2, ly2 = lb
            # Clamp the (padded) line boxes to the frame before drawing.
            lx1 = max(0, int(lx1)); ly1 = max(0, int(ly1))
            lx2 = min(img.shape[1] - 1, int(lx2)); ly2 = min(img.shape[0] - 1, int(ly2))
            cv2.rectangle(img, (lx1, ly1), (lx2, ly2), (0, 255, 255), 3)
    cv2.imwrite(out_path, img)
# ─────────────────────────────────────────────
# EXPORT
# ─────────────────────────────────────────────
def estimate_reading_order(bbox_dict, mode="ltr"):
    """Assign a 1-based reading order to bubbles.

    Bubbles are clustered into rows by vertical center (90 px tolerance,
    running-mean anchor), rows are read top-to-bottom, and within a row
    bubbles go left-to-right ("ltr") or right-to-left ("rtl").
    Returns {bubble_id: order}.
    """
    centers = []
    for bid, (x1, y1, x2, y2) in bbox_dict.items():
        centers.append((bid, (x1 + x2) / 2.0, (y1 + y2) / 2.0))
    centers.sort(key=lambda entry: entry[2])  # top to bottom
    row_tol = 90
    rows = []
    for entry in centers:
        target = None
        for row in rows:
            if abs(entry[2] - row["cy"]) <= row_tol:
                target = row
                break
        if target is None:
            rows.append({"cy": entry[2], "items": [entry]})
        else:
            target["items"].append(entry)
            target["cy"] = float(np.mean([e[2] for e in target["items"]]))
    rows.sort(key=lambda row: row["cy"])
    ordered = []
    for row in rows:
        ordered.extend(sorted(row["items"], key=lambda e: e[1], reverse=(mode == "rtl")))
    return {entry[0]: rank + 1 for rank, entry in enumerate(ordered)}
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
    """Write per-bubble geometry to `filepath` as pretty-printed JSON.

    Each entry (keyed by bubble id as a string) carries the padded bubble
    bbox, reading order, token quads and their bboxes, per-line boxes,
    and the union/area of those line boxes.
    """
    payload = {}
    for bid, (x1, y1, x2, y2) in bbox_dict.items():
        quads = quads_dict.get(bid, [])
        idxs = indices_dict.get(bid, [])
        token_boxes = [quad_bbox(q) for q in quads]
        line_boxes = build_line_boxes_from_indices(idxs, ocr)
        line_union = boxes_union_xyxy(line_boxes)
        payload[str(bid)] = {
            "x": int(x1),
            "y": int(y1),
            "w": int(x2 - x1),
            "h": int(y2 - y1),
            "reading_order": int(reading_map.get(bid, bid)),
            "quad_bboxes": [
                {"x": int(tb[0]), "y": int(tb[1]),
                 "w": int(tb[2] - tb[0]), "h": int(tb[3] - tb[1])}
                for tb in token_boxes
            ],
            "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
            "text_bbox": xyxy_to_xywh(boxes_union_xyxy(token_boxes)),
            "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes],
            "line_union_bbox": xyxy_to_xywh(line_union) if line_union else None,
            "line_union_area": int(bbox_area_xyxy(line_union)),
        }
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def translate_manga_text(
    image_path,
    source_lang="en",
    target_lang="ca",
    confidence_threshold=0.12,
    min_text_length=1,
    gap_px="auto",
    filter_sound_effects=True,
    quality_threshold=0.62,
    export_to_file="output.txt",
    export_bubbles_to="bubbles.json",
    reading_mode="ltr",
    debug=True
):
    """End-to-end pipeline: OCR a manga page, cluster tokens into bubbles,
    translate each bubble, and export a text report plus bubble geometry.

    Args:
        image_path: Page image to process.
        source_lang / target_lang: Translation language pair.
        confidence_threshold: Minimum EasyOCR confidence to keep a token.
        min_text_length: Minimum normalized text length to keep a token.
        gap_px: Pixel gap for token grouping, or "auto" to scale by width.
        filter_sound_effects: Drop onomatopoeia tokens when True.
        quality_threshold: Bubbles scoring below this are re-OCR'd robustly.
        export_to_file: Path for the pipe-separated text report.
        export_bubbles_to: Path for the bubble-geometry JSON.
        reading_mode: "ltr" or "rtl" bubble ordering within a row.
        debug: Also write debug_clusters.png with cluster overlays.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"❌ Cannot load image: {image_path}")
        return
    resolved_gap = auto_gap(image_path) if gap_px == "auto" else float(gap_px)
    print("Loading OCR...")
    # EasyOCR ships no Catalan model, so "ca" sources fall back to en+es.
    ocr_lang_list = ["en", "es"] if source_lang == "ca" else [source_lang]
    reader = easyocr.Reader(ocr_lang_list)
    print("Running OCR...")
    raw = reader.readtext(image_path, paragraph=False)
    print(f"Raw detections: {len(raw)}")
    filtered = []
    skipped = 0
    ih, iw = image.shape[:2]
    for bbox, text, conf in raw:
        t = normalize_text(text)
        qb = quad_bbox(bbox)
        if conf < confidence_threshold:
            skipped += 1
            continue
        if len(t) < min_text_length:
            skipped += 1
            continue
        if is_noise_text(t):
            skipped += 1
            continue
        if filter_sound_effects and is_sound_effect(t):
            skipped += 1
            continue
        if is_title_text(t):
            skipped += 1
            continue
        # The top strip usually holds titles/credits: require higher
        # confidence there to reduce false positives.
        if qb[1] < int(ih * TOP_BAND_RATIO):
            if conf < 0.70 and len(t) >= 5:
                skipped += 1
                continue
        filtered.append((bbox, t, conf))
    print(f"Kept: {len(filtered)} | Skipped: {skipped}")
    if not filtered:
        print("⚠️ No text after filtering.")
        return
    bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
        filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
    )
    if debug:
        save_debug_clusters(
            image_path=image_path,
            ocr=filtered,
            bubble_boxes=bubble_boxes,
            bubble_indices=bubble_indices,
            out_path="debug_clusters.png"
        )
    translator = GoogleTranslator(source=source_lang, target=target_lang)
    # Robust bubble text cleanup: re-OCR only the low-quality bubbles.
    clean_lines = {}
    for bid, lines in bubbles.items():
        base_txt = normalize_text(" ".join(lines))
        base_sc = ocr_candidate_score(base_txt)
        if base_sc < quality_threshold:
            rr_txt, rr_sc = reread_crop_robust(
                image,
                bubble_boxes[bid],
                reader,
                upscale=3.0,
                pad=22
            )
            # Only adopt the reread when it is clearly better.
            if rr_txt and rr_sc > base_sc + 0.06:
                txt = rr_txt
            else:
                txt = base_txt
        else:
            txt = base_txt
        clean_lines[bid] = apply_glossary(txt)
    reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
    # FIX: the divider was previously built from an EMPTY string ("" * 120),
    # so every separator line in the report and console output was blank.
    divider = "─" * 120
    out_lines = ["BUBBLE|ORDER|ORIGINAL|TRANSLATED|FLAGS", divider]
    print(divider)
    print(f"{'BUBBLE':<8} {'ORDER':<6} {'ORIGINAL':<50} {'TRANSLATED':<50} FLAGS")
    print(divider)
    translated_count = 0
    for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
        src = clean_lines[bid].strip()
        if not src:
            continue
        flags = []
        try:
            tgt = translator.translate(src) or ""
        except Exception as e:
            # Keep going on translator failures; flag the row instead.
            tgt = f"[Translation error: {e}]"
            flags.append("TRANSLATION_ERROR")
        tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
        src_u = src.upper()
        out_lines.append(
            f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
        )
        print(
            f"#{bid:<7} {reading_map.get(bid,bid):<6} "
            f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
        )
        translated_count += 1
    out_lines.append(divider)
    out_lines.append(f"✅ Done! {translated_count} bubble(s) translated, {skipped} detection(s) skipped.")
    with open(export_to_file, "w", encoding="utf-8") as f:
        f.write("\n".join(out_lines))
    export_bubbles(
        export_bubbles_to,
        bbox_dict=bubble_boxes,
        quads_dict=bubble_quads,
        indices_dict=bubble_indices,
        ocr=filtered,
        reading_map=reading_map,
        image_shape=image.shape
    )
    print(divider)
    print(f"Saved: {export_to_file}")
    print(f"Saved: {export_bubbles_to}")
    if debug:
        print("Saved: debug_clusters.png")
# Script entry point: process one sample page with the default pipeline
# settings (Italian source, Catalan target).
if __name__ == "__main__":
    translate_manga_text(
        image_path="001-page.png",
        source_lang="it",
        target_lang="ca",
        confidence_threshold=0.12,
        min_text_length=1,
        gap_px="auto",
        filter_sound_effects=True,
        quality_threshold=0.62,
        export_to_file="output.txt",
        export_bubbles_to="bubbles.json",
        reading_mode="ltr",
        debug=True
    )