# Manga page OCR + translation pipeline (EasyOCR + GoogleTranslator).
import re
|
||
import os
|
||
import json
|
||
import cv2
|
||
import numpy as np
|
||
import easyocr
|
||
from deep_translator import GoogleTranslator
|
||
|
||
|
||
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Glossary of character/series terms enforced on OCR text and translations.
# Keys and values are currently identical: the table pins canonical
# casing/spelling of proper nouns rather than renaming them (used by
# apply_glossary with case-insensitive whole-word matching).
GLOSSARY = {
    "ANYA": "ANYA",
    "STARLIGHT ANYA": "STARLIGHT ANYA",
    "MR. HENDERSON": "MR. HENDERSON",
    "HENDERSON": "HENDERSON",
    "STELLA STAR": "STELLA STAR",
}

# Regexes matched against a lowercased, letters-only reduction of a token;
# matches are treated as onomatopoeia and dropped when sound-effect
# filtering is enabled.
SOUND_EFFECT_PATTERNS = [
    r"^b+i+p+$", r"^sha+$", r"^ha+$", r"^ah+$", r"^oh+$",
    r"^ugh+$", r"^bam+$", r"^pow+$", r"^boom+$", r"^bang+$",
    r"^crash+$", r"^thud+$", r"^zip+$", r"^swoosh+$", r"^chirp+$"
]

# Regexes for chapter/series headers and author credits that should not be
# translated as dialogue.
TITLE_PATTERNS = [
    r"^(mission|chapter|episode|vol\.?|volume)\s*\d+$",
    r"^(spy|family|spy.family)$",
    r"^by\s+.+$",
]

# Regexes for non-text junk: symbol-only runs and BOX<n>-style artifacts.
NOISE_PATTERNS = [
    r"^[^a-zA-Z0-9\?!.]+$",
    r"^BOX[0-9A-Z]*$",
]

# Fraction of the image height treated as the "top band"; low-confidence
# long detections there are presumed page furniture and filtered out.
TOP_BAND_RATIO = 0.08
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# TEXT HELPERS
|
||
# ─────────────────────────────────────────────
|
||
def normalize_text(text):
|
||
t = text.strip().upper()
|
||
t = t.replace("“", "\"").replace("”", "\"")
|
||
t = t.replace("’", "'").replace("‘", "'")
|
||
t = t.replace("…", "...")
|
||
t = re.sub(r"\s+", " ", t)
|
||
t = re.sub(r"\s+([,.;:!?])", r"\1", t)
|
||
t = re.sub(r"\(\s+", "(", t)
|
||
t = re.sub(r"\s+\)", ")", t)
|
||
t = re.sub(r"\.{4,}", "...", t)
|
||
t = re.sub(r",\?", "?", t)
|
||
return t.strip()
|
||
|
||
def apply_glossary(text):
    """Rewrite glossary terms to their canonical form.

    Longest keys are applied first so multi-word entries (e.g. "STARLIGHT
    ANYA") win over their substrings; matching is case-insensitive and
    whole-word.
    """
    result = text
    ordered = sorted(GLOSSARY.items(), key=lambda kv: len(kv[0]), reverse=True)
    for term, canonical in ordered:
        result = re.sub(rf"\b{re.escape(term)}\b", canonical, result, flags=re.IGNORECASE)
    return result
|
||
|
||
def postprocess_translation_general(text):
    """Normalize a translated string and tame runaway punctuation."""
    result = re.sub(r"\s{2,}", " ", normalize_text(text)).strip()
    # Cap !!!/??? runs at two marks and long dot runs at a single ellipsis.
    for pattern, repl in ((r"([!?]){3,}", r"\1\1"), (r"\.{4,}", "...")):
        result = re.sub(pattern, repl, result)
    return result
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# FILTERS
|
||
# ─────────────────────────────────────────────
|
||
def is_sound_effect(text):
    """True if the text reduces to a known onomatopoeia pattern."""
    letters_only = re.sub(r"[^a-z]", "", text.strip().lower())
    for pattern in SOUND_EFFECT_PATTERNS:
        if re.fullmatch(pattern, letters_only, re.IGNORECASE):
            return True
    return False
|
||
|
||
def is_title_text(text):
    """True for chapter/series headers and author credits."""
    candidate = text.strip().lower()
    for pattern in TITLE_PATTERNS:
        if re.fullmatch(pattern, candidate, re.IGNORECASE):
            return True
    return False
|
||
|
||
def is_noise_text(text):
    """True for symbol-only runs and BOX<n>-style OCR artifacts."""
    candidate = text.strip()
    for pattern in NOISE_PATTERNS:
        if re.fullmatch(pattern, candidate):
            return True
    return False
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# GEOMETRY
|
||
# ─────────────────────────────────────────────
|
||
def quad_bbox(quad):
    """Axis-aligned integer (x1, y1, x2, y2) bounds of a 4-point quad."""
    xs = tuple(p[0] for p in quad)
    ys = tuple(p[1] for p in quad)
    return int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))
|
||
|
||
def quad_center(quad):
    """Center of the quad's axis-aligned bounding box (float pair)."""
    xs = [p[0] for p in quad]
    ys = [p[1] for p in quad]
    # Same truncation as quad_bbox: bounds are int()-floored before averaging.
    left, right = int(min(xs)), int(max(xs))
    top, bottom = int(min(ys)), int(max(ys))
    return ((left + right) / 2.0, (top + bottom) / 2.0)
|
||
|
||
def boxes_union_xyxy(boxes):
    """Union of xyxy boxes, ignoring None entries; None if nothing remains."""
    valid = [b for b in boxes if b is not None]
    if not valid:
        return None
    x1s, y1s, x2s, y2s = zip(*valid)
    return (int(min(x1s)), int(min(y1s)), int(max(x2s)), int(max(y2s)))
|
||
|
||
def bbox_area_xyxy(b):
    """Clamped integer area of an xyxy box; 0 for None or degenerate boxes."""
    if b is None:
        return 0
    width = b[2] - b[0]
    height = b[3] - b[1]
    return int(max(0, width) * max(0, height))
|
||
|
||
def xyxy_to_xywh(b):
    """Convert an xyxy box to an {x, y, w, h} dict with non-negative sizes."""
    if b is None:
        return None
    x1, y1, x2, y2 = b
    width = max(0, x2 - x1)
    height = max(0, y2 - y1)
    return {"x": int(x1), "y": int(y1), "w": int(width), "h": int(height)}
|
||
|
||
def overlap_or_near(a, b, gap=0):
    """True if xyxy boxes a and b overlap or lie within `gap` px on both axes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    # Per-axis separation; negative (overlap) clamps to 0.
    horizontal = max(0, max(ax1, bx1) - min(ax2, bx2))
    vertical = max(0, max(ay1, by1) - min(ay2, by2))
    return horizontal <= gap and vertical <= gap
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# QUALITY
|
||
# ─────────────────────────────────────────────
|
||
def ocr_quality_score(text):
    """Heuristic OCR quality score in [0, 1].

    The base score is the alphabetic-character ratio; runs of unexpected
    symbols and doubled commas each subtract 0.2, and terminal sentence
    punctuation adds a 0.05 bonus.  Empty / single-char text scores 0.
    """
    if not text or len(text) < 2:
        return 0.0
    score = sum(c.isalpha() for c in text) / max(1, len(text))
    if re.search(r"[^\w\s\'\!\?\.,\-]{2,}", text):
        score -= 0.2
    if re.search(r",,", text):
        score -= 0.2
    if re.search(r"[.!?]$", text):
        score += 0.05
    return max(0.0, min(1.0, score))
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# OCR RE-READ
|
||
# ─────────────────────────────────────────────
|
||
def preprocess_variant(crop_bgr, mode):
    """Produce a grayscale preprocessing variant of a BGR crop for OCR.

    Modes: "raw" (plain grayscale), "clahe" (contrast-limited histogram
    equalization), "adaptive" (blur + adaptive Gaussian threshold).  Any
    unknown mode falls back to plain grayscale.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
    if mode == "clahe":
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(gray)
    if mode == "adaptive":
        denoised = cv2.GaussianBlur(gray, (3, 3), 0)
        return cv2.adaptiveThreshold(
            denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11
        )
    # "raw" and unrecognized modes: plain grayscale.
    return gray
|
||
|
||
def run_ocr_on_array(reader, arr):
    """OCR an in-memory image array with EasyOCR.

    The array is written to a uniquely named temporary PNG before being
    handed to the reader.  The previous implementation used a fixed
    "_tmp_ocr.png" in the current working directory, which raced with
    concurrent invocations (two runs clobbering each other's file) and
    littered the CWD; tempfile.mkstemp fixes both.  The temp file is
    always removed.

    Returns the raw reader.readtext() detection list.
    """
    import tempfile

    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # cv2.imwrite reopens the path itself
    try:
        cv2.imwrite(tmp_path, arr)
        return reader.readtext(tmp_path, paragraph=False)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
||
|
||
def reread_crop(image, bbox, reader, upscale=2.5, pad=18):
    """Re-OCR a padded, upscaled crop of `image` around `bbox`.

    Each preprocessing variant ("raw", "clahe", "adaptive") is OCR'd, the
    per-variant detections are merged into one string, and the variant whose
    merged text scores highest under ocr_quality_score wins.

    Returns the best merged string, or None if the crop is empty or no
    variant produced any detections.
    """
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
    # Expand the crop by `pad`, clamped to the image bounds.
    x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None

    # Upscale before OCR — small lettering reads better at higher resolution.
    up = cv2.resize(crop, (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), interpolation=cv2.INTER_CUBIC)

    best = None  # (score, merged_text) of the best variant so far
    for mode in ("raw", "clahe", "adaptive"):
        proc = preprocess_variant(up, mode)
        res = run_ocr_on_array(reader, proc)
        if not res:
            continue
        # Order detections top-to-bottom, then left-to-right, by the first
        # quad point so the merged string reads naturally.
        res.sort(key=lambda r: (r[0][0][1], r[0][0][0]))
        lines = [normalize_text(t) for _, t, _ in res if t.strip()]
        merged = re.sub(r"\s{2,}", " ", " ".join(lines)).strip()
        s = ocr_quality_score(merged)
        if best is None or s > best[0]:
            best = (s, merged)

    return best[1] if best else None
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# LINES + YELLOW BOXES
|
||
# ─────────────────────────────────────────────
|
||
def build_lines_from_indices(indices, ocr):
    """Reconstruct text lines from a bubble's token indices.

    Tokens are banded into rows by vertical center (greedy, with a running
    mean per row), rows are ordered top-to-bottom, and within each row
    tokens are joined left-to-right into one normalized string.

    Returns a list of line strings (empty list for no indices).
    """
    if not indices:
        return []

    # Per-token: (index, bbox, x-center, y-center, height).
    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        items.append((i, b, xc, yc, h))

    # Row tolerance scales with the median token height.
    med_h = float(np.median([it[4] for it in items])) if items else 10.0
    row_tol = max(6.0, med_h * 0.75)

    # Greedy row banding: attach each token (in y order) to the first row
    # whose running mean center is within row_tol, else start a new row.
    items.sort(key=lambda x: x[3])
    rows = []
    for it in items:
        i, b, xc, yc, h = it
        placed = False
        for r in rows:
            if abs(yc - r["yc"]) <= row_tol:
                r["m"].append((i, b, xc, yc))
                r["yc"] = float(np.mean([k[3] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": yc, "m": [(i, b, xc, yc)]})

    # Emit rows top-to-bottom, tokens left-to-right.
    rows.sort(key=lambda r: r["yc"])
    lines = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z[2])
        txt = normalize_text(" ".join(ocr[i][1] for i, _, _, _ in mem))
        lines.append(txt)

    return lines
|
||
|
||
|
||
def build_line_boxes_from_indices(indices, ocr):
    """
    Robust yellow-box generation with punctuation attachment:
      - row grouping (greedy banding by token y-center),
      - chunking each row by horizontal gap,
      - attaching tiny punctuation/special tokens to the nearest chunk,
      - a coverage guarantee (every token ends up inside some box),
      - merging boxes with heavy overlap (IoU > 0.72).

    Returns a list of padded (x1, y1, x2, y2) boxes sorted top-left first.
    """
    if not indices:
        return []

    # Per-token geometry + normalized text.
    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        txt = normalize_text(ocr[i][1])
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        w = max(1.0, b[2] - b[0])
        h = max(1.0, b[3] - b[1])
        items.append({
            "i": i, "b": b, "txt": txt,
            "xc": xc, "yc": yc, "w": w, "h": h
        })

    # Tolerances scale with the median token height.
    med_h = float(np.median([it["h"] for it in items])) if items else 10.0
    row_tol = max(6.0, med_h * 0.90)
    gap_x_tol = max(8.0, med_h * 1.25)
    pad = max(1, int(round(med_h * 0.12)))

    def is_punct_like(t):
        # Mostly-symbol or very short tokens are treated as punctuation
        # fragments and attached to a neighboring chunk instead of forming
        # chunks of their own.
        raw = t.strip()
        if raw == "":
            return True
        punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
        return punct_ratio >= 0.5 or len(raw) <= 2

    # 1) Row grouping: greedy banding by y-center with a running row mean.
    items_sorted = sorted(items, key=lambda x: x["yc"])
    rows = []
    for it in items_sorted:
        placed = False
        for r in rows:
            if abs(it["yc"] - r["yc"]) <= row_tol:
                r["m"].append(it)
                r["yc"] = float(np.mean([k["yc"] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it["yc"], "m": [it]})

    rows.sort(key=lambda r: r["yc"])
    out_boxes = []

    for r in rows:
        mem = sorted(r["m"], key=lambda z: z["xc"])
        normal = [t for t in mem if not is_punct_like(t["txt"])]
        punct = [t for t in mem if is_punct_like(t["txt"])]

        # A row of only punctuation-like tokens is chunked as-is.
        if not normal:
            normal = mem
            punct = []

        # 2) Chunk normal tokens: split where the horizontal gap between
        # consecutive tokens exceeds gap_x_tol.
        chunks = []
        cur = [normal[0]]
        for t in normal[1:]:
            prev = cur[-1]["b"]
            b = t["b"]
            gap = b[0] - prev[2]
            if gap <= gap_x_tol:
                cur.append(t)
            else:
                chunks.append(cur)
                cur = [t]
        chunks.append(cur)

        # 3) Attach each punctuation token to the lowest-scoring chunk
        # (weighted center distance, with a bonus for touching/near chunks).
        for p in punct:
            pb = p["b"]
            pxc, pyc = p["xc"], p["yc"]

            best_k = -1
            best_score = 1e18
            for k, ch in enumerate(chunks):
                ub = boxes_union_xyxy([x["b"] for x in ch])
                cx = (ub[0] + ub[2]) / 2.0
                cy = (ub[1] + ub[3]) / 2.0

                # Vertical distance is weighted heavier than horizontal.
                dx = abs(pxc - cx)
                dy = abs(pyc - cy)
                score = dx + 1.8 * dy

                near = overlap_or_near(pb, ub, gap=int(med_h * 0.9))
                if near:
                    score -= med_h * 2.0

                if score < best_score:
                    best_score = score
                    best_k = k

            if best_k >= 0:
                chunks[best_k].append(p)
            else:
                chunks.append([p])

        # 4) Emit one padded union box per chunk.
        for ch in chunks:
            ub = boxes_union_xyxy([x["b"] for x in ch])
            if ub:
                x1, y1, x2, y2 = ub
                out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))

    # 5) Coverage guarantee: any token not fully inside an emitted box gets
    # its own padded box.
    token_boxes = [it["b"] for it in items]

    def inside(tb, lb):
        return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]

    for tb in token_boxes:
        ok = any(inside(tb, lb) for lb in out_boxes)
        if not ok:
            x1, y1, x2, y2 = tb
            out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))

    # 6) Merge boxes with heavy overlap (single greedy pass, IoU > 0.72).
    merged = []
    for b in out_boxes:
        merged_into = False
        for i, m in enumerate(merged):
            ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
            ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
            inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
            a1 = max(1, (b[2]-b[0])*(b[3]-b[1]))
            a2 = max(1, (m[2]-m[0])*(m[3]-m[1]))
            iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
            if iou > 0.72:
                merged[i] = boxes_union_xyxy([b, m])
                merged_into = True
                break
        if not merged_into:
            merged.append(b)

    # Top-to-bottom, then left-to-right.
    merged.sort(key=lambda z: (z[1], z[0]))
    return merged
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# GROUPING
|
||
# ─────────────────────────────────────────────
|
||
def auto_gap(image_path, base=18, ref_w=750):
    """Scale the token-clustering gap to the page width (ref_w px baseline)."""
    page = cv2.imread(image_path)
    if page is None:
        return base  # unreadable image: keep the unscaled default
    page_width = page.shape[1]
    return base * (page_width / ref_w)
|
||
|
||
def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
    """Cluster OCR tokens into speech bubbles via union-find.

    Two tokens join the same cluster when their boxes overlap or are within
    gap_px on both axes, or when their centers are close (distance scaled by
    the median token height) and roughly on the same horizontal band.

    Args:
        ocr: list of (quad, text, confidence) detections.
        image_shape: image shape; only [:2] (height, width) is used to clamp
            the padded bubble boxes.
        gap_px: proximity gap for the box-adjacency test.
        bbox_padding: padding applied to each bubble's union box.

    Returns:
        (bubbles, bubble_boxes, bubble_quads, bubble_indices) — four dicts
        keyed by 1-based bubble id: text lines, padded (x1, y1, x2, y2) box,
        member quads, and member token indices.
    """
    n = len(ocr)
    if n == 0:
        return {}, {}, {}, {}

    boxes = [quad_bbox(r[0]) for r in ocr]
    centers = [quad_center(r[0]) for r in ocr]
    hs = [max(1.0, b[3] - b[1]) for b in boxes]
    med_h = float(np.median(hs)) if hs else 12.0
    # Center-distance threshold for joining tokens that don't touch.
    dist_thresh = max(20.0, med_h * 2.2)

    # Union-find with path halving.
    p = list(range(n))

    def find(x):
        while p[x] != x:
            p[x] = p[p[x]]
            x = p[x]
        return x

    def unite(a, b):
        p[find(a)] = find(b)

    # O(n^2) pairwise linking: box adjacency first, center distance second.
    for i in range(n):
        for j in range(i + 1, n):
            if overlap_or_near(boxes[i], boxes[j], gap=gap_px):
                unite(i, j)
                continue
            cx1, cy1 = centers[i]
            cx2, cy2 = centers[j]
            d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5
            if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 3.0:
                unite(i, j)

    # Collect clusters by root.
    groups = {}
    for i in range(n):
        groups.setdefault(find(i), []).append(i)

    # Stable bubble ids: clusters ordered by topmost, then leftmost token.
    sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs)))

    bubbles = {}
    bubble_boxes = {}
    bubble_quads = {}
    bubble_indices = {}

    ih, iw = image_shape[:2]
    for bid, idxs in enumerate(sorted_groups, start=1):
        idxs = sorted(idxs, key=lambda k: boxes[k][1])
        lines = build_lines_from_indices(idxs, ocr)
        quads = [ocr[k][0] for k in idxs]
        ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
        if ub is None:
            continue

        # Pad the union box, clamped to the image bounds.
        x1, y1, x2, y2 = ub
        x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
        x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding)

        bubbles[bid] = lines
        bubble_boxes[bid] = (x1, y1, x2, y2)
        bubble_quads[bid] = quads
        bubble_indices[bid] = idxs

    return bubbles, bubble_boxes, bubble_quads, bubble_indices
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# DEBUG
|
||
# ─────────────────────────────────────────────
|
||
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
    """Render a debug overlay: gray token quads, green bubble boxes with
    BOX#<id> labels, and yellow per-line boxes.  Writes the image to
    out_path; silently does nothing if the source image can't be read."""
    img = cv2.imread(image_path)
    if img is None:
        return

    # Raw token quads in gray.
    for bbox, txt, conf in ocr:
        pts = np.array(bbox, dtype=np.int32)
        cv2.polylines(img, [pts], True, (180, 180, 180), 1)

    # Green bubble boxes + yellow line boxes per bubble.
    for bid, bb in bubble_boxes.items():
        x1, y1, x2, y2 = bb
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
        cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)

        idxs = bubble_indices.get(bid, [])
        line_boxes = build_line_boxes_from_indices(idxs, ocr)
        for lb in line_boxes:
            # Clamp each line box to the image before drawing.
            lx1, ly1, lx2, ly2 = lb
            lx1 = max(0, int(lx1)); ly1 = max(0, int(ly1))
            lx2 = min(img.shape[1] - 1, int(lx2)); ly2 = min(img.shape[0] - 1, int(ly2))
            cv2.rectangle(img, (lx1, ly1), (lx2, ly2), (0, 255, 255), 3)

    cv2.imwrite(out_path, img)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# EXPORT
|
||
# ─────────────────────────────────────────────
|
||
def estimate_reading_order(bbox_dict, mode="ltr", row_tol=90):
    """Assign a 1-based reading order to bubbles.

    Bubbles are banded into rows by vertical center (greedy, |dy| <= row_tol
    against a running row mean), rows are read top-to-bottom, and within a
    row bubbles are read left-to-right ("ltr") or right-to-left ("rtl",
    manga-style).

    Args:
        bbox_dict: {bubble_id: (x1, y1, x2, y2)}.
        mode: "ltr" or "rtl" in-row direction.
        row_tol: vertical tolerance in px for grouping bubbles into one row
            (previously a hard-coded 90; now tunable per page scale).

    Returns:
        {bubble_id: order} with order starting at 1.
    """
    centers = [
        (bid, (x1 + x2) / 2.0, (y1 + y2) / 2.0)
        for bid, (x1, y1, x2, y2) in bbox_dict.items()
    ]
    centers.sort(key=lambda t: t[2])

    # Greedy row banding: attach to the first row whose running mean center
    # is within row_tol, else start a new row.
    rows = []
    for item in centers:
        for row in rows:
            if abs(item[2] - row["cy"]) <= row_tol:
                row["items"].append(item)
                row["cy"] = float(np.mean([x[2] for x in row["items"]]))
                break
        else:
            rows.append({"cy": item[2], "items": [item]})

    rows.sort(key=lambda r: r["cy"])
    order = []
    for row in rows:
        row["items"].sort(key=lambda x: x[1], reverse=(mode == "rtl"))
        order.extend(z[0] for z in row["items"])

    return {bid: i + 1 for i, bid in enumerate(order)}
|
||
|
||
|
||
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
    """Write per-bubble geometry to a JSON file.

    For each bubble id the record contains: padded bubble box (x/y/w/h),
    reading order, the member token boxes and raw quads, the tight union of
    token boxes ("text_bbox"), and the yellow line-box geometry
    ("line_bboxes", their union, and its area).  Keys are stringified
    bubble ids; output is UTF-8 JSON with indent=2.
    """
    out = {}

    for bid, bb in bbox_dict.items():
        x1, y1, x2, y2 = bb
        quads = quads_dict.get(bid, [])
        idxs = indices_dict.get(bid, [])

        # Tight union of the member token boxes (unpadded).
        qboxes = [quad_bbox(q) for q in quads]
        text_union = boxes_union_xyxy(qboxes)

        # Yellow per-line boxes and their combined extent.
        line_boxes_xyxy = build_line_boxes_from_indices(idxs, ocr)
        line_union_xyxy = boxes_union_xyxy(line_boxes_xyxy)
        line_union_area = bbox_area_xyxy(line_union_xyxy)

        out[str(bid)] = {
            "x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
            # Falls back to the bubble id when the id is missing from the map.
            "reading_order": int(reading_map.get(bid, bid)),
            "quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes],
            "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
            "text_bbox": xyxy_to_xywh(text_union),

            # yellow geometry
            "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
            "line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
            "line_union_area": int(line_union_area),
        }

    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2, ensure_ascii=False)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# MAIN
|
||
# ─────────────────────────────────────────────
|
||
def translate_manga_text(
    image_path,
    source_lang="en",
    target_lang="ca",
    confidence_threshold=0.12,
    min_text_length=1,
    gap_px="auto",
    filter_sound_effects=True,
    quality_threshold=0.62,
    export_to_file="output.txt",
    export_bubbles_to="bubbles.json",
    reading_mode="ltr",
    debug=True
):
    """End-to-end pipeline: OCR a manga page, cluster tokens into bubbles,
    translate each bubble, and export text + geometry.

    Args:
        image_path: page image readable by cv2.imread.
        source_lang / target_lang: language codes for OCR and translation.
        confidence_threshold: drop OCR detections below this confidence.
        min_text_length: drop detections shorter than this after normalization.
        gap_px: clustering gap in px, or "auto" to scale by page width.
        filter_sound_effects: drop onomatopoeia detections.
        quality_threshold: bubbles scoring below this are re-OCR'd from a crop.
        export_to_file: path for the pipe-separated text report.
        export_bubbles_to: path for the per-bubble geometry JSON.
        reading_mode: "ltr" or "rtl" in-row bubble order.
        debug: also write debug_clusters.png with cluster overlays.

    Returns None; results go to stdout and the export files.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"❌ Cannot load image: {image_path}")
        return

    resolved_gap = auto_gap(image_path) if gap_px == "auto" else float(gap_px)

    print("Loading OCR...")
    # NOTE(review): Catalan target uses en+es OCR models — presumably because
    # EasyOCR lacks a Catalan recognizer; confirm against EasyOCR's language
    # list.
    ocr_lang_list = ["en", "es"] if source_lang == "ca" else [source_lang]
    reader = easyocr.Reader(ocr_lang_list)

    print("Running OCR...")
    raw = reader.readtext(image_path, paragraph=False)
    print(f"Raw detections: {len(raw)}")

    # Filter raw detections: confidence, length, noise, SFX, titles, and
    # dubious text in the top band of the page.
    filtered = []
    skipped = 0
    ih, iw = image.shape[:2]

    for bbox, text, conf in raw:
        t = normalize_text(text)
        qb = quad_bbox(bbox)

        if conf < confidence_threshold:
            skipped += 1
            continue
        if len(t) < min_text_length:
            skipped += 1
            continue
        if is_noise_text(t):
            skipped += 1
            continue
        if filter_sound_effects and is_sound_effect(t):
            skipped += 1
            continue
        if is_title_text(t):
            skipped += 1
            continue

        # Top band: long, low-confidence detections are likely headers.
        if qb[1] < int(ih * TOP_BAND_RATIO):
            if conf < 0.70 and len(t) >= 5:
                skipped += 1
                continue

        filtered.append((bbox, t, conf))

    print(f"Kept: {len(filtered)} | Skipped: {skipped}")
    if not filtered:
        print("⚠️ No text after filtering.")
        return

    # Cluster surviving tokens into bubbles.
    bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
        filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
    )

    if debug:
        save_debug_clusters(
            image_path=image_path,
            ocr=filtered,
            bubble_boxes=bubble_boxes,
            bubble_indices=bubble_indices,
            out_path="debug_clusters.png"
        )

    translator = GoogleTranslator(source=source_lang, target=target_lang)

    # Assemble per-bubble text; low-quality bubbles get a second OCR pass
    # on an upscaled crop before the glossary is applied.
    clean_lines = {}
    for bid, lines in bubbles.items():
        txt = normalize_text(" ".join(lines))
        q = ocr_quality_score(txt)
        if q < quality_threshold:
            reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18)
            if reread:
                txt = normalize_text(reread)
        clean_lines[bid] = apply_glossary(txt)

    reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)

    divider = "─" * 120
    out_lines = ["BUBBLE|ORDER|ORIGINAL|TRANSLATED|FLAGS", divider]

    print(divider)
    print(f"{'BUBBLE':<8} {'ORDER':<6} {'ORIGINAL':<50} {'TRANSLATED':<50} FLAGS")
    print(divider)

    # Translate bubble-by-bubble in reading order; translation failures are
    # recorded inline rather than aborting the page.
    translated_count = 0
    for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
        src = clean_lines[bid].strip()
        if not src:
            continue
        flags = []

        try:
            tgt = translator.translate(src) or ""
        except Exception as e:
            tgt = f"[Translation error: {e}]"

        tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
        src_u = src.upper()

        out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}")
        print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}")
        translated_count += 1

    out_lines.append(divider)
    out_lines.append(f"✅ Done! {translated_count} bubble(s) translated, {skipped} detection(s) skipped.")

    with open(export_to_file, "w", encoding="utf-8") as f:
        f.write("\n".join(out_lines))

    export_bubbles(
        export_bubbles_to,
        bbox_dict=bubble_boxes,
        quads_dict=bubble_quads,
        indices_dict=bubble_indices,
        ocr=filtered,
        reading_map=reading_map,
        image_shape=image.shape
    )

    print(divider)
    print(f"Saved: {export_to_file}")
    print(f"Saved: {export_bubbles_to}")
    if debug:
        print("Saved: debug_clusters.png (special chars included in yellow boxes)")
|
||
|
||
|
||
if __name__ == "__main__":
    # Demo invocation: translate one page (English -> Catalan) with debug
    # overlays and default thresholds.
    translate_manga_text(
        "002-page.png",
        source_lang="en",
        target_lang="ca",
        confidence_threshold=0.12,
        min_text_length=1,
        gap_px="auto",
        filter_sound_effects=True,
        quality_threshold=0.62,
        export_to_file="output.txt",
        export_bubbles_to="bubbles.json",
        reading_mode="ltr",
        debug=True,
    )
|