Added some fixes
This commit is contained in:
BIN
fonts/ComicNeue-Regular.ttf
Executable file
BIN
fonts/ComicNeue-Regular.ttf
Executable file
Binary file not shown.
Binary file not shown.
@@ -10,10 +10,8 @@ from PIL import Image, ImageDraw, ImageFont
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
DEFAULT_FONT_CANDIDATES = [
|
||||
"fonts/AnimeAce2_reg.ttf",
|
||||
"fonts/WildWordsRoman.ttf",
|
||||
"fonts/ComicRelief-Regular.ttf",
|
||||
"fonts/NotoSans-Regular.ttf",
|
||||
"fonts/ComicNeue-Regular.ttf",
|
||||
]
|
||||
DEFAULT_FONT_COLOR = (0, 0, 0)
|
||||
DEFAULT_STROKE_COLOR = (255, 255, 255)
|
||||
@@ -501,7 +499,7 @@ def render_translations(
|
||||
|
||||
if __name__ == "__main__":
|
||||
render_translations(
|
||||
input_image="002-page.png",
|
||||
input_image="001-page.png",
|
||||
output_image="page_translated.png",
|
||||
translations_file="output.txt",
|
||||
bubbles_file="bubbles.json",
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
@@ -8,7 +11,7 @@ from deep_translator import GoogleTranslator
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
GLOSSARY = {
|
||||
"ANYA": "ANYA",
|
||||
@@ -32,17 +35,17 @@ TITLE_PATTERNS = [
|
||||
|
||||
NOISE_PATTERNS = [
|
||||
r"^[^a-zA-Z0-9\?!.]+$",
|
||||
r"^BOX[0-9A-Z]*$",
|
||||
r"^BOX[0-9A-Z#\s]*$",
|
||||
]
|
||||
|
||||
TOP_BAND_RATIO = 0.08
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# TEXT HELPERS
|
||||
# TEXT HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def normalize_text(text):
|
||||
t = text.strip().upper()
|
||||
def normalize_text(text: str) -> str:
|
||||
t = (text or "").strip().upper()
|
||||
t = t.replace("“", "\"").replace("”", "\"")
|
||||
t = t.replace("’", "'").replace("‘", "'")
|
||||
t = t.replace("…", "...")
|
||||
@@ -54,13 +57,13 @@ def normalize_text(text):
|
||||
t = re.sub(r",\?", "?", t)
|
||||
return t.strip()
|
||||
|
||||
def apply_glossary(text: str) -> str:
    """Replace glossary terms in *text* with their canonical spelling.

    Each key of the module-level ``GLOSSARY`` mapping is matched as a whole
    word, case-insensitively, and replaced by its mapped value. Longer keys
    are applied first so a short key cannot rewrite part of a longer one
    before the longer one has had its turn.

    Args:
        text: Input string; ``None`` or empty is treated as ``""``.

    Returns:
        The text with every glossary match substituted.
    """
    out = text or ""
    for key in sorted(GLOSSARY, key=len, reverse=True):
        replacement = GLOSSARY[key]
        # A callable replacement is inserted verbatim. Passing the value as a
        # template string would make ``re.sub`` interpret backslashes and
        # group references (``\1``, ``\g<name>``) inside glossary values.
        out = re.sub(
            rf"\b{re.escape(key)}\b",
            lambda _match, _value=replacement: _value,
            out,
            flags=re.IGNORECASE,
        )
    return out
|
||||
|
||||
def postprocess_translation_general(text):
|
||||
def postprocess_translation_general(text: str) -> str:
|
||||
t = normalize_text(text)
|
||||
t = re.sub(r"\s{2,}", " ", t).strip()
|
||||
t = re.sub(r"([!?]){3,}", r"\1\1", t)
|
||||
@@ -69,23 +72,23 @@ def postprocess_translation_general(text):
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# FILTERS
|
||||
# FILTERS
|
||||
# ─────────────────────────────────────────────
|
||||
def is_sound_effect(text: str) -> bool:
    """Tell whether *text* matches one of ``SOUND_EFFECT_PATTERNS``.

    The input is stripped, lower-cased and reduced to the letters a-z before
    it is compared; a pattern has to match the whole reduced string.
    """
    letters_only = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    for pattern in SOUND_EFFECT_PATTERNS:
        if re.fullmatch(pattern, letters_only, re.IGNORECASE):
            return True
    return False
|
||||
|
||||
def is_title_text(text: str) -> bool:
    """Tell whether *text* matches one of ``TITLE_PATTERNS``.

    The input is stripped and lower-cased first; a pattern has to match the
    whole remaining string (case-insensitively).
    """
    candidate = (text or "").strip().lower()
    for pattern in TITLE_PATTERNS:
        if re.fullmatch(pattern, candidate, re.IGNORECASE):
            return True
    return False
|
||||
|
||||
def is_noise_text(text: str) -> bool:
    """Tell whether *text* matches one of ``NOISE_PATTERNS``.

    Only surrounding whitespace is removed; matching is case-sensitive and a
    pattern has to cover the whole remaining string.
    """
    candidate = (text or "").strip()
    for pattern in NOISE_PATTERNS:
        if re.fullmatch(pattern, candidate):
            return True
    return False
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# GEOMETRY
|
||||
# GEOMETRY
|
||||
# ─────────────────────────────────────────────
|
||||
def quad_bbox(quad):
|
||||
xs = [p[0] for p in quad]
|
||||
@@ -127,9 +130,9 @@ def overlap_or_near(a, b, gap=0):
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# QUALITY
|
||||
# QUALITY / SCORING
|
||||
# ─────────────────────────────────────────────
|
||||
def ocr_quality_score(text):
|
||||
def ocr_quality_score(text: str) -> float:
|
||||
if not text or len(text) < 2:
|
||||
return 0.0
|
||||
alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text))
|
||||
@@ -141,21 +144,75 @@ def ocr_quality_score(text):
|
||||
bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
|
||||
return max(0.0, min(1.0, alpha_ratio - penalty + bonus))
|
||||
|
||||
def ocr_candidate_score(text: str) -> float:
    """Score an OCR candidate string; higher means it looks more like text.

    The score is a weighted mix of character-class ratios computed on the
    stripped input, minus fixed penalties, clamped to ``[0.0, 1.0]``:

    * ``+0.62`` x share of alphabetic characters
    * ``+0.10`` x share of whitespace characters
    * ``+0.20`` x share of accepted punctuation ``.,!?'-:;()[]"``
    * ``-0.45`` x share of characters that are neither word characters,
      whitespace nor accepted punctuation
    * ``-0.05`` if a stand-alone capital letter A-Z occurs
    * ``-0.08`` if two or more consecutive digits occur
    * ``-0.08`` if any two-character sequence repeats three times in a row

    Args:
        text: Candidate string; ``None``, empty or whitespace-only scores 0.0.

    Returns:
        A float in ``[0.0, 1.0]``.
    """
    t = (text or "").strip()
    n = len(t)
    if n == 0:
        return 0.0

    alpha = sum(c.isalpha() for c in t) / n
    spaces = sum(c.isspace() for c in t) / n
    punct_ok = sum(c in ".,!?'-:;()[]\"" for c in t) / n
    bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", t)) / n

    penalty = 0.0
    if re.search(r"\b[A-Z]\b", t):
        penalty += 0.05
    if re.search(r"[0-9]{2,}", t):
        penalty += 0.08
    if re.search(r"(..)\1\1", t):
        penalty += 0.08

    score = (0.62 * alpha) + (0.10 * spaces) + (0.20 * punct_ok) - (0.45 * bad) - penalty
    return max(0.0, min(1.0, score))
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# OCR RE-READ
|
||||
# OCR MULTI-PASS
|
||||
# ─────────────────────────────────────────────
|
||||
def preprocess_variant(crop_bgr, mode):
    """Return a single-channel preprocessing variant of a BGR crop for OCR.

    The crop is converted to grayscale and then transformed according to
    *mode*:

    * ``"raw"``      - grayscale, unchanged
    * ``"clahe"``    - CLAHE contrast equalisation (clip 2.0, 8x8 tiles)
    * ``"adaptive"`` - 3x3 Gaussian blur, then Gaussian adaptive threshold
      (block size 35, C = 11)
    * ``"otsu"``     - 3x3 Gaussian blur, then Otsu binarisation
    * ``"invert"``   - grayscale negative

    Any other mode falls back to the plain grayscale image.

    Args:
        crop_bgr: Image in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).
        mode: One of the mode names listed above.

    Returns:
        The processed single-channel image.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)

    if mode == "clahe":
        return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)

    if mode in ("adaptive", "otsu"):
        # Both thresholding modes start from the same light denoise.
        den = cv2.GaussianBlur(gray, (3, 3), 0)
        if mode == "adaptive":
            return cv2.adaptiveThreshold(
                den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 35, 11
            )
        _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th

    if mode == "invert":
        return 255 - gray

    # "raw" and every unrecognised mode.
    return gray
|
||||
|
||||
def rotate_image_keep_bounds(img, angle_deg):
    """Rotate *img* about its centre, enlarging the canvas so nothing is cut.

    The output size is the bounding box of the rotated image, and the
    rotation matrix is shifted so the original centre lands on the centre of
    the new canvas. Pixels are resampled with bicubic interpolation and the
    uncovered corners are filled with white.

    Args:
        img: Image array with shape ``(h, w)`` or ``(h, w, channels)``.
        angle_deg: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image on the enlarged canvas.
    """
    h, w = img.shape[:2]
    c = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(c, angle_deg, 1.0)
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])

    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Re-centre: move the old centre onto the centre of the larger canvas.
    M[0, 2] += (new_w / 2) - c[0]
    M[1, 2] += (new_h / 2) - c[1]

    # A bare scalar border value only sets the first channel (255 becomes
    # (255, 0, 0, 0)), which paints blue corners on a BGR image. Supply one
    # 255 per channel so the fill is white for any channel count.
    channels = 1 if len(img.shape) == 2 else img.shape[2]
    border = 255 if channels == 1 else (255,) * channels

    return cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderValue=border)
|
||||
|
||||
def run_ocr_on_array(reader, arr):
|
||||
tmp = "_tmp_ocr.png"
|
||||
cv2.imwrite(tmp, arr)
|
||||
@@ -165,35 +222,104 @@ def run_ocr_on_array(reader, arr):
|
||||
if os.path.exists(tmp):
|
||||
os.remove(tmp)
|
||||
|
||||
def reread_crop(image, bbox, reader, upscale=2.5, pad=18):
|
||||
def rebuild_text_from_ocr_result(res):
    """Reassemble OCR detections into one normalised string in reading order.

    Each usable detection is a ``(quad, text, confidence)`` triple; entries of
    any other length, or with blank text, are ignored. Detections are grouped
    into rows by vertical centre (tolerance: 75% of the median box height,
    at least 6 px), rows are ordered top to bottom and the members of a row
    left to right. Row strings are normalised, joined with single spaces and
    normalised once more.

    Args:
        res: Iterable of OCR detections, or a falsy value.

    Returns:
        The rebuilt text, or ``""`` when there is nothing usable.
    """
    if not res:
        return ""

    # Token layout: (bbox, text, conf, x_centre, y_centre, height)
    tokens = []
    for entry in res:
        if len(entry) != 3:
            continue
        quad, raw_text, confidence = entry
        if not raw_text or not raw_text.strip():
            continue
        box = quad_bbox(quad)
        x_centre = (box[0] + box[2]) / 2.0
        y_centre = (box[1] + box[3]) / 2.0
        height = max(1.0, box[3] - box[1])
        tokens.append((box, raw_text, confidence, x_centre, y_centre, height))

    if not tokens:
        return ""

    median_height = float(np.median([tok[5] for tok in tokens]))
    row_tolerance = max(6.0, median_height * 0.75)

    rows = []
    for tok in sorted(tokens, key=lambda z: z[4]):
        # First existing row whose running centre is close enough, if any.
        target = next(
            (row for row in rows if abs(tok[4] - row["yc"]) <= row_tolerance),
            None,
        )
        if target is None:
            rows.append({"yc": tok[4], "m": [tok]})
        else:
            target["m"].append(tok)
            target["yc"] = float(np.mean([k[4] for k in target["m"]]))

    rows.sort(key=lambda r: r["yc"])

    lines = []
    for row in rows:
        left_to_right = sorted(row["m"], key=lambda z: z[3])
        line = normalize_text(" ".join(tok[1] for tok in left_to_right))
        if line:
            lines.append(line)

    return normalize_text(" ".join(lines))
|
||||
|
||||
def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22):
    """Re-run OCR on one region with several variants and keep the best read.

    The bounding box is padded and clamped to the image, the crop is upscaled
    with bicubic interpolation, and OCR is run for every combination of
    preprocessing mode (raw, clahe, adaptive, otsu, invert) and small rotation
    (0, +1.5, -1.5 degrees). Each result is rebuilt into text and scored with
    ``ocr_candidate_score``; the highest-scoring text wins.

    Args:
        image: Source image array (``image.shape[:2]`` is read as height, width).
        bbox: ``(x1, y1, x2, y2)`` region in image coordinates.
        reader: OCR reader object, forwarded to ``run_ocr_on_array``.
        upscale: Scale factor applied to the crop before OCR.
        pad: Pixels added on every side of *bbox* before cropping.

    Returns:
        ``(text, score)`` for the best candidate, or ``(None, 0.0)`` when the
        crop is empty or no variant produced text scoring above zero.
    """
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
    x1 = max(0, int(x1 - pad))
    y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad))
    y2 = min(ih, int(y2 + pad))

    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None, 0.0

    up = cv2.resize(
        crop,
        (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)),
        interpolation=cv2.INTER_CUBIC
    )

    modes = ["raw", "clahe", "adaptive", "otsu", "invert"]
    angles = [0.0, 1.5, -1.5]

    best_text, best_score = "", 0.0

    for mode in modes:
        proc = preprocess_variant(up, mode)

        # OCR is fed a single-channel image. Rotate that image directly
        # instead of expanding it to BGR and collapsing it again after the
        # rotation: the round trip adds nothing, and a multi-channel rotation
        # with a scalar border fill would leave dark wedges once re-grayed.
        if len(proc.shape) == 3:
            proc = cv2.cvtColor(proc, cv2.COLOR_BGR2GRAY)

        for a in angles:
            rot = rotate_image_keep_bounds(proc, a)

            res = run_ocr_on_array(reader, rot)
            txt = rebuild_text_from_ocr_result(res)
            sc = ocr_candidate_score(txt)

            if sc > best_score:
                best_text, best_score = txt, sc

    if not best_text:
        return None, 0.0
    return best_text, best_score
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# LINES + YELLOW BOXES
|
||||
# LINES + YELLOW BOXES
|
||||
# ─────────────────────────────────────────────
|
||||
def build_lines_from_indices(indices, ocr):
|
||||
if not indices:
|
||||
@@ -233,14 +359,13 @@ def build_lines_from_indices(indices, ocr):
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
def build_line_boxes_from_indices(indices, ocr):
|
||||
"""
|
||||
Robust yellow-box generation with punctuation attachment:
|
||||
- row grouping
|
||||
- chunking by x gap
|
||||
- attach tiny punctuation/special tokens to nearest chunk
|
||||
- coverage guarantee
|
||||
- token coverage guarantee
|
||||
"""
|
||||
if not indices:
|
||||
return []
|
||||
@@ -264,13 +389,13 @@ def build_line_boxes_from_indices(indices, ocr):
|
||||
pad = max(1, int(round(med_h * 0.12)))
|
||||
|
||||
def is_punct_like(t):
|
||||
raw = t.strip()
|
||||
raw = (t or "").strip()
|
||||
if raw == "":
|
||||
return True
|
||||
punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
|
||||
return punct_ratio >= 0.5 or len(raw) <= 2
|
||||
|
||||
# 1) rows
|
||||
# 1) row grouping
|
||||
items_sorted = sorted(items, key=lambda x: x["yc"])
|
||||
rows = []
|
||||
for it in items_sorted:
|
||||
@@ -296,7 +421,7 @@ def build_line_boxes_from_indices(indices, ocr):
|
||||
normal = mem
|
||||
punct = []
|
||||
|
||||
# 2) chunk normal tokens
|
||||
# 2) chunk normal by x-gap
|
||||
chunks = []
|
||||
cur = [normal[0]]
|
||||
for t in normal[1:]:
|
||||
@@ -310,18 +435,17 @@ def build_line_boxes_from_indices(indices, ocr):
|
||||
cur = [t]
|
||||
chunks.append(cur)
|
||||
|
||||
# 3) attach punctuation tokens
|
||||
# 3) attach punct tokens to nearest chunk
|
||||
for p in punct:
|
||||
pb = p["b"]
|
||||
pxc, pyc = p["xc"], p["yc"]
|
||||
|
||||
best_k = -1
|
||||
best_score = 1e18
|
||||
|
||||
for k, ch in enumerate(chunks):
|
||||
ub = boxes_union_xyxy([x["b"] for x in ch])
|
||||
cx = (ub[0] + ub[2]) / 2.0
|
||||
cy = (ub[1] + ub[3]) / 2.0
|
||||
|
||||
dx = abs(pxc - cx)
|
||||
dy = abs(pyc - cy)
|
||||
score = dx + 1.8 * dy
|
||||
@@ -339,22 +463,21 @@ def build_line_boxes_from_indices(indices, ocr):
|
||||
else:
|
||||
chunks.append([p])
|
||||
|
||||
# 4) chunk boxes
|
||||
# 4) emit chunk boxes
|
||||
for ch in chunks:
|
||||
ub = boxes_union_xyxy([x["b"] for x in ch])
|
||||
if ub:
|
||||
x1, y1, x2, y2 = ub
|
||||
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
|
||||
|
||||
# 5) guarantee all tokens included
|
||||
# 5) guarantee every token is inside some yellow box
|
||||
token_boxes = [it["b"] for it in items]
|
||||
|
||||
def inside(tb, lb):
|
||||
return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
|
||||
|
||||
for tb in token_boxes:
|
||||
ok = any(inside(tb, lb) for lb in out_boxes)
|
||||
if not ok:
|
||||
if not any(inside(tb, lb) for lb in out_boxes):
|
||||
x1, y1, x2, y2 = tb
|
||||
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
|
||||
|
||||
@@ -366,8 +489,8 @@ def build_line_boxes_from_indices(indices, ocr):
|
||||
ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
|
||||
ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
|
||||
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
||||
a1 = max(1, (b[2]-b[0])*(b[3]-b[1]))
|
||||
a2 = max(1, (m[2]-m[0])*(m[3]-m[1]))
|
||||
a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
|
||||
a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
|
||||
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
|
||||
if iou > 0.72:
|
||||
merged[i] = boxes_union_xyxy([b, m])
|
||||
@@ -381,7 +504,7 @@ def build_line_boxes_from_indices(indices, ocr):
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# GROUPING
|
||||
# GROUPING
|
||||
# ─────────────────────────────────────────────
|
||||
def auto_gap(image_path, base=18, ref_w=750):
|
||||
img = cv2.imread(image_path)
|
||||
@@ -426,7 +549,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
for i in range(n):
|
||||
groups.setdefault(find(i), []).append(i)
|
||||
|
||||
sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs)))
|
||||
sorted_groups = sorted(
|
||||
groups.values(),
|
||||
key=lambda idxs: (
|
||||
min(boxes[i][1] for i in idxs),
|
||||
min(boxes[i][0] for i in idxs)
|
||||
)
|
||||
)
|
||||
|
||||
bubbles = {}
|
||||
bubble_boxes = {}
|
||||
@@ -436,6 +565,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
ih, iw = image_shape[:2]
|
||||
for bid, idxs in enumerate(sorted_groups, start=1):
|
||||
idxs = sorted(idxs, key=lambda k: boxes[k][1])
|
||||
|
||||
lines = build_lines_from_indices(idxs, ocr)
|
||||
quads = [ocr[k][0] for k in idxs]
|
||||
ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
|
||||
@@ -443,8 +573,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
continue
|
||||
|
||||
x1, y1, x2, y2 = ub
|
||||
x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
|
||||
x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding)
|
||||
x1 = max(0, x1 - bbox_padding)
|
||||
y1 = max(0, y1 - bbox_padding)
|
||||
x2 = min(iw, x2 + bbox_padding)
|
||||
y2 = min(ih, y2 + bbox_padding)
|
||||
|
||||
bubbles[bid] = lines
|
||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||
@@ -455,23 +587,24 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DEBUG
|
||||
# DEBUG
|
||||
# ─────────────────────────────────────────────
|
||||
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
|
||||
img = cv2.imread(image_path)
|
||||
if img is None:
|
||||
return
|
||||
|
||||
# token quads
|
||||
# OCR token quads
|
||||
for bbox, txt, conf in ocr:
|
||||
pts = np.array(bbox, dtype=np.int32)
|
||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
|
||||
|
||||
# bubble boxes + yellow line boxes
|
||||
# Bubble + line boxes
|
||||
for bid, bb in bubble_boxes.items():
|
||||
x1, y1, x2, y2 = bb
|
||||
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
||||
cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
||||
cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
||||
|
||||
idxs = bubble_indices.get(bid, [])
|
||||
line_boxes = build_line_boxes_from_indices(idxs, ocr)
|
||||
@@ -485,7 +618,7 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path=
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# EXPORT
|
||||
# EXPORT
|
||||
# ─────────────────────────────────────────────
|
||||
def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||
items = []
|
||||
@@ -494,7 +627,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||
cy = (y1 + y2) / 2.0
|
||||
items.append((bid, cx, cy))
|
||||
|
||||
items.sort(key=lambda t: t[2])
|
||||
items.sort(key=lambda t: t[2]) # top to bottom
|
||||
|
||||
rows = []
|
||||
tol = 90
|
||||
@@ -517,7 +650,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||
|
||||
return {bid: i + 1 for i, bid in enumerate(order)}
|
||||
|
||||
|
||||
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
|
||||
out = {}
|
||||
|
||||
@@ -536,11 +668,15 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
|
||||
out[str(bid)] = {
|
||||
"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
|
||||
"reading_order": int(reading_map.get(bid, bid)),
|
||||
"quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes],
|
||||
"quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
|
||||
"quad_bboxes": [
|
||||
{"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
|
||||
for b in qboxes
|
||||
],
|
||||
"quads": [
|
||||
[[int(p[0]), int(p[1])] for p in q] for q in quads
|
||||
],
|
||||
"text_bbox": xyxy_to_xywh(text_union),
|
||||
|
||||
# yellow geometry
|
||||
"line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
|
||||
"line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
|
||||
"line_union_area": int(line_union_area),
|
||||
@@ -551,7 +687,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def translate_manga_text(
|
||||
image_path,
|
||||
@@ -606,6 +742,7 @@ def translate_manga_text(
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# reduce false positives in very top strip
|
||||
if qb[1] < int(ih * TOP_BAND_RATIO):
|
||||
if conf < 0.70 and len(t) >= 5:
|
||||
skipped += 1
|
||||
@@ -633,14 +770,28 @@ def translate_manga_text(
|
||||
|
||||
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
||||
|
||||
# robust bubble text cleanup
|
||||
clean_lines = {}
|
||||
for bid, lines in bubbles.items():
|
||||
txt = normalize_text(" ".join(lines))
|
||||
q = ocr_quality_score(txt)
|
||||
if q < quality_threshold:
|
||||
reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18)
|
||||
if reread:
|
||||
txt = normalize_text(reread)
|
||||
base_txt = normalize_text(" ".join(lines))
|
||||
base_sc = ocr_candidate_score(base_txt)
|
||||
|
||||
# only robust reread on low quality
|
||||
if base_sc < quality_threshold:
|
||||
rr_txt, rr_sc = reread_crop_robust(
|
||||
image,
|
||||
bubble_boxes[bid],
|
||||
reader,
|
||||
upscale=3.0,
|
||||
pad=22
|
||||
)
|
||||
if rr_txt and rr_sc > base_sc + 0.06:
|
||||
txt = rr_txt
|
||||
else:
|
||||
txt = base_txt
|
||||
else:
|
||||
txt = base_txt
|
||||
|
||||
clean_lines[bid] = apply_glossary(txt)
|
||||
|
||||
reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
|
||||
@@ -657,18 +808,24 @@ def translate_manga_text(
|
||||
src = clean_lines[bid].strip()
|
||||
if not src:
|
||||
continue
|
||||
flags = []
|
||||
|
||||
flags = []
|
||||
try:
|
||||
tgt = translator.translate(src) or ""
|
||||
except Exception as e:
|
||||
tgt = f"[Translation error: {e}]"
|
||||
flags.append("TRANSLATION_ERROR")
|
||||
|
||||
tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
|
||||
src_u = src.upper()
|
||||
|
||||
out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}")
|
||||
print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}")
|
||||
out_lines.append(
|
||||
f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
|
||||
)
|
||||
print(
|
||||
f"#{bid:<7} {reading_map.get(bid,bid):<6} "
|
||||
f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
|
||||
)
|
||||
translated_count += 1
|
||||
|
||||
out_lines.append(divider)
|
||||
@@ -691,13 +848,13 @@ def translate_manga_text(
|
||||
print(f"Saved: {export_to_file}")
|
||||
print(f"Saved: {export_bubbles_to}")
|
||||
if debug:
|
||||
print("Saved: debug_clusters.png (special chars included in yellow boxes)")
|
||||
print("Saved: debug_clusters.png")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
translate_manga_text(
|
||||
image_path="002-page.png",
|
||||
source_lang="en",
|
||||
image_path="001-page.png",
|
||||
source_lang="it",
|
||||
target_lang="ca",
|
||||
confidence_threshold=0.12,
|
||||
min_text_length=1,
|
||||
|
||||
209
pipeline.py
209
pipeline.py
@@ -2,60 +2,76 @@
|
||||
"""
|
||||
pipeline.py
|
||||
───────────────────────────────────────────────────────────────
|
||||
Translation-only pipeline for Dandadan_059_2022_Digital
|
||||
Translation + render pipeline
|
||||
|
||||
Flow per page:
|
||||
1. Run translate_manga_text() → output.txt + bubbles.json
|
||||
2. Copy original image to workdir for reference
|
||||
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
|
||||
2) render_translations() -> page_translated.png
|
||||
3) Pack CBZ with originals + rendered pages + text outputs
|
||||
|
||||
Folder structure produced:
|
||||
Dandadan_059_2022_Digital_1r0n/
|
||||
Folder structure:
|
||||
<CHAPTER_DIR>/
|
||||
├── 000.png
|
||||
├── 001.png
|
||||
└── translated/
|
||||
├── 00/
|
||||
│ ├── output.txt ← translations to review
|
||||
│ ├── bubbles.json ← bubble boxes
|
||||
│ └── debug_clusters.png ← cluster debug (if DEBUG=True)
|
||||
├── 01/
|
||||
├── 000/
|
||||
│ ├── output.txt
|
||||
│ ├── bubbles.json
|
||||
│ ├── page_translated.png
|
||||
│ └── debug_clusters.png (optional)
|
||||
├── 001/
|
||||
│ └── ...
|
||||
└── ...
|
||||
|
||||
Dandadan_059_translated.cbz ← original pages + translations
|
||||
zipped for reference
|
||||
CBZ:
|
||||
- pages/<original pages>
|
||||
- rendered/<page_stem>_translated.png
|
||||
- translations/<page_stem>_output.txt
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import zipfile
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG — edit these as needed
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
|
||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
|
||||
SOURCE_LANG = "en"
|
||||
TARGET_LANG = "ca"
|
||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
|
||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
|
||||
|
||||
# manga-translator.py settings
|
||||
SOURCE_LANG = "en"
|
||||
TARGET_LANG = "ca"
|
||||
|
||||
# translator (NEW signature-compatible)
|
||||
CONFIDENCE_THRESHOLD = 0.10
|
||||
MIN_TEXT_LENGTH = 2
|
||||
CLUSTER_EPS = "auto"
|
||||
PROXIMITY_PX = 80
|
||||
MIN_TEXT_LENGTH = 1
|
||||
GAP_PX = "auto" # was cluster/proximity in old version
|
||||
FILTER_SFX = True
|
||||
QUALITY_THRESHOLD = 0.5
|
||||
UPSCALE_FACTOR = 2.5
|
||||
BBOX_PADDING = 5
|
||||
QUALITY_THRESHOLD = 0.50
|
||||
READING_MODE = "ltr"
|
||||
DEBUG = True
|
||||
|
||||
# renderer
|
||||
RENDER_ENABLED = True
|
||||
RENDER_OUTPUT_NAME = "page_translated.png"
|
||||
|
||||
# optional custom font list for renderer
|
||||
FONT_CANDIDATES = [
|
||||
"fonts/ComicNeue-Regular.ttf",
|
||||
"fonts/ComicRelief-Regular.ttf"
|
||||
]
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DYNAMIC MODULE LOADER
|
||||
# ─────────────────────────────────────────────
|
||||
def load_module(name, filepath):
    """Import a Python source file by path and return the module object.

    This allows loading files whose names are not valid module identifiers
    (for example names containing a hyphen). The module is executed but is
    not registered in ``sys.modules``.

    Args:
        name: Module name to assign to the loaded module.
        filepath: Path of the source file to load.

    Returns:
        The executed module object.

    Raises:
        FileNotFoundError: If *filepath* is not an existing file, or no
            import spec/loader can be built for it.
    """
    # Fail early with a clear message instead of deep inside exec_module.
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f"Module file not found: {filepath}")

    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
||||
@@ -65,10 +81,10 @@ def load_module(name, filepath):
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def sorted_pages(chapter_dir):
    """List the page images directly inside *chapter_dir* in page order.

    Only regular files with a ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``
    suffix (case-insensitive) are returned; sub-directories are ignored even
    when their name ends in an image suffix.

    Pages are ordered by a natural sort of the file stem, so digit runs are
    compared as numbers: ``2`` comes before ``10``. For zero-padded names
    this is the same order as a plain string sort.

    Args:
        chapter_dir: Directory containing the chapter's page images.

    Returns:
        A list of ``Path`` objects in page order.
    """
    import re  # local import: only this function needs it

    exts = {".jpg", ".jpeg", ".png", ".webp"}

    def natural_key(path):
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always digit runs and even positions always
        # text; equal positions therefore always compare like with like.
        parts = re.split(r"(\d+)", path.stem)
        key = [int(part) if i % 2 else part for i, part in enumerate(parts)]
        # File name as tie-breaker keeps the order deterministic when two
        # stems are numerically equal (e.g. "01" and "1").
        return key, path.name

    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=natural_key)
|
||||
|
||||
@@ -80,82 +96,97 @@ def make_page_workdir(chapter_dir, page_stem):
|
||||
|
||||
|
||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Bundle original pages, rendered pages and text outputs into a CBZ.

    Archive layout:

    * ``pages/<original file name>``          - every page from *chapter_dir*
    * ``rendered/<page folder>_translated.png`` - every ``RENDER_OUTPUT_NAME``
      file found under *translated_dir*
    * ``translations/<page folder>_output.txt`` - every ``output.txt`` found
      under *translated_dir*

    Files are stored uncompressed (``ZIP_STORED``). Nothing is written when
    the chapter directory contains no page images.

    Args:
        chapter_dir: Directory holding the original page images.
        translated_dir: Directory holding one sub-folder per processed page;
            a ``str`` or a ``Path`` is accepted.
        output_cbz: Destination path of the archive (overwritten if present).
    """
    # Same discovery and ordering as the processing loop, instead of a
    # second copy of the filter that could drift out of sync.
    pages = sorted_pages(chapter_dir)
    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    translated_dir = Path(translated_dir)

    txts = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name
    )
    rendered = sorted(
        translated_dir.rglob(RENDER_OUTPUT_NAME),
        key=lambda p: p.parent.name
    )

    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")

        # rendered pages, named after the per-page folder they live in
        for rp in rendered:
            arcname = f"rendered/{rp.parent.name}_translated.png"
            zf.write(rp, arcname)
            print(f" 🎨 {arcname}")

        # text outputs, named after the per-page folder they live in
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(
        f"\n✅ CBZ saved → {output_cbz} "
        f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)"
    )
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PER-PAGE PIPELINE
|
||||
# ─────────────────────────────────────────────
|
||||
def process_page(page_path, workdir, translator_module):
|
||||
def process_page(page_path, workdir, translator_module, renderer_module):
|
||||
"""
|
||||
Runs translator for a single page.
|
||||
All output files land in workdir.
|
||||
Returns True on success, False on failure.
|
||||
Runs translator + renderer for one page.
|
||||
All generated files are written inside workdir.
|
||||
"""
|
||||
print(f"\n{'─'*60}")
|
||||
print(f" PAGE: {page_path.name}")
|
||||
print(f"{'─'*60}")
|
||||
print(f"\n{'─' * 70}")
|
||||
print(f"PAGE: {page_path.name}")
|
||||
print(f"{'─' * 70}")
|
||||
|
||||
orig_dir = os.getcwd()
|
||||
try:
|
||||
# chdir into workdir so debug_clusters.png,
|
||||
# temp files etc. all land there
|
||||
os.chdir(workdir)
|
||||
|
||||
# 1) translate
|
||||
translator_module.translate_manga_text(
|
||||
image_path = str(page_path.resolve()),
|
||||
source_lang = SOURCE_LANG,
|
||||
target_lang = TARGET_LANG,
|
||||
confidence_threshold = CONFIDENCE_THRESHOLD,
|
||||
export_to_file = "output.txt",
|
||||
export_bubbles_to = "bubbles.json",
|
||||
min_text_length = MIN_TEXT_LENGTH,
|
||||
cluster_eps = CLUSTER_EPS,
|
||||
proximity_px = PROXIMITY_PX,
|
||||
filter_sound_effects = FILTER_SFX,
|
||||
quality_threshold = QUALITY_THRESHOLD,
|
||||
upscale_factor = UPSCALE_FACTOR,
|
||||
bbox_padding = BBOX_PADDING,
|
||||
debug = DEBUG,
|
||||
image_path= str(page_path.resolve()),
|
||||
source_lang=SOURCE_LANG,
|
||||
target_lang=TARGET_LANG,
|
||||
confidence_threshold=CONFIDENCE_THRESHOLD,
|
||||
min_text_length=MIN_TEXT_LENGTH,
|
||||
gap_px=GAP_PX,
|
||||
filter_sound_effects=FILTER_SFX,
|
||||
quality_threshold=QUALITY_THRESHOLD,
|
||||
export_to_file="output.txt",
|
||||
export_bubbles_to="bubbles.json",
|
||||
reading_mode=READING_MODE,
|
||||
debug=DEBUG
|
||||
)
|
||||
print(" ✅ translator done")
|
||||
|
||||
# 2) render
|
||||
if RENDER_ENABLED:
|
||||
renderer_module.render_translations(
|
||||
input_image=str(page_path.resolve()),
|
||||
output_image=RENDER_OUTPUT_NAME,
|
||||
translations_file="output.txt",
|
||||
bubbles_file="bubbles.json",
|
||||
font_candidates=FONT_CANDIDATES
|
||||
)
|
||||
print(" ✅ renderer done")
|
||||
|
||||
print(f" ✅ Translated → {workdir}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -170,16 +201,20 @@ def process_page(page_path, workdir, translator_module):
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
def main():
|
||||
# ── Load translator module ────────────────────────────────────
|
||||
print("Loading manga-translator.py...")
|
||||
print("Loading modules...")
|
||||
|
||||
try:
|
||||
translator = load_module(
|
||||
"manga_translator", "manga-translator.py")
|
||||
except FileNotFoundError as e:
|
||||
print(f"❌ Could not load module: {e}")
|
||||
translator = load_module("manga_translator", "manga-translator.py")
|
||||
except Exception as e:
|
||||
print(f"❌ Could not load manga-translator.py: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
renderer = load_module("manga_renderer", "manga-renderer.py")
|
||||
except Exception as e:
|
||||
print(f"❌ Could not load manga-renderer.py: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
# ── Discover pages ────────────────────────────────────────────
|
||||
pages = sorted_pages(CHAPTER_DIR)
|
||||
if not pages:
|
||||
print(f"❌ No images found in: {CHAPTER_DIR}")
|
||||
@@ -187,33 +222,31 @@ def main():
|
||||
|
||||
print(f"\n📖 Chapter : {CHAPTER_DIR}")
|
||||
print(f" Pages : {len(pages)}")
|
||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
|
||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
|
||||
print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
|
||||
|
||||
# ── Process each page ─────────────────────────────────────────
|
||||
translated_dir = Path(CHAPTER_DIR) / "translated"
|
||||
succeeded = []
|
||||
failed = []
|
||||
failed = []
|
||||
|
||||
for i, page_path in enumerate(pages, start=1):
|
||||
print(f"\n[{i}/{len(pages)}] {page_path.name}")
|
||||
print(f"[{i}/{len(pages)}] {page_path.name}")
|
||||
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
|
||||
ok = process_page(page_path, workdir, translator)
|
||||
ok = process_page(page_path, workdir, translator, renderer)
|
||||
if ok:
|
||||
succeeded.append(page_path.name)
|
||||
else:
|
||||
failed.append(page_path.name)
|
||||
|
||||
# ── Summary ───────────────────────────────────────────────────
|
||||
print(f"\n{'═'*60}")
|
||||
print(f" PIPELINE COMPLETE")
|
||||
print(f" ✅ {len(succeeded)} page(s) succeeded")
|
||||
print(f"\n{'═' * 70}")
|
||||
print("PIPELINE COMPLETE")
|
||||
print(f"✅ {len(succeeded)} page(s) succeeded")
|
||||
if failed:
|
||||
print(f" ❌ {len(failed)} page(s) failed:")
|
||||
print(f"❌ {len(failed)} page(s) failed:")
|
||||
for f in failed:
|
||||
print(f" • {f}")
|
||||
print(f"{'═'*60}\n")
|
||||
print(f" • {f}")
|
||||
print(f"{'═' * 70}\n")
|
||||
|
||||
# ── Pack CBZ ──────────────────────────────────────────────────
|
||||
print("Packing CBZ...")
|
||||
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user