Added some fixes
This commit is contained in:
BIN
fonts/ComicNeue-Regular.ttf
Executable file
BIN
fonts/ComicNeue-Regular.ttf
Executable file
Binary file not shown.
Binary file not shown.
@@ -10,10 +10,8 @@ from PIL import Image, ImageDraw, ImageFont
|
|||||||
# CONFIG
|
# CONFIG
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
DEFAULT_FONT_CANDIDATES = [
|
DEFAULT_FONT_CANDIDATES = [
|
||||||
"fonts/AnimeAce2_reg.ttf",
|
|
||||||
"fonts/WildWordsRoman.ttf",
|
|
||||||
"fonts/ComicRelief-Regular.ttf",
|
"fonts/ComicRelief-Regular.ttf",
|
||||||
"fonts/NotoSans-Regular.ttf",
|
"fonts/ComicNeue-Regular.ttf",
|
||||||
]
|
]
|
||||||
DEFAULT_FONT_COLOR = (0, 0, 0)
|
DEFAULT_FONT_COLOR = (0, 0, 0)
|
||||||
DEFAULT_STROKE_COLOR = (255, 255, 255)
|
DEFAULT_STROKE_COLOR = (255, 255, 255)
|
||||||
@@ -501,7 +499,7 @@ def render_translations(
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
render_translations(
|
render_translations(
|
||||||
input_image="002-page.png",
|
input_image="001-page.png",
|
||||||
output_image="page_translated.png",
|
output_image="page_translated.png",
|
||||||
translations_file="output.txt",
|
translations_file="output.txt",
|
||||||
bubbles_file="bubbles.json",
|
bubbles_file="bubbles.json",
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
@@ -8,7 +11,7 @@ from deep_translator import GoogleTranslator
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# CONFIG
|
# CONFIG
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
GLOSSARY = {
|
GLOSSARY = {
|
||||||
"ANYA": "ANYA",
|
"ANYA": "ANYA",
|
||||||
@@ -32,17 +35,17 @@ TITLE_PATTERNS = [
|
|||||||
|
|
||||||
NOISE_PATTERNS = [
|
NOISE_PATTERNS = [
|
||||||
r"^[^a-zA-Z0-9\?!.]+$",
|
r"^[^a-zA-Z0-9\?!.]+$",
|
||||||
r"^BOX[0-9A-Z]*$",
|
r"^BOX[0-9A-Z#\s]*$",
|
||||||
]
|
]
|
||||||
|
|
||||||
TOP_BAND_RATIO = 0.08
|
TOP_BAND_RATIO = 0.08
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# TEXT HELPERS
|
# TEXT HELPERS
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def normalize_text(text):
|
def normalize_text(text: str) -> str:
|
||||||
t = text.strip().upper()
|
t = (text or "").strip().upper()
|
||||||
t = t.replace("“", "\"").replace("”", "\"")
|
t = t.replace("“", "\"").replace("”", "\"")
|
||||||
t = t.replace("’", "'").replace("‘", "'")
|
t = t.replace("’", "'").replace("‘", "'")
|
||||||
t = t.replace("…", "...")
|
t = t.replace("…", "...")
|
||||||
@@ -54,13 +57,13 @@ def normalize_text(text):
|
|||||||
t = re.sub(r",\?", "?", t)
|
t = re.sub(r",\?", "?", t)
|
||||||
return t.strip()
|
return t.strip()
|
||||||
|
|
||||||
def apply_glossary(text: str) -> str:
    """Replace glossary terms in *text* with their mapped spelling.

    Every key of the module-level ``GLOSSARY`` is matched as a whole word,
    case-insensitively. Keys are tried longest first, so a longer entry is
    substituted before any shorter key it contains.

    Args:
        text: Text to process. ``None`` or an empty string yields ``""``.

    Returns:
        The text with each glossary match replaced by its ``GLOSSARY`` value.
    """
    out = text or ""
    for key in sorted(GLOSSARY, key=len, reverse=True):
        replacement = GLOSSARY[key]
        # Use a callable so the value is inserted verbatim. A plain string is
        # parsed by re.sub as a template, so a glossary value containing a
        # backslash or a group reference would raise or be expanded.
        out = re.sub(
            rf"\b{re.escape(key)}\b",
            lambda _match, _value=replacement: _value,
            out,
            flags=re.IGNORECASE,
        )
    return out
|
||||||
|
|
||||||
def postprocess_translation_general(text):
|
def postprocess_translation_general(text: str) -> str:
|
||||||
t = normalize_text(text)
|
t = normalize_text(text)
|
||||||
t = re.sub(r"\s{2,}", " ", t).strip()
|
t = re.sub(r"\s{2,}", " ", t).strip()
|
||||||
t = re.sub(r"([!?]){3,}", r"\1\1", t)
|
t = re.sub(r"([!?]){3,}", r"\1\1", t)
|
||||||
@@ -69,23 +72,23 @@ def postprocess_translation_general(text):
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# FILTERS
|
# FILTERS
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def is_sound_effect(text: str) -> bool:
    """Return True when *text* fully matches a sound-effect pattern.

    The text is stripped and lower-cased, everything except ``a``-``z`` is
    removed, and the remainder is tried against each entry of the
    module-level ``SOUND_EFFECT_PATTERNS``. ``None`` is treated as ``""``.
    """
    letters_only = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    for pattern in SOUND_EFFECT_PATTERNS:
        if re.fullmatch(pattern, letters_only, re.IGNORECASE):
            return True
    return False
|
||||||
|
|
||||||
def is_title_text(text: str) -> bool:
    """Return True when *text* fully matches one of ``TITLE_PATTERNS``.

    The text is stripped and lower-cased first; matching is
    case-insensitive. ``None`` is treated as ``""``.
    """
    candidate = (text or "").strip().lower()
    for pattern in TITLE_PATTERNS:
        if re.fullmatch(pattern, candidate, re.IGNORECASE):
            return True
    return False
|
||||||
|
|
||||||
def is_noise_text(text: str) -> bool:
    """Return True when *text* fully matches one of ``NOISE_PATTERNS``.

    The text is stripped but its case is kept, and the match is
    case-sensitive. ``None`` is treated as ``""``.
    """
    candidate = (text or "").strip()
    for pattern in NOISE_PATTERNS:
        if re.fullmatch(pattern, candidate):
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# GEOMETRY
|
# GEOMETRY
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def quad_bbox(quad):
|
def quad_bbox(quad):
|
||||||
xs = [p[0] for p in quad]
|
xs = [p[0] for p in quad]
|
||||||
@@ -127,9 +130,9 @@ def overlap_or_near(a, b, gap=0):
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# QUALITY
|
# QUALITY / SCORING
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def ocr_quality_score(text):
|
def ocr_quality_score(text: str) -> float:
|
||||||
if not text or len(text) < 2:
|
if not text or len(text) < 2:
|
||||||
return 0.0
|
return 0.0
|
||||||
alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text))
|
alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text))
|
||||||
@@ -141,21 +144,75 @@ def ocr_quality_score(text):
|
|||||||
bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
|
bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
|
||||||
return max(0.0, min(1.0, alpha_ratio - penalty + bonus))
|
return max(0.0, min(1.0, alpha_ratio - penalty + bonus))
|
||||||
|
|
||||||
|
def ocr_candidate_score(text: str) -> float:
    """Score how plausible *text* looks as a clean OCR reading.

    The score rewards letters, spaces and ordinary punctuation, and
    subtracts for unexpected symbols and for three patterns: an isolated
    single capital letter, a run of two or more digits, and any
    two-character sequence repeated three times in a row.

    Args:
        text: Candidate OCR string. ``None``, empty and whitespace-only
            input all score ``0.0``.

    Returns:
        A value clamped to ``[0.0, 1.0]``; higher means more plausible.
    """
    t = (text or "").strip()
    n = len(t)
    if n == 0:
        return 0.0

    alpha = sum(c.isalpha() for c in t) / n
    spaces = sum(c.isspace() for c in t) / n
    punct_ok = sum(c in ".,!?'-:;()[]\"" for c in t) / n
    bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", t)) / n

    penalty = 0.0
    # Isolated single capital letter. Apostrophes count as word-internal here:
    # with a plain \b the "T" of "DON'T" or the "S" of "IT'S" looked like a
    # stray letter, so every contraction was penalised.
    if re.search(r"(?<![\w'’])[A-Z](?![\w'’])", t):
        penalty += 0.05
    if re.search(r"[0-9]{2,}", t):
        penalty += 0.08
    # The same two characters three times in a row.
    if re.search(r"(..)\1\1", t):
        penalty += 0.08

    score = (0.62 * alpha) + (0.10 * spaces) + (0.20 * punct_ok) - (0.45 * bad) - penalty
    return max(0.0, min(1.0, score))
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# OCR RE-READ
|
# OCR MULTI-PASS
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def preprocess_variant(crop_bgr, mode):
    """Return a single-channel variant of *crop_bgr* for one OCR pass.

    The crop is first converted with ``cv2.COLOR_BGR2GRAY``; *mode* then
    selects the treatment:

    - ``"raw"``: the grayscale image unchanged
    - ``"clahe"``: CLAHE with clip limit 2.0 on an 8x8 tile grid
    - ``"adaptive"``: 3x3 Gaussian blur, then Gaussian adaptive threshold
      (block size 35, constant 11)
    - ``"otsu"``: 3x3 Gaussian blur, then Otsu binary threshold
    - ``"invert"``: ``255 - gray``

    Any other mode falls back to the plain grayscale image.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)

    if mode == "clahe":
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return equalizer.apply(gray)

    if mode in ("adaptive", "otsu"):
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
        if mode == "adaptive":
            return cv2.adaptiveThreshold(
                blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 35, 11
            )
        _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    if mode == "invert":
        return 255 - gray

    # "raw" and every unrecognised mode
    return gray
|
||||||
|
|
||||||
|
def rotate_image_keep_bounds(img, angle_deg):
    """Rotate *img* about its centre on a canvas enlarged to fit the result.

    Args:
        img: Image array; only its first two dimensions (height, width)
            are read here.
        angle_deg: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image, sized to the bounding box of the rotated frame,
        with the uncovered area filled white.
    """
    h, w = img.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])

    # Bounding box of the rotated frame.
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Shift so the original centre lands on the centre of the new canvas.
    M[0, 2] += (new_w / 2) - center[0]
    M[1, 2] += (new_h / 2) - center[1]

    # A bare scalar border value sets only the first channel, so a 3-channel
    # image would get a (255, 0, 0) border instead of white. Supplying all
    # four components fills white whatever the channel count.
    return cv2.warpAffine(
        img,
        M,
        (new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255, 255),
    )
|
||||||
|
|
||||||
def run_ocr_on_array(reader, arr):
|
def run_ocr_on_array(reader, arr):
|
||||||
tmp = "_tmp_ocr.png"
|
tmp = "_tmp_ocr.png"
|
||||||
cv2.imwrite(tmp, arr)
|
cv2.imwrite(tmp, arr)
|
||||||
@@ -165,35 +222,104 @@ def run_ocr_on_array(reader, arr):
|
|||||||
if os.path.exists(tmp):
|
if os.path.exists(tmp):
|
||||||
os.remove(tmp)
|
os.remove(tmp)
|
||||||
|
|
||||||
def reread_crop(image, bbox, reader, upscale=2.5, pad=18):
|
def rebuild_text_from_ocr_result(res):
    """Reassemble OCR detections into one normalized string.

    Each usable entry of *res* is a ``(quad, text, confidence)`` triple;
    entries of any other length, or with blank text, are ignored.
    Detections are grouped into rows by vertical centre, rows are ordered
    top to bottom, and tokens within a row by horizontal centre.

    Args:
        res: Iterable of OCR result entries, or a falsy value.

    Returns:
        The rows joined by single spaces and passed through
        ``normalize_text``; ``""`` when nothing usable was detected.
    """
    if not res:
        return ""

    # (box, text, confidence, x-centre, y-centre, height) per usable detection.
    # presumably quad_bbox returns (x1, y1, x2, y2) — confirm against its definition
    tokens = []
    for entry in res:
        if len(entry) != 3:
            continue
        quad, raw_text, confidence = entry
        if not (raw_text and raw_text.strip()):
            continue
        box = quad_bbox(quad)
        tokens.append((
            box,
            raw_text,
            confidence,
            (box[0] + box[2]) / 2.0,
            (box[1] + box[3]) / 2.0,
            max(1.0, box[3] - box[1]),
        ))

    if not tokens:
        return ""

    median_height = float(np.median([tok[5] for tok in tokens]))
    row_tolerance = max(6.0, median_height * 0.75)

    # Walk top to bottom; a token joins the first row whose running mean
    # centre is within tolerance, otherwise it opens a new row.
    rows = []
    for tok in sorted(tokens, key=lambda tok: tok[4]):
        target = next(
            (row for row in rows if abs(tok[4] - row["yc"]) <= row_tolerance),
            None,
        )
        if target is None:
            rows.append({"yc": tok[4], "m": [tok]})
        else:
            target["m"].append(tok)
            target["yc"] = float(np.mean([member[4] for member in target["m"]]))

    rows.sort(key=lambda row: row["yc"])

    lines = []
    for row in rows:
        left_to_right = sorted(row["m"], key=lambda tok: tok[3])
        line = normalize_text(" ".join(tok[1] for tok in left_to_right))
        if line:
            lines.append(line)

    return normalize_text(" ".join(lines))
|
||||||
|
|
||||||
|
def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22):
    """Re-run OCR on one region with several preprocessings and small tilts.

    The padded region is cropped from *image*, upscaled, and read once per
    combination of preprocessing mode and rotation angle. Each reading is
    rebuilt with ``rebuild_text_from_ocr_result`` and scored with
    ``ocr_candidate_score``; the best-scoring text wins.

    Args:
        image: Source image array, indexed ``[y, x]``.
        bbox: ``(x1, y1, x2, y2)`` region to re-read, in image coordinates.
        reader: OCR reader object, handed unchanged to ``run_ocr_on_array``.
        upscale: Scale factor applied to the crop before OCR.
        pad: Margin added around *bbox* before cropping, clipped to the
            image bounds.

    Returns:
        ``(text, score)`` for the best candidate, or ``(None, 0.0)`` when
        the crop is empty or no pass produced text scoring above zero.
    """
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
    x1 = max(0, int(x1 - pad))
    y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad))
    y2 = min(ih, int(y2 + pad))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None, 0.0

    # Never ask cv2.resize for a zero-sized target (tiny crop x small factor).
    target_w = max(1, int(crop.shape[1] * upscale))
    target_h = max(1, int(crop.shape[0] * upscale))
    up = cv2.resize(crop, (target_w, target_h), interpolation=cv2.INTER_CUBIC)

    modes = ("raw", "clahe", "adaptive", "otsu", "invert")
    angles = (0.0, 1.5, -1.5)

    best_text, best_score = "", 0.0

    for mode in modes:
        proc = preprocess_variant(up, mode)
        # Rotate the single-channel image directly. Going through BGR and back
        # turned the rotation's scalar border fill into dark corner wedges
        # once the result was converted to gray again.
        if len(proc.shape) == 3:
            proc = cv2.cvtColor(proc, cv2.COLOR_BGR2GRAY)

        for angle in angles:
            rotated = rotate_image_keep_bounds(proc, angle)
            res = run_ocr_on_array(reader, rotated)
            txt = rebuild_text_from_ocr_result(res)
            sc = ocr_candidate_score(txt)

            if sc > best_score:
                best_text, best_score = txt, sc

    if not best_text:
        return None, 0.0
    return best_text, best_score
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# LINES + YELLOW BOXES
|
# LINES + YELLOW BOXES
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def build_lines_from_indices(indices, ocr):
|
def build_lines_from_indices(indices, ocr):
|
||||||
if not indices:
|
if not indices:
|
||||||
@@ -233,14 +359,13 @@ def build_lines_from_indices(indices, ocr):
|
|||||||
|
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
def build_line_boxes_from_indices(indices, ocr):
|
def build_line_boxes_from_indices(indices, ocr):
|
||||||
"""
|
"""
|
||||||
Robust yellow-box generation with punctuation attachment:
|
Robust yellow-box generation with punctuation attachment:
|
||||||
- row grouping
|
- row grouping
|
||||||
- chunking by x gap
|
- chunking by x gap
|
||||||
- attach tiny punctuation/special tokens to nearest chunk
|
- attach tiny punctuation/special tokens to nearest chunk
|
||||||
- coverage guarantee
|
- token coverage guarantee
|
||||||
"""
|
"""
|
||||||
if not indices:
|
if not indices:
|
||||||
return []
|
return []
|
||||||
@@ -264,13 +389,13 @@ def build_line_boxes_from_indices(indices, ocr):
|
|||||||
pad = max(1, int(round(med_h * 0.12)))
|
pad = max(1, int(round(med_h * 0.12)))
|
||||||
|
|
||||||
def is_punct_like(t):
|
def is_punct_like(t):
|
||||||
raw = t.strip()
|
raw = (t or "").strip()
|
||||||
if raw == "":
|
if raw == "":
|
||||||
return True
|
return True
|
||||||
punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
|
punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
|
||||||
return punct_ratio >= 0.5 or len(raw) <= 2
|
return punct_ratio >= 0.5 or len(raw) <= 2
|
||||||
|
|
||||||
# 1) rows
|
# 1) row grouping
|
||||||
items_sorted = sorted(items, key=lambda x: x["yc"])
|
items_sorted = sorted(items, key=lambda x: x["yc"])
|
||||||
rows = []
|
rows = []
|
||||||
for it in items_sorted:
|
for it in items_sorted:
|
||||||
@@ -296,7 +421,7 @@ def build_line_boxes_from_indices(indices, ocr):
|
|||||||
normal = mem
|
normal = mem
|
||||||
punct = []
|
punct = []
|
||||||
|
|
||||||
# 2) chunk normal tokens
|
# 2) chunk normal by x-gap
|
||||||
chunks = []
|
chunks = []
|
||||||
cur = [normal[0]]
|
cur = [normal[0]]
|
||||||
for t in normal[1:]:
|
for t in normal[1:]:
|
||||||
@@ -310,18 +435,17 @@ def build_line_boxes_from_indices(indices, ocr):
|
|||||||
cur = [t]
|
cur = [t]
|
||||||
chunks.append(cur)
|
chunks.append(cur)
|
||||||
|
|
||||||
# 3) attach punctuation tokens
|
# 3) attach punct tokens to nearest chunk
|
||||||
for p in punct:
|
for p in punct:
|
||||||
pb = p["b"]
|
pb = p["b"]
|
||||||
pxc, pyc = p["xc"], p["yc"]
|
pxc, pyc = p["xc"], p["yc"]
|
||||||
|
|
||||||
best_k = -1
|
best_k = -1
|
||||||
best_score = 1e18
|
best_score = 1e18
|
||||||
|
|
||||||
for k, ch in enumerate(chunks):
|
for k, ch in enumerate(chunks):
|
||||||
ub = boxes_union_xyxy([x["b"] for x in ch])
|
ub = boxes_union_xyxy([x["b"] for x in ch])
|
||||||
cx = (ub[0] + ub[2]) / 2.0
|
cx = (ub[0] + ub[2]) / 2.0
|
||||||
cy = (ub[1] + ub[3]) / 2.0
|
cy = (ub[1] + ub[3]) / 2.0
|
||||||
|
|
||||||
dx = abs(pxc - cx)
|
dx = abs(pxc - cx)
|
||||||
dy = abs(pyc - cy)
|
dy = abs(pyc - cy)
|
||||||
score = dx + 1.8 * dy
|
score = dx + 1.8 * dy
|
||||||
@@ -339,22 +463,21 @@ def build_line_boxes_from_indices(indices, ocr):
|
|||||||
else:
|
else:
|
||||||
chunks.append([p])
|
chunks.append([p])
|
||||||
|
|
||||||
# 4) chunk boxes
|
# 4) emit chunk boxes
|
||||||
for ch in chunks:
|
for ch in chunks:
|
||||||
ub = boxes_union_xyxy([x["b"] for x in ch])
|
ub = boxes_union_xyxy([x["b"] for x in ch])
|
||||||
if ub:
|
if ub:
|
||||||
x1, y1, x2, y2 = ub
|
x1, y1, x2, y2 = ub
|
||||||
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
|
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
|
||||||
|
|
||||||
# 5) guarantee all tokens included
|
# 5) guarantee every token is inside some yellow box
|
||||||
token_boxes = [it["b"] for it in items]
|
token_boxes = [it["b"] for it in items]
|
||||||
|
|
||||||
def inside(tb, lb):
|
def inside(tb, lb):
|
||||||
return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
|
return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
|
||||||
|
|
||||||
for tb in token_boxes:
|
for tb in token_boxes:
|
||||||
ok = any(inside(tb, lb) for lb in out_boxes)
|
if not any(inside(tb, lb) for lb in out_boxes):
|
||||||
if not ok:
|
|
||||||
x1, y1, x2, y2 = tb
|
x1, y1, x2, y2 = tb
|
||||||
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
|
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
|
||||||
|
|
||||||
@@ -366,8 +489,8 @@ def build_line_boxes_from_indices(indices, ocr):
|
|||||||
ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
|
ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
|
||||||
ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
|
ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
|
||||||
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
||||||
a1 = max(1, (b[2]-b[0])*(b[3]-b[1]))
|
a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
|
||||||
a2 = max(1, (m[2]-m[0])*(m[3]-m[1]))
|
a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
|
||||||
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
|
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
|
||||||
if iou > 0.72:
|
if iou > 0.72:
|
||||||
merged[i] = boxes_union_xyxy([b, m])
|
merged[i] = boxes_union_xyxy([b, m])
|
||||||
@@ -381,7 +504,7 @@ def build_line_boxes_from_indices(indices, ocr):
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# GROUPING
|
# GROUPING
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def auto_gap(image_path, base=18, ref_w=750):
|
def auto_gap(image_path, base=18, ref_w=750):
|
||||||
img = cv2.imread(image_path)
|
img = cv2.imread(image_path)
|
||||||
@@ -426,7 +549,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
for i in range(n):
|
for i in range(n):
|
||||||
groups.setdefault(find(i), []).append(i)
|
groups.setdefault(find(i), []).append(i)
|
||||||
|
|
||||||
sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs)))
|
sorted_groups = sorted(
|
||||||
|
groups.values(),
|
||||||
|
key=lambda idxs: (
|
||||||
|
min(boxes[i][1] for i in idxs),
|
||||||
|
min(boxes[i][0] for i in idxs)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
bubbles = {}
|
bubbles = {}
|
||||||
bubble_boxes = {}
|
bubble_boxes = {}
|
||||||
@@ -436,6 +565,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
ih, iw = image_shape[:2]
|
ih, iw = image_shape[:2]
|
||||||
for bid, idxs in enumerate(sorted_groups, start=1):
|
for bid, idxs in enumerate(sorted_groups, start=1):
|
||||||
idxs = sorted(idxs, key=lambda k: boxes[k][1])
|
idxs = sorted(idxs, key=lambda k: boxes[k][1])
|
||||||
|
|
||||||
lines = build_lines_from_indices(idxs, ocr)
|
lines = build_lines_from_indices(idxs, ocr)
|
||||||
quads = [ocr[k][0] for k in idxs]
|
quads = [ocr[k][0] for k in idxs]
|
||||||
ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
|
ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
|
||||||
@@ -443,8 +573,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
x1, y1, x2, y2 = ub
|
x1, y1, x2, y2 = ub
|
||||||
x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
|
x1 = max(0, x1 - bbox_padding)
|
||||||
x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding)
|
y1 = max(0, y1 - bbox_padding)
|
||||||
|
x2 = min(iw, x2 + bbox_padding)
|
||||||
|
y2 = min(ih, y2 + bbox_padding)
|
||||||
|
|
||||||
bubbles[bid] = lines
|
bubbles[bid] = lines
|
||||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||||
@@ -455,23 +587,24 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# DEBUG
|
# DEBUG
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
|
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
|
||||||
img = cv2.imread(image_path)
|
img = cv2.imread(image_path)
|
||||||
if img is None:
|
if img is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
# token quads
|
# OCR token quads
|
||||||
for bbox, txt, conf in ocr:
|
for bbox, txt, conf in ocr:
|
||||||
pts = np.array(bbox, dtype=np.int32)
|
pts = np.array(bbox, dtype=np.int32)
|
||||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
|
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
|
||||||
|
|
||||||
# bubble boxes + yellow line boxes
|
# Bubble + line boxes
|
||||||
for bid, bb in bubble_boxes.items():
|
for bid, bb in bubble_boxes.items():
|
||||||
x1, y1, x2, y2 = bb
|
x1, y1, x2, y2 = bb
|
||||||
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
||||||
cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16),
|
||||||
|
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
||||||
|
|
||||||
idxs = bubble_indices.get(bid, [])
|
idxs = bubble_indices.get(bid, [])
|
||||||
line_boxes = build_line_boxes_from_indices(idxs, ocr)
|
line_boxes = build_line_boxes_from_indices(idxs, ocr)
|
||||||
@@ -485,7 +618,7 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path=
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# EXPORT
|
# EXPORT
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def estimate_reading_order(bbox_dict, mode="ltr"):
|
def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||||
items = []
|
items = []
|
||||||
@@ -494,7 +627,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
|
|||||||
cy = (y1 + y2) / 2.0
|
cy = (y1 + y2) / 2.0
|
||||||
items.append((bid, cx, cy))
|
items.append((bid, cx, cy))
|
||||||
|
|
||||||
items.sort(key=lambda t: t[2])
|
items.sort(key=lambda t: t[2]) # top to bottom
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
tol = 90
|
tol = 90
|
||||||
@@ -517,7 +650,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
|
|||||||
|
|
||||||
return {bid: i + 1 for i, bid in enumerate(order)}
|
return {bid: i + 1 for i, bid in enumerate(order)}
|
||||||
|
|
||||||
|
|
||||||
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
|
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
|
||||||
out = {}
|
out = {}
|
||||||
|
|
||||||
@@ -536,11 +668,15 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
|
|||||||
out[str(bid)] = {
|
out[str(bid)] = {
|
||||||
"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
|
"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
|
||||||
"reading_order": int(reading_map.get(bid, bid)),
|
"reading_order": int(reading_map.get(bid, bid)),
|
||||||
"quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes],
|
"quad_bboxes": [
|
||||||
"quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
|
{"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
|
||||||
|
for b in qboxes
|
||||||
|
],
|
||||||
|
"quads": [
|
||||||
|
[[int(p[0]), int(p[1])] for p in q] for q in quads
|
||||||
|
],
|
||||||
"text_bbox": xyxy_to_xywh(text_union),
|
"text_bbox": xyxy_to_xywh(text_union),
|
||||||
|
|
||||||
# yellow geometry
|
|
||||||
"line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
|
"line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
|
||||||
"line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
|
"line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
|
||||||
"line_union_area": int(line_union_area),
|
"line_union_area": int(line_union_area),
|
||||||
@@ -551,7 +687,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# MAIN
|
# MAIN
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def translate_manga_text(
|
def translate_manga_text(
|
||||||
image_path,
|
image_path,
|
||||||
@@ -606,6 +742,7 @@ def translate_manga_text(
|
|||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# reduce false positives in very top strip
|
||||||
if qb[1] < int(ih * TOP_BAND_RATIO):
|
if qb[1] < int(ih * TOP_BAND_RATIO):
|
||||||
if conf < 0.70 and len(t) >= 5:
|
if conf < 0.70 and len(t) >= 5:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
@@ -633,14 +770,28 @@ def translate_manga_text(
|
|||||||
|
|
||||||
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
||||||
|
|
||||||
|
# robust bubble text cleanup
|
||||||
clean_lines = {}
|
clean_lines = {}
|
||||||
for bid, lines in bubbles.items():
|
for bid, lines in bubbles.items():
|
||||||
txt = normalize_text(" ".join(lines))
|
base_txt = normalize_text(" ".join(lines))
|
||||||
q = ocr_quality_score(txt)
|
base_sc = ocr_candidate_score(base_txt)
|
||||||
if q < quality_threshold:
|
|
||||||
reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18)
|
# only robust reread on low quality
|
||||||
if reread:
|
if base_sc < quality_threshold:
|
||||||
txt = normalize_text(reread)
|
rr_txt, rr_sc = reread_crop_robust(
|
||||||
|
image,
|
||||||
|
bubble_boxes[bid],
|
||||||
|
reader,
|
||||||
|
upscale=3.0,
|
||||||
|
pad=22
|
||||||
|
)
|
||||||
|
if rr_txt and rr_sc > base_sc + 0.06:
|
||||||
|
txt = rr_txt
|
||||||
|
else:
|
||||||
|
txt = base_txt
|
||||||
|
else:
|
||||||
|
txt = base_txt
|
||||||
|
|
||||||
clean_lines[bid] = apply_glossary(txt)
|
clean_lines[bid] = apply_glossary(txt)
|
||||||
|
|
||||||
reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
|
reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
|
||||||
@@ -657,18 +808,24 @@ def translate_manga_text(
|
|||||||
src = clean_lines[bid].strip()
|
src = clean_lines[bid].strip()
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
flags = []
|
|
||||||
|
|
||||||
|
flags = []
|
||||||
try:
|
try:
|
||||||
tgt = translator.translate(src) or ""
|
tgt = translator.translate(src) or ""
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tgt = f"[Translation error: {e}]"
|
tgt = f"[Translation error: {e}]"
|
||||||
|
flags.append("TRANSLATION_ERROR")
|
||||||
|
|
||||||
tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
|
tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
|
||||||
src_u = src.upper()
|
src_u = src.upper()
|
||||||
|
|
||||||
out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}")
|
out_lines.append(
|
||||||
print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}")
|
f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f"#{bid:<7} {reading_map.get(bid,bid):<6} "
|
||||||
|
f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
|
||||||
|
)
|
||||||
translated_count += 1
|
translated_count += 1
|
||||||
|
|
||||||
out_lines.append(divider)
|
out_lines.append(divider)
|
||||||
@@ -691,13 +848,13 @@ def translate_manga_text(
|
|||||||
print(f"Saved: {export_to_file}")
|
print(f"Saved: {export_to_file}")
|
||||||
print(f"Saved: {export_bubbles_to}")
|
print(f"Saved: {export_bubbles_to}")
|
||||||
if debug:
|
if debug:
|
||||||
print("Saved: debug_clusters.png (special chars included in yellow boxes)")
|
print("Saved: debug_clusters.png")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
translate_manga_text(
|
translate_manga_text(
|
||||||
image_path="002-page.png",
|
image_path="001-page.png",
|
||||||
source_lang="en",
|
source_lang="it",
|
||||||
target_lang="ca",
|
target_lang="ca",
|
||||||
confidence_threshold=0.12,
|
confidence_threshold=0.12,
|
||||||
min_text_length=1,
|
min_text_length=1,
|
||||||
|
|||||||
209
pipeline.py
209
pipeline.py
@@ -2,60 +2,76 @@
|
|||||||
"""
|
"""
|
||||||
pipeline.py
|
pipeline.py
|
||||||
───────────────────────────────────────────────────────────────
|
───────────────────────────────────────────────────────────────
|
||||||
Translation-only pipeline for Dandadan_059_2022_Digital
|
Translation + render pipeline
|
||||||
|
|
||||||
Flow per page:
|
Flow per page:
|
||||||
1. Run translate_manga_text() → output.txt + bubbles.json
|
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
|
||||||
2. Copy original image to workdir for reference
|
2) render_translations() -> page_translated.png
|
||||||
|
3) Pack CBZ with originals + rendered pages + text outputs
|
||||||
|
|
||||||
Folder structure produced:
|
Folder structure:
|
||||||
Dandadan_059_2022_Digital_1r0n/
|
<CHAPTER_DIR>/
|
||||||
|
├── 000.png
|
||||||
|
├── 001.png
|
||||||
└── translated/
|
└── translated/
|
||||||
├── 00/
|
├── 000/
|
||||||
│ ├── output.txt ← translations to review
|
│ ├── output.txt
|
||||||
│ ├── bubbles.json ← bubble boxes
|
│ ├── bubbles.json
|
||||||
│ └── debug_clusters.png ← cluster debug (if DEBUG=True)
|
│ ├── page_translated.png
|
||||||
├── 01/
|
│ └── debug_clusters.png (optional)
|
||||||
|
├── 001/
|
||||||
│ └── ...
|
│ └── ...
|
||||||
└── ...
|
└── ...
|
||||||
|
|
||||||
Dandadan_059_translated.cbz ← original pages + translations
|
CBZ:
|
||||||
zipped for reference
|
- pages/<original pages>
|
||||||
|
- rendered/<page_stem>_translated.png
|
||||||
|
- translations/<page_stem>_output.txt
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
|
||||||
import zipfile
|
import zipfile
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# CONFIG — edit these as needed
|
# CONFIG
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
|
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
|
||||||
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
|
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
|
||||||
SOURCE_LANG = "en"
|
|
||||||
TARGET_LANG = "ca"
|
|
||||||
|
|
||||||
# manga-translator.py settings
|
SOURCE_LANG = "en"
|
||||||
|
TARGET_LANG = "ca"
|
||||||
|
|
||||||
|
# translator (NEW signature-compatible)
|
||||||
CONFIDENCE_THRESHOLD = 0.10
|
CONFIDENCE_THRESHOLD = 0.10
|
||||||
MIN_TEXT_LENGTH = 2
|
MIN_TEXT_LENGTH = 1
|
||||||
CLUSTER_EPS = "auto"
|
GAP_PX = "auto" # was cluster/proximity in old version
|
||||||
PROXIMITY_PX = 80
|
|
||||||
FILTER_SFX = True
|
FILTER_SFX = True
|
||||||
QUALITY_THRESHOLD = 0.5
|
QUALITY_THRESHOLD = 0.50
|
||||||
UPSCALE_FACTOR = 2.5
|
READING_MODE = "ltr"
|
||||||
BBOX_PADDING = 5
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
|
|
||||||
|
# renderer
|
||||||
|
RENDER_ENABLED = True
|
||||||
|
RENDER_OUTPUT_NAME = "page_translated.png"
|
||||||
|
|
||||||
|
# optional custom font list for renderer
|
||||||
|
FONT_CANDIDATES = [
|
||||||
|
"fonts/ComicNeue-Regular.ttf",
|
||||||
|
"fonts/ComicRelief-Regular.ttf"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# DYNAMIC MODULE LOADER
|
# DYNAMIC MODULE LOADER
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def load_module(name, filepath):
|
def load_module(name, filepath):
|
||||||
spec = importlib.util.spec_from_file_location(name, filepath)
|
spec = importlib.util.spec_from_file_location(name, filepath)
|
||||||
|
if spec is None or spec.loader is None:
|
||||||
|
raise FileNotFoundError(f"Cannot load spec for {filepath}")
|
||||||
module = importlib.util.module_from_spec(spec)
|
module = importlib.util.module_from_spec(spec)
|
||||||
spec.loader.exec_module(module)
|
spec.loader.exec_module(module)
|
||||||
return module
|
return module
|
||||||
@@ -65,10 +81,10 @@ def load_module(name, filepath):
|
|||||||
# HELPERS
|
# HELPERS
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def sorted_pages(chapter_dir):
|
def sorted_pages(chapter_dir):
|
||||||
exts = {".jpg", ".jpeg", ".png", ".webp"}
|
exts = {".jpg", ".jpeg", ".png", ".webp"}
|
||||||
pages = [
|
pages = [
|
||||||
p for p in Path(chapter_dir).iterdir()
|
p for p in Path(chapter_dir).iterdir()
|
||||||
if p.suffix.lower() in exts
|
if p.is_file() and p.suffix.lower() in exts
|
||||||
]
|
]
|
||||||
return sorted(pages, key=lambda p: p.stem)
|
return sorted(pages, key=lambda p: p.stem)
|
||||||
|
|
||||||
@@ -80,82 +96,97 @@ def make_page_workdir(chapter_dir, page_stem):
|
|||||||
|
|
||||||
|
|
||||||
def pack_cbz(chapter_dir, translated_dir, output_cbz):
|
def pack_cbz(chapter_dir, translated_dir, output_cbz):
|
||||||
"""
|
exts = {".jpg", ".jpeg", ".png", ".webp"}
|
||||||
Packs into CBZ:
|
|
||||||
- All original pages (from chapter_dir root)
|
pages = sorted(
|
||||||
- All output.txt (one per page subfolder)
|
|
||||||
Sorted by page stem for correct reading order.
|
|
||||||
"""
|
|
||||||
exts = {".jpg", ".jpeg", ".png", ".webp"}
|
|
||||||
pages = sorted(
|
|
||||||
[p for p in Path(chapter_dir).iterdir()
|
[p for p in Path(chapter_dir).iterdir()
|
||||||
if p.suffix.lower() in exts],
|
if p.is_file() and p.suffix.lower() in exts],
|
||||||
key=lambda p: p.stem
|
key=lambda p: p.stem
|
||||||
)
|
)
|
||||||
txts = sorted(
|
|
||||||
|
txts = sorted(
|
||||||
translated_dir.rglob("output.txt"),
|
translated_dir.rglob("output.txt"),
|
||||||
key=lambda p: p.parent.name
|
key=lambda p: p.parent.name
|
||||||
)
|
)
|
||||||
|
|
||||||
|
rendered = sorted(
|
||||||
|
translated_dir.rglob(RENDER_OUTPUT_NAME),
|
||||||
|
key=lambda p: p.parent.name
|
||||||
|
)
|
||||||
|
|
||||||
if not pages:
|
if not pages:
|
||||||
print("⚠️ No original pages found — CBZ not created.")
|
print("⚠️ No original pages found — CBZ not created.")
|
||||||
return
|
return
|
||||||
|
|
||||||
with zipfile.ZipFile(output_cbz, "w",
|
with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
|
||||||
compression=zipfile.ZIP_STORED) as zf:
|
# original pages
|
||||||
# Original pages
|
|
||||||
for img in pages:
|
for img in pages:
|
||||||
arcname = f"pages/{img.name}"
|
arcname = f"pages/{img.name}"
|
||||||
zf.write(img, arcname)
|
zf.write(img, arcname)
|
||||||
print(f" 🖼 {arcname}")
|
print(f" 🖼 {arcname}")
|
||||||
|
|
||||||
# Translation text files
|
# rendered pages
|
||||||
|
for rp in rendered:
|
||||||
|
arcname = f"rendered/{rp.parent.name}_translated.png"
|
||||||
|
zf.write(rp, arcname)
|
||||||
|
print(f" 🎨 {arcname}")
|
||||||
|
|
||||||
|
# text outputs
|
||||||
for txt in txts:
|
for txt in txts:
|
||||||
arcname = f"translations/{txt.parent.name}_output.txt"
|
arcname = f"translations/{txt.parent.name}_output.txt"
|
||||||
zf.write(txt, arcname)
|
zf.write(txt, arcname)
|
||||||
print(f" 📄 {arcname}")
|
print(f" 📄 {arcname}")
|
||||||
|
|
||||||
print(f"\n✅ CBZ saved → {output_cbz} "
|
print(
|
||||||
f"({len(pages)} page(s), {len(txts)} translation(s))")
|
f"\n✅ CBZ saved → {output_cbz} "
|
||||||
|
f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
# PER-PAGE PIPELINE
|
# PER-PAGE PIPELINE
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def process_page(page_path, workdir, translator_module):
|
def process_page(page_path, workdir, translator_module, renderer_module):
|
||||||
"""
|
"""
|
||||||
Runs translator for a single page.
|
Runs translator + renderer for one page.
|
||||||
All output files land in workdir.
|
All generated files are written inside workdir.
|
||||||
Returns True on success, False on failure.
|
|
||||||
"""
|
"""
|
||||||
print(f"\n{'─'*60}")
|
print(f"\n{'─' * 70}")
|
||||||
print(f" PAGE: {page_path.name}")
|
print(f"PAGE: {page_path.name}")
|
||||||
print(f"{'─'*60}")
|
print(f"{'─' * 70}")
|
||||||
|
|
||||||
orig_dir = os.getcwd()
|
orig_dir = os.getcwd()
|
||||||
try:
|
try:
|
||||||
# chdir into workdir so debug_clusters.png,
|
|
||||||
# temp files etc. all land there
|
|
||||||
os.chdir(workdir)
|
os.chdir(workdir)
|
||||||
|
|
||||||
|
# 1) translate
|
||||||
translator_module.translate_manga_text(
|
translator_module.translate_manga_text(
|
||||||
image_path = str(page_path.resolve()),
|
image_path= str(page_path.resolve()),
|
||||||
source_lang = SOURCE_LANG,
|
source_lang=SOURCE_LANG,
|
||||||
target_lang = TARGET_LANG,
|
target_lang=TARGET_LANG,
|
||||||
confidence_threshold = CONFIDENCE_THRESHOLD,
|
confidence_threshold=CONFIDENCE_THRESHOLD,
|
||||||
export_to_file = "output.txt",
|
min_text_length=MIN_TEXT_LENGTH,
|
||||||
export_bubbles_to = "bubbles.json",
|
gap_px=GAP_PX,
|
||||||
min_text_length = MIN_TEXT_LENGTH,
|
filter_sound_effects=FILTER_SFX,
|
||||||
cluster_eps = CLUSTER_EPS,
|
quality_threshold=QUALITY_THRESHOLD,
|
||||||
proximity_px = PROXIMITY_PX,
|
export_to_file="output.txt",
|
||||||
filter_sound_effects = FILTER_SFX,
|
export_bubbles_to="bubbles.json",
|
||||||
quality_threshold = QUALITY_THRESHOLD,
|
reading_mode=READING_MODE,
|
||||||
upscale_factor = UPSCALE_FACTOR,
|
debug=DEBUG
|
||||||
bbox_padding = BBOX_PADDING,
|
|
||||||
debug = DEBUG,
|
|
||||||
)
|
)
|
||||||
|
print(" ✅ translator done")
|
||||||
|
|
||||||
|
# 2) render
|
||||||
|
if RENDER_ENABLED:
|
||||||
|
renderer_module.render_translations(
|
||||||
|
input_image=str(page_path.resolve()),
|
||||||
|
output_image=RENDER_OUTPUT_NAME,
|
||||||
|
translations_file="output.txt",
|
||||||
|
bubbles_file="bubbles.json",
|
||||||
|
font_candidates=FONT_CANDIDATES
|
||||||
|
)
|
||||||
|
print(" ✅ renderer done")
|
||||||
|
|
||||||
print(f" ✅ Translated → {workdir}")
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -170,16 +201,20 @@ def process_page(page_path, workdir, translator_module):
|
|||||||
# MAIN
|
# MAIN
|
||||||
# ─────────────────────────────────────────────
|
# ─────────────────────────────────────────────
|
||||||
def main():
|
def main():
|
||||||
# ── Load translator module ────────────────────────────────────
|
print("Loading modules...")
|
||||||
print("Loading manga-translator.py...")
|
|
||||||
try:
|
try:
|
||||||
translator = load_module(
|
translator = load_module("manga_translator", "manga-translator.py")
|
||||||
"manga_translator", "manga-translator.py")
|
except Exception as e:
|
||||||
except FileNotFoundError as e:
|
print(f"❌ Could not load manga-translator.py: {e}")
|
||||||
print(f"❌ Could not load module: {e}")
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
renderer = load_module("manga_renderer", "manga-renderer.py")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Could not load manga-renderer.py: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# ── Discover pages ────────────────────────────────────────────
|
|
||||||
pages = sorted_pages(CHAPTER_DIR)
|
pages = sorted_pages(CHAPTER_DIR)
|
||||||
if not pages:
|
if not pages:
|
||||||
print(f"❌ No images found in: {CHAPTER_DIR}")
|
print(f"❌ No images found in: {CHAPTER_DIR}")
|
||||||
@@ -187,33 +222,31 @@ def main():
|
|||||||
|
|
||||||
print(f"\n📖 Chapter : {CHAPTER_DIR}")
|
print(f"\n📖 Chapter : {CHAPTER_DIR}")
|
||||||
print(f" Pages : {len(pages)}")
|
print(f" Pages : {len(pages)}")
|
||||||
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n")
|
print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}")
|
||||||
|
print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
|
||||||
|
|
||||||
# ── Process each page ─────────────────────────────────────────
|
|
||||||
translated_dir = Path(CHAPTER_DIR) / "translated"
|
translated_dir = Path(CHAPTER_DIR) / "translated"
|
||||||
succeeded = []
|
succeeded = []
|
||||||
failed = []
|
failed = []
|
||||||
|
|
||||||
for i, page_path in enumerate(pages, start=1):
|
for i, page_path in enumerate(pages, start=1):
|
||||||
print(f"\n[{i}/{len(pages)}] {page_path.name}")
|
print(f"[{i}/{len(pages)}] {page_path.name}")
|
||||||
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
|
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
|
||||||
ok = process_page(page_path, workdir, translator)
|
ok = process_page(page_path, workdir, translator, renderer)
|
||||||
if ok:
|
if ok:
|
||||||
succeeded.append(page_path.name)
|
succeeded.append(page_path.name)
|
||||||
else:
|
else:
|
||||||
failed.append(page_path.name)
|
failed.append(page_path.name)
|
||||||
|
|
||||||
# ── Summary ───────────────────────────────────────────────────
|
print(f"\n{'═' * 70}")
|
||||||
print(f"\n{'═'*60}")
|
print("PIPELINE COMPLETE")
|
||||||
print(f" PIPELINE COMPLETE")
|
print(f"✅ {len(succeeded)} page(s) succeeded")
|
||||||
print(f" ✅ {len(succeeded)} page(s) succeeded")
|
|
||||||
if failed:
|
if failed:
|
||||||
print(f" ❌ {len(failed)} page(s) failed:")
|
print(f"❌ {len(failed)} page(s) failed:")
|
||||||
for f in failed:
|
for f in failed:
|
||||||
print(f" • {f}")
|
print(f" • {f}")
|
||||||
print(f"{'═'*60}\n")
|
print(f"{'═' * 70}\n")
|
||||||
|
|
||||||
# ── Pack CBZ ──────────────────────────────────────────────────
|
|
||||||
print("Packing CBZ...")
|
print("Packing CBZ...")
|
||||||
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
|
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user