Added some fixes

This commit is contained in:
Guillem Hernandez Sola
2026-04-14 20:08:51 +02:00
parent 0069da706b
commit f95b7d32d4
5 changed files with 359 additions and 171 deletions

BIN
fonts/ComicNeue-Regular.ttf Executable file

Binary file not shown.

Binary file not shown.

View File

@@ -10,10 +10,8 @@ from PIL import Image, ImageDraw, ImageFont
# CONFIG
# ─────────────────────────────────────────────
DEFAULT_FONT_CANDIDATES = [
"fonts/AnimeAce2_reg.ttf",
"fonts/WildWordsRoman.ttf",
"fonts/ComicRelief-Regular.ttf",
"fonts/NotoSans-Regular.ttf",
"fonts/ComicNeue-Regular.ttf",
]
DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)
@@ -501,7 +499,7 @@ def render_translations(
if __name__ == "__main__":
render_translations(
input_image="002-page.png",
input_image="001-page.png",
output_image="page_translated.png",
translations_file="output.txt",
bubbles_file="bubbles.json",

View File

@@ -1,3 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import os
import json
@@ -8,7 +11,7 @@ from deep_translator import GoogleTranslator
# ─────────────────────────────────────────────
# CONFIG
# CONFIG
# ─────────────────────────────────────────────
GLOSSARY = {
"ANYA": "ANYA",
@@ -32,17 +35,17 @@ TITLE_PATTERNS = [
NOISE_PATTERNS = [
r"^[^a-zA-Z0-9\?!.]+$",
r"^BOX[0-9A-Z]*$",
r"^BOX[0-9A-Z#\s]*$",
]
TOP_BAND_RATIO = 0.08
# ─────────────────────────────────────────────
# TEXT HELPERS
# TEXT HELPERS
# ─────────────────────────────────────────────
def normalize_text(text):
t = text.strip().upper()
def normalize_text(text: str) -> str:
t = (text or "").strip().upper()
t = t.replace("", "\"").replace("", "\"")
t = t.replace("", "'").replace("", "'")
t = t.replace("", "...")
@@ -54,13 +57,13 @@ def normalize_text(text):
t = re.sub(r",\?", "?", t)
return t.strip()
def apply_glossary(text: str) -> str:
    """Replace every glossary term found in *text* with its canonical form.

    Terms are matched case-insensitively on word boundaries. Longer keys
    are applied first so that a short key cannot pre-empt a longer key
    that contains it.

    Args:
        text: Text to process; ``None`` or empty is treated as ``""``.

    Returns:
        The text with all ``GLOSSARY`` keys replaced by their values.
    """
    out = text or ""
    for key in sorted(GLOSSARY.keys(), key=len, reverse=True):
        replacement = GLOSSARY[key]
        # Use a callable so the glossary value is inserted literally;
        # a plain string would be parsed as a regex replacement template
        # (backslashes and group references such as "\1" get expanded).
        out = re.sub(
            rf"\b{re.escape(key)}\b",
            lambda _match, _value=replacement: _value,
            out,
            flags=re.IGNORECASE,
        )
    return out
def postprocess_translation_general(text):
def postprocess_translation_general(text: str) -> str:
t = normalize_text(text)
t = re.sub(r"\s{2,}", " ", t).strip()
t = re.sub(r"([!?]){3,}", r"\1\1", t)
@@ -69,23 +72,23 @@ def postprocess_translation_general(text):
# ─────────────────────────────────────────────
# FILTERS
# FILTERS
# ─────────────────────────────────────────────
def is_sound_effect(text: str) -> bool:
    """Return True when *text* fully matches one of ``SOUND_EFFECT_PATTERNS``.

    The text is lower-cased and every character outside ``a-z`` is removed
    before matching, so punctuation and spacing do not affect the result.
    ``None`` is treated as an empty string.
    """
    cleaned = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    return any(
        re.fullmatch(pattern, cleaned, re.IGNORECASE)
        for pattern in SOUND_EFFECT_PATTERNS
    )
def is_title_text(text: str) -> bool:
    """Return True when *text* fully matches one of ``TITLE_PATTERNS``.

    The text is stripped and lower-cased before matching; ``None`` is
    treated as an empty string.
    """
    candidate = (text or "").strip().lower()
    return any(
        re.fullmatch(pattern, candidate, re.IGNORECASE)
        for pattern in TITLE_PATTERNS
    )
def is_noise_text(text: str) -> bool:
    """Return True when *text* fully matches one of ``NOISE_PATTERNS``.

    Matching is case-sensitive and performed on the stripped text;
    ``None`` is treated as an empty string.
    """
    candidate = (text or "").strip()
    return any(re.fullmatch(pattern, candidate) for pattern in NOISE_PATTERNS)
# ─────────────────────────────────────────────
# GEOMETRY
# GEOMETRY
# ─────────────────────────────────────────────
def quad_bbox(quad):
xs = [p[0] for p in quad]
@@ -127,9 +130,9 @@ def overlap_or_near(a, b, gap=0):
# ─────────────────────────────────────────────
# QUALITY
# QUALITY / SCORING
# ─────────────────────────────────────────────
def ocr_quality_score(text):
def ocr_quality_score(text: str) -> float:
if not text or len(text) < 2:
return 0.0
alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text))
@@ -141,21 +144,75 @@ def ocr_quality_score(text):
bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
return max(0.0, min(1.0, alpha_ratio - penalty + bonus))
def ocr_candidate_score(text: str) -> float:
    """Score how plausible an OCR reading looks, clamped to [0.0, 1.0].

    The score is a weighted mix of character-class ratios over the
    stripped text, minus fixed penalties for patterns that usually
    indicate a misread.

    Args:
        text: Candidate OCR text. ``None``, empty, or whitespace-only
            input scores ``0.0``.

    Returns:
        A float in ``[0.0, 1.0]``; higher means a more plausible reading.
    """
    if not text:
        return 0.0

    t = text.strip()
    n = len(t)
    if n == 0:
        return 0.0

    # Character-class ratios, each relative to the stripped length.
    alpha = sum(c.isalpha() for c in t) / n
    spaces = sum(c.isspace() for c in t) / n
    punct_ok = sum(c in ".,!?'-:;()[]\"" for c in t) / n
    # Anything that is neither a word char, whitespace, nor accepted punctuation.
    bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", t)) / n

    penalty = 0.0
    # Isolated single capital letter.
    # NOTE(review): callers pass upper-cased text, so real one-letter
    # words ("A", "I") are penalised too — confirm this is intended.
    if re.search(r"\b[A-Z]\b", t):
        penalty += 0.05
    # Run of two or more digits.
    if re.search(r"[0-9]{2,}", t):
        penalty += 0.08
    # Same two-character pair repeated three times in a row.
    if re.search(r"(..)\1\1", t):
        penalty += 0.08

    score = (
        (0.62 * alpha)
        + (0.10 * spaces)
        + (0.20 * punct_ok)
        - (0.45 * bad)
        - penalty
    )
    return max(0.0, min(1.0, score))
# ─────────────────────────────────────────────
# OCR RE-READ
# OCR MULTI-PASS
# ─────────────────────────────────────────────
def preprocess_variant(crop_bgr, mode):
    """Return a single-channel preprocessing variant of a BGR crop for OCR.

    Args:
        crop_bgr: Image crop that ``cv2.cvtColor(..., COLOR_BGR2GRAY)``
            accepts (presumably a 3-channel BGR array — confirm at callers).
        mode: One of ``"raw"``, ``"clahe"``, ``"adaptive"``, ``"otsu"``
            or ``"invert"``. Any other value falls back to plain grayscale.

    Returns:
        The processed grayscale image.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)

    if mode == "raw":
        return gray

    if mode == "clahe":
        # Local contrast equalisation.
        return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)

    if mode == "adaptive":
        # Light blur first so the local threshold is less sensitive to noise.
        den = cv2.GaussianBlur(gray, (3, 3), 0)
        return cv2.adaptiveThreshold(
            den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 35, 11
        )

    if mode == "otsu":
        den = cv2.GaussianBlur(gray, (3, 3), 0)
        _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th

    if mode == "invert":
        return 255 - gray

    # Unknown mode: behave like "raw".
    return gray
def rotate_image_keep_bounds(img, angle_deg):
    """Rotate *img* about its centre, enlarging the canvas so nothing is cut off.

    Args:
        img: Image array whose first two dimensions are (height, width).
        angle_deg: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image on a canvas sized to the rotated bounding box,
        with uncovered areas filled white.
    """
    h, w = img.shape[:2]
    c = (w / 2, h / 2)

    M = cv2.getRotationMatrix2D(c, angle_deg, 1.0)
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])

    # Size of the axis-aligned box that contains the rotated image.
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Shift the transform so the original centre maps to the new centre.
    M[0, 2] += (new_w / 2) - c[0]
    M[1, 2] += (new_h / 2) - c[1]

    return cv2.warpAffine(
        img,
        M,
        (new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        # A bare 255 becomes Scalar(255, 0, 0, 0): white on grayscale but
        # blue on BGR input. One value per channel gives white for 1-, 3-
        # and 4-channel images alike.
        borderValue=(255, 255, 255, 255),
    )
def run_ocr_on_array(reader, arr):
tmp = "_tmp_ocr.png"
cv2.imwrite(tmp, arr)
@@ -165,35 +222,104 @@ def run_ocr_on_array(reader, arr):
if os.path.exists(tmp):
os.remove(tmp)
def reread_crop(image, bbox, reader, upscale=2.5, pad=18):
def rebuild_text_from_ocr_result(res):
    """Reassemble OCR detections into one normalised string in reading order.

    Detections are grouped into rows by vertical centre, rows are ordered
    top to bottom, and tokens inside each row left to right.

    Args:
        res: Iterable of ``(quad, text, confidence)`` items. Items that do
            not have exactly three elements, or whose text is blank, are
            ignored.

    Returns:
        The normalised, space-joined text, or ``""`` when nothing usable
        was detected.
    """
    if not res:
        return ""

    # Each entry: (bbox, text, conf, x_center, y_center, height)
    norm = []
    for item in res:
        if len(item) != 3:
            continue
        bbox, txt, conf = item
        if not txt or not txt.strip():
            continue
        b = quad_bbox(bbox)
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        norm.append((b, txt, conf, xc, yc, h))

    if not norm:
        return ""

    # Row tolerance scales with the typical token height, with a 6 px floor.
    med_h = float(np.median([entry[5] for entry in norm]))
    row_tol = max(6.0, med_h * 0.75)

    norm.sort(key=lambda entry: entry[4])  # top to bottom

    rows = []
    for entry in norm:
        placed = False
        for row in rows:
            if abs(entry[4] - row["yc"]) <= row_tol:
                row["m"].append(entry)
                # Keep the row centre as the mean of its members.
                row["yc"] = float(np.mean([member[4] for member in row["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": entry[4], "m": [entry]})

    rows.sort(key=lambda row: row["yc"])

    lines = []
    for row in rows:
        members = sorted(row["m"], key=lambda entry: entry[3])  # left to right
        line = normalize_text(" ".join(member[1] for member in members))
        if line:
            lines.append(line)

    return normalize_text(" ".join(lines))
def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22):
    """Re-run OCR on a padded, upscaled crop using several variants; keep the best.

    Every combination of preprocessing mode and small rotation angle is
    OCR'd, the text is rebuilt with ``rebuild_text_from_ocr_result`` and
    scored with ``ocr_candidate_score``. The highest-scoring reading wins.

    Args:
        image: Source image array; the first two dimensions are (height, width).
        bbox: ``(x1, y1, x2, y2)`` region of interest in image coordinates.
        reader: OCR reader handed to ``run_ocr_on_array``.
        upscale: Scale factor applied to the crop before OCR.
        pad: Pixels added around *bbox* on every side (clamped to the image).

    Returns:
        ``(text, score)`` for the best reading, or ``(None, 0.0)`` when the
        crop is empty or no variant produced text with a positive score.
    """
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
    x1 = max(0, int(x1 - pad))
    y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad))
    y2 = min(ih, int(y2 + pad))

    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None, 0.0

    # Never ask cv2.resize for a zero-sized target (possible if upscale < 1).
    target_w = max(1, int(crop.shape[1] * upscale))
    target_h = max(1, int(crop.shape[0] * upscale))
    up = cv2.resize(crop, (target_w, target_h), interpolation=cv2.INTER_CUBIC)

    modes = ["raw", "clahe", "adaptive", "otsu", "invert"]
    angles = [0.0, 1.5, -1.5]

    best_text, best_score = "", 0.0

    for mode in modes:
        proc = preprocess_variant(up, mode)

        # Rotate the grayscale image directly. Converting to BGR first and
        # rotating with a scalar border value fills the corners with
        # (255, 0, 0), which turns into dark wedges once converted back
        # to gray.
        if len(proc.shape) == 2:
            gray = proc
        else:
            gray = cv2.cvtColor(proc, cv2.COLOR_BGR2GRAY)

        for angle in angles:
            rot = rotate_image_keep_bounds(gray, angle)
            res = run_ocr_on_array(reader, rot)
            txt = rebuild_text_from_ocr_result(res)
            sc = ocr_candidate_score(txt)

            if sc > best_score:
                best_text, best_score = txt, sc

    if not best_text:
        return None, 0.0
    return best_text, best_score
# ─────────────────────────────────────────────
# LINES + YELLOW BOXES
# LINES + YELLOW BOXES
# ─────────────────────────────────────────────
def build_lines_from_indices(indices, ocr):
if not indices:
@@ -233,14 +359,13 @@ def build_lines_from_indices(indices, ocr):
return lines
def build_line_boxes_from_indices(indices, ocr):
"""
Robust yellow-box generation with punctuation attachment:
- row grouping
- chunking by x gap
- attach tiny punctuation/special tokens to nearest chunk
- coverage guarantee
- token coverage guarantee
"""
if not indices:
return []
@@ -264,13 +389,13 @@ def build_line_boxes_from_indices(indices, ocr):
pad = max(1, int(round(med_h * 0.12)))
def is_punct_like(t):
raw = t.strip()
raw = (t or "").strip()
if raw == "":
return True
punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
return punct_ratio >= 0.5 or len(raw) <= 2
# 1) rows
# 1) row grouping
items_sorted = sorted(items, key=lambda x: x["yc"])
rows = []
for it in items_sorted:
@@ -296,7 +421,7 @@ def build_line_boxes_from_indices(indices, ocr):
normal = mem
punct = []
# 2) chunk normal tokens
# 2) chunk normal by x-gap
chunks = []
cur = [normal[0]]
for t in normal[1:]:
@@ -310,18 +435,17 @@ def build_line_boxes_from_indices(indices, ocr):
cur = [t]
chunks.append(cur)
# 3) attach punctuation tokens
# 3) attach punct tokens to nearest chunk
for p in punct:
pb = p["b"]
pxc, pyc = p["xc"], p["yc"]
best_k = -1
best_score = 1e18
for k, ch in enumerate(chunks):
ub = boxes_union_xyxy([x["b"] for x in ch])
cx = (ub[0] + ub[2]) / 2.0
cy = (ub[1] + ub[3]) / 2.0
dx = abs(pxc - cx)
dy = abs(pyc - cy)
score = dx + 1.8 * dy
@@ -339,22 +463,21 @@ def build_line_boxes_from_indices(indices, ocr):
else:
chunks.append([p])
# 4) chunk boxes
# 4) emit chunk boxes
for ch in chunks:
ub = boxes_union_xyxy([x["b"] for x in ch])
if ub:
x1, y1, x2, y2 = ub
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
# 5) guarantee all tokens included
# 5) guarantee every token is inside some yellow box
token_boxes = [it["b"] for it in items]
def inside(tb, lb):
return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
for tb in token_boxes:
ok = any(inside(tb, lb) for lb in out_boxes)
if not ok:
if not any(inside(tb, lb) for lb in out_boxes):
x1, y1, x2, y2 = tb
out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
@@ -366,8 +489,8 @@ def build_line_boxes_from_indices(indices, ocr):
ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
a1 = max(1, (b[2]-b[0])*(b[3]-b[1]))
a2 = max(1, (m[2]-m[0])*(m[3]-m[1]))
a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
if iou > 0.72:
merged[i] = boxes_union_xyxy([b, m])
@@ -381,7 +504,7 @@ def build_line_boxes_from_indices(indices, ocr):
# ─────────────────────────────────────────────
# GROUPING
# GROUPING
# ─────────────────────────────────────────────
def auto_gap(image_path, base=18, ref_w=750):
img = cv2.imread(image_path)
@@ -426,7 +549,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
for i in range(n):
groups.setdefault(find(i), []).append(i)
sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs)))
sorted_groups = sorted(
groups.values(),
key=lambda idxs: (
min(boxes[i][1] for i in idxs),
min(boxes[i][0] for i in idxs)
)
)
bubbles = {}
bubble_boxes = {}
@@ -436,6 +565,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
ih, iw = image_shape[:2]
for bid, idxs in enumerate(sorted_groups, start=1):
idxs = sorted(idxs, key=lambda k: boxes[k][1])
lines = build_lines_from_indices(idxs, ocr)
quads = [ocr[k][0] for k in idxs]
ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
@@ -443,8 +573,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
continue
x1, y1, x2, y2 = ub
x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding)
x1 = max(0, x1 - bbox_padding)
y1 = max(0, y1 - bbox_padding)
x2 = min(iw, x2 + bbox_padding)
y2 = min(ih, y2 + bbox_padding)
bubbles[bid] = lines
bubble_boxes[bid] = (x1, y1, x2, y2)
@@ -455,23 +587,24 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
# ─────────────────────────────────────────────
# DEBUG
# DEBUG
# ─────────────────────────────────────────────
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
img = cv2.imread(image_path)
if img is None:
return
# token quads
# OCR token quads
for bbox, txt, conf in ocr:
pts = np.array(bbox, dtype=np.int32)
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
# bubble boxes + yellow line boxes
# Bubble + line boxes
for bid, bb in bubble_boxes.items():
x1, y1, x2, y2 = bb
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
idxs = bubble_indices.get(bid, [])
line_boxes = build_line_boxes_from_indices(idxs, ocr)
@@ -485,7 +618,7 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path=
# ─────────────────────────────────────────────
# EXPORT
# EXPORT
# ─────────────────────────────────────────────
def estimate_reading_order(bbox_dict, mode="ltr"):
items = []
@@ -494,7 +627,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
cy = (y1 + y2) / 2.0
items.append((bid, cx, cy))
items.sort(key=lambda t: t[2])
items.sort(key=lambda t: t[2]) # top to bottom
rows = []
tol = 90
@@ -517,7 +650,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
return {bid: i + 1 for i, bid in enumerate(order)}
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
out = {}
@@ -536,11 +668,15 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
out[str(bid)] = {
"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
"reading_order": int(reading_map.get(bid, bid)),
"quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes],
"quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
"quad_bboxes": [
{"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
for b in qboxes
],
"quads": [
[[int(p[0]), int(p[1])] for p in q] for q in quads
],
"text_bbox": xyxy_to_xywh(text_union),
# yellow geometry
"line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
"line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
"line_union_area": int(line_union_area),
@@ -551,7 +687,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
# ─────────────────────────────────────────────
# MAIN
# MAIN
# ─────────────────────────────────────────────
def translate_manga_text(
image_path,
@@ -606,6 +742,7 @@ def translate_manga_text(
skipped += 1
continue
# reduce false positives in very top strip
if qb[1] < int(ih * TOP_BAND_RATIO):
if conf < 0.70 and len(t) >= 5:
skipped += 1
@@ -633,14 +770,28 @@ def translate_manga_text(
translator = GoogleTranslator(source=source_lang, target=target_lang)
# robust bubble text cleanup
clean_lines = {}
for bid, lines in bubbles.items():
txt = normalize_text(" ".join(lines))
q = ocr_quality_score(txt)
if q < quality_threshold:
reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18)
if reread:
txt = normalize_text(reread)
base_txt = normalize_text(" ".join(lines))
base_sc = ocr_candidate_score(base_txt)
# only robust reread on low quality
if base_sc < quality_threshold:
rr_txt, rr_sc = reread_crop_robust(
image,
bubble_boxes[bid],
reader,
upscale=3.0,
pad=22
)
if rr_txt and rr_sc > base_sc + 0.06:
txt = rr_txt
else:
txt = base_txt
else:
txt = base_txt
clean_lines[bid] = apply_glossary(txt)
reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
@@ -657,18 +808,24 @@ def translate_manga_text(
src = clean_lines[bid].strip()
if not src:
continue
flags = []
flags = []
try:
tgt = translator.translate(src) or ""
except Exception as e:
tgt = f"[Translation error: {e}]"
flags.append("TRANSLATION_ERROR")
tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
src_u = src.upper()
out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}")
print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}")
out_lines.append(
f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
)
print(
f"#{bid:<7} {reading_map.get(bid,bid):<6} "
f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
)
translated_count += 1
out_lines.append(divider)
@@ -691,13 +848,13 @@ def translate_manga_text(
print(f"Saved: {export_to_file}")
print(f"Saved: {export_bubbles_to}")
if debug:
print("Saved: debug_clusters.png (special chars included in yellow boxes)")
print("Saved: debug_clusters.png")
if __name__ == "__main__":
translate_manga_text(
image_path="002-page.png",
source_lang="en",
image_path="001-page.png",
source_lang="it",
target_lang="ca",
confidence_threshold=0.12,
min_text_length=1,

View File

@@ -2,60 +2,76 @@
"""
pipeline.py
───────────────────────────────────────────────────────────────
Translation-only pipeline for Dandadan_059_2022_Digital
Translation + render pipeline
Flow per page:
1. Run translate_manga_text() output.txt + bubbles.json
2. Copy original image to workdir for reference
1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
2) render_translations() -> page_translated.png
3) Pack CBZ with originals + rendered pages + text outputs
Folder structure produced:
Dandadan_059_2022_Digital_1r0n/
Folder structure:
<CHAPTER_DIR>/
├── 000.png
├── 001.png
└── translated/
├── 00/
│ ├── output.txt ← translations to review
│ ├── bubbles.json ← bubble boxes
── debug_clusters.png ← cluster debug (if DEBUG=True)
├── 01/
├── 000/
│ ├── output.txt
│ ├── bubbles.json
── page_translated.png
│ └── debug_clusters.png (optional)
├── 001/
│ └── ...
└── ...
Dandadan_059_translated.cbz ← original pages + translations
zipped for reference
CBZ:
- pages/<original pages>
- rendered/<page_stem>_translated.png
- translations/<page_stem>_output.txt
"""
import os
import sys
import shutil
import zipfile
import importlib.util
from pathlib import Path
# ─────────────────────────────────────────────
# CONFIG — edit these as needed
# CONFIG
# ─────────────────────────────────────────────
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
SOURCE_LANG = "en"
TARGET_LANG = "ca"
CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
# manga-translator.py settings
SOURCE_LANG = "en"
TARGET_LANG = "ca"
# translator (NEW signature-compatible)
CONFIDENCE_THRESHOLD = 0.10
MIN_TEXT_LENGTH = 2
CLUSTER_EPS = "auto"
PROXIMITY_PX = 80
MIN_TEXT_LENGTH = 1
GAP_PX = "auto" # was cluster/proximity in old version
FILTER_SFX = True
QUALITY_THRESHOLD = 0.5
UPSCALE_FACTOR = 2.5
BBOX_PADDING = 5
QUALITY_THRESHOLD = 0.50
READING_MODE = "ltr"
DEBUG = True
# renderer
RENDER_ENABLED = True
RENDER_OUTPUT_NAME = "page_translated.png"
# optional custom font list for renderer
FONT_CANDIDATES = [
"fonts/ComicNeue-Regular.ttf",
"fonts/ComicRelief-Regular.ttf"
]
# ─────────────────────────────────────────────
# DYNAMIC MODULE LOADER
# ─────────────────────────────────────────────
def load_module(name, filepath):
    """Import a Python source file by path and return the module object.

    This allows loading files whose names are not valid identifiers
    (for example ``manga-translator.py``).

    Args:
        name: Module name to assign to the loaded module.
        filepath: Path to the source file.

    Returns:
        The executed module.

    Raises:
        FileNotFoundError: If no import spec/loader can be built for
            *filepath*. Errors raised while executing the module
            propagate unchanged.
    """
    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
@@ -65,10 +81,10 @@ def load_module(name, filepath):
# HELPERS
# ─────────────────────────────────────────────
def sorted_pages(chapter_dir):
    """Return the page image files directly inside *chapter_dir*, sorted by stem.

    Only regular files with a ``.jpg``, ``.jpeg``, ``.png`` or ``.webp``
    suffix (case-insensitive) are included; sub-directories are skipped
    even when their name ends in an image suffix.

    Args:
        chapter_dir: Directory containing the chapter's page images.

    Returns:
        A list of ``Path`` objects ordered by file stem (string comparison).
    """
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        p for p in Path(chapter_dir).iterdir()
        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=lambda p: p.stem)
@@ -80,82 +96,97 @@ def make_page_workdir(chapter_dir, page_stem):
def pack_cbz(chapter_dir, translated_dir, output_cbz):
    """Bundle original pages, rendered pages and text outputs into a CBZ.

    Archive layout:
        pages/<original file name>
        rendered/<page workdir name>_translated.png
        translations/<page workdir name>_output.txt

    Original pages are ordered by file stem; rendered pages and text
    outputs by the name of their parent (per-page) directory. Nothing is
    written when *chapter_dir* contains no page images.

    Args:
        chapter_dir: Directory holding the original page images.
        translated_dir: Directory searched recursively for ``output.txt``
            and ``RENDER_OUTPUT_NAME`` files (``str`` or ``Path``).
        output_cbz: Destination path of the archive.
    """
    # Accept plain strings as well as Path objects.
    translated_dir = Path(translated_dir)

    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = sorted(
        [p for p in Path(chapter_dir).iterdir()
         if p.is_file() and p.suffix.lower() in exts],
        key=lambda p: p.stem
    )
    txts = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name
    )
    rendered = sorted(
        translated_dir.rglob(RENDER_OUTPUT_NAME),
        key=lambda p: p.parent.name
    )

    if not pages:
        print("⚠️ No original pages found — CBZ not created.")
        return

    # Entries are stored without compression (ZIP_STORED).
    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
        # original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f" 🖼 {arcname}")

        # rendered pages
        for rp in rendered:
            arcname = f"rendered/{rp.parent.name}_translated.png"
            zf.write(rp, arcname)
            print(f" 🎨 {arcname}")

        # text outputs
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f" 📄 {arcname}")

    print(
        f"\n✅ CBZ saved → {output_cbz} "
        f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)"
    )
# ─────────────────────────────────────────────
# PER-PAGE PIPELINE
# ─────────────────────────────────────────────
def process_page(page_path, workdir, translator_module):
def process_page(page_path, workdir, translator_module, renderer_module):
"""
Runs translator for a single page.
All output files land in workdir.
Returns True on success, False on failure.
Runs translator + renderer for one page.
All generated files are written inside workdir.
"""
print(f"\n{''*60}")
print(f" PAGE: {page_path.name}")
print(f"{''*60}")
print(f"\n{'' * 70}")
print(f"PAGE: {page_path.name}")
print(f"{'' * 70}")
orig_dir = os.getcwd()
try:
# chdir into workdir so debug_clusters.png,
# temp files etc. all land there
os.chdir(workdir)
# 1) translate
translator_module.translate_manga_text(
image_path = str(page_path.resolve()),
source_lang = SOURCE_LANG,
target_lang = TARGET_LANG,
confidence_threshold = CONFIDENCE_THRESHOLD,
export_to_file = "output.txt",
export_bubbles_to = "bubbles.json",
min_text_length = MIN_TEXT_LENGTH,
cluster_eps = CLUSTER_EPS,
proximity_px = PROXIMITY_PX,
filter_sound_effects = FILTER_SFX,
quality_threshold = QUALITY_THRESHOLD,
upscale_factor = UPSCALE_FACTOR,
bbox_padding = BBOX_PADDING,
debug = DEBUG,
image_path= str(page_path.resolve()),
source_lang=SOURCE_LANG,
target_lang=TARGET_LANG,
confidence_threshold=CONFIDENCE_THRESHOLD,
min_text_length=MIN_TEXT_LENGTH,
gap_px=GAP_PX,
filter_sound_effects=FILTER_SFX,
quality_threshold=QUALITY_THRESHOLD,
export_to_file="output.txt",
export_bubbles_to="bubbles.json",
reading_mode=READING_MODE,
debug=DEBUG
)
print(" ✅ translator done")
# 2) render
if RENDER_ENABLED:
renderer_module.render_translations(
input_image=str(page_path.resolve()),
output_image=RENDER_OUTPUT_NAME,
translations_file="output.txt",
bubbles_file="bubbles.json",
font_candidates=FONT_CANDIDATES
)
print(" ✅ renderer done")
print(f" ✅ Translated → {workdir}")
return True
except Exception as e:
@@ -170,16 +201,20 @@ def process_page(page_path, workdir, translator_module):
# MAIN
# ─────────────────────────────────────────────
def main():
# ── Load translator module ────────────────────────────────────
print("Loading manga-translator.py...")
print("Loading modules...")
try:
translator = load_module(
"manga_translator", "manga-translator.py")
except FileNotFoundError as e:
print(f"❌ Could not load module: {e}")
translator = load_module("manga_translator", "manga-translator.py")
except Exception as e:
print(f"❌ Could not load manga-translator.py: {e}")
sys.exit(1)
try:
renderer = load_module("manga_renderer", "manga-renderer.py")
except Exception as e:
print(f"❌ Could not load manga-renderer.py: {e}")
sys.exit(1)
# ── Discover pages ────────────────────────────────────────────
pages = sorted_pages(CHAPTER_DIR)
if not pages:
print(f"❌ No images found in: {CHAPTER_DIR}")
@@ -187,33 +222,31 @@ def main():
print(f"\n📖 Chapter : {CHAPTER_DIR}")
print(f" Pages : {len(pages)}")
print(f" Source : {SOURCE_LANG} Target: {TARGET_LANG}\n")
print(f" Source : {SOURCE_LANG} Target: {TARGET_LANG}")
print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n")
# ── Process each page ─────────────────────────────────────────
translated_dir = Path(CHAPTER_DIR) / "translated"
succeeded = []
failed = []
failed = []
for i, page_path in enumerate(pages, start=1):
print(f"\n[{i}/{len(pages)}] {page_path.name}")
print(f"[{i}/{len(pages)}] {page_path.name}")
workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
ok = process_page(page_path, workdir, translator)
ok = process_page(page_path, workdir, translator, renderer)
if ok:
succeeded.append(page_path.name)
else:
failed.append(page_path.name)
# ── Summary ───────────────────────────────────────────────────
print(f"\n{''*60}")
print(f" PIPELINE COMPLETE")
print(f"{len(succeeded)} page(s) succeeded")
print(f"\n{'' * 70}")
print("PIPELINE COMPLETE")
print(f"{len(succeeded)} page(s) succeeded")
if failed:
print(f" {len(failed)} page(s) failed:")
print(f"{len(failed)} page(s) failed:")
for f in failed:
print(f" {f}")
print(f"{''*60}\n")
print(f"{f}")
print(f"{'' * 70}\n")
# ── Pack CBZ ──────────────────────────────────────────────────
print("Packing CBZ...")
pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)