diff --git a/fonts/ComicNeue-Regular.ttf b/fonts/ComicNeue-Regular.ttf new file mode 100755 index 0000000..6525a27 Binary files /dev/null and b/fonts/ComicNeue-Regular.ttf differ diff --git a/fonts/ComicRelief-Bold.ttf b/fonts/ComicRelief-Bold.ttf deleted file mode 100755 index 7b86246..0000000 Binary files a/fonts/ComicRelief-Bold.ttf and /dev/null differ diff --git a/manga-renderer.py b/manga-renderer.py index 5dbd36d..ebc2c11 100644 --- a/manga-renderer.py +++ b/manga-renderer.py @@ -10,10 +10,8 @@ from PIL import Image, ImageDraw, ImageFont # CONFIG # ───────────────────────────────────────────── DEFAULT_FONT_CANDIDATES = [ - "fonts/AnimeAce2_reg.ttf", - "fonts/WildWordsRoman.ttf", "fonts/ComicRelief-Regular.ttf", - "fonts/NotoSans-Regular.ttf", + "fonts/ComicNeue-Regular.ttf", ] DEFAULT_FONT_COLOR = (0, 0, 0) DEFAULT_STROKE_COLOR = (255, 255, 255) @@ -501,7 +499,7 @@ def render_translations( if __name__ == "__main__": render_translations( - input_image="002-page.png", + input_image="001-page.png", output_image="page_translated.png", translations_file="output.txt", bubbles_file="bubbles.json", diff --git a/manga-translator.py b/manga-translator.py index 8ce9f3b..0aff648 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + import re import os import json @@ -8,7 +11,7 @@ from deep_translator import GoogleTranslator # ───────────────────────────────────────────── -# CONFIG +# CONFIG # ───────────────────────────────────────────── GLOSSARY = { "ANYA": "ANYA", @@ -32,17 +35,17 @@ TITLE_PATTERNS = [ NOISE_PATTERNS = [ r"^[^a-zA-Z0-9\?!.]+$", - r"^BOX[0-9A-Z]*$", + r"^BOX[0-9A-Z#\s]*$", ] TOP_BAND_RATIO = 0.08 # ───────────────────────────────────────────── -# TEXT HELPERS +# TEXT HELPERS # ───────────────────────────────────────────── -def normalize_text(text): - t = text.strip().upper() +def normalize_text(text: str) -> str: + t = (text or "").strip().upper() t = t.replace("“", "\"").replace("”", "\"") t = t.replace("’", "'").replace("‘", "'") t = t.replace("…", "...") @@ -54,13 +57,13 @@ def normalize_text(text): t = re.sub(r",\?", "?", t) return t.strip() -def apply_glossary(text): - out = text +def apply_glossary(text: str) -> str: + out = text or "" for k in sorted(GLOSSARY.keys(), key=len, reverse=True): out = re.sub(rf"\b{re.escape(k)}\b", GLOSSARY[k], out, flags=re.IGNORECASE) return out -def postprocess_translation_general(text): +def postprocess_translation_general(text: str) -> str: t = normalize_text(text) t = re.sub(r"\s{2,}", " ", t).strip() t = re.sub(r"([!?]){3,}", r"\1\1", t) @@ -69,23 +72,23 @@ def postprocess_translation_general(text): # ───────────────────────────────────────────── -# FILTERS +# FILTERS # ───────────────────────────────────────────── -def is_sound_effect(text): - cleaned = re.sub(r"[^a-z]", "", text.strip().lower()) +def is_sound_effect(text: str) -> bool: + cleaned = re.sub(r"[^a-z]", "", (text or "").strip().lower()) return any(re.fullmatch(p, cleaned, re.IGNORECASE) for p in SOUND_EFFECT_PATTERNS) -def is_title_text(text): - t = text.strip().lower() +def is_title_text(text: str) -> bool: + t = (text or "").strip().lower() return any(re.fullmatch(p, t, re.IGNORECASE) for p in TITLE_PATTERNS) -def is_noise_text(text): - t = text.strip() +def is_noise_text(text: str) -> bool: + t = (text or "").strip() return any(re.fullmatch(p, t) for p in NOISE_PATTERNS) # ───────────────────────────────────────────── -# GEOMETRY +# GEOMETRY # ───────────────────────────────────────────── def quad_bbox(quad): xs = [p[0] for p in quad] @@ -127,9 +130,9 @@ def overlap_or_near(a, b, gap=0): # ───────────────────────────────────────────── -# QUALITY +# QUALITY / SCORING # ───────────────────────────────────────────── -def ocr_quality_score(text): +def ocr_quality_score(text: str) -> float: if not text or len(text) < 2: return 0.0 alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text)) @@ -141,21 +144,75 @@ def ocr_quality_score(text): bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0 return max(0.0, min(1.0, alpha_ratio - penalty + bonus)) +def ocr_candidate_score(text: str) -> float: + if not text: + return 0.0 + t = text.strip() + n = len(t) + if n == 0: + return 0.0 + + alpha = sum(c.isalpha() for c in t) / n + spaces = sum(c.isspace() for c in t) / n + punct_ok = sum(c in ".,!?'-:;()[]\"" for c in t) / n + bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", t)) / n + + penalty = 0.0 + if re.search(r"\b[A-Z]\b", t): + penalty += 0.05 + if re.search(r"[0-9]{2,}", t): + penalty += 0.08 + if re.search(r"(..)\1\1", t): + penalty += 0.08 + + score = (0.62 * alpha) + (0.10 * spaces) + (0.20 * punct_ok) - (0.45 * bad) - penalty + return max(0.0, min(1.0, score)) + # ───────────────────────────────────────────── -# OCR RE-READ +# OCR MULTI-PASS # ───────────────────────────────────────────── def preprocess_variant(crop_bgr, mode): gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY) + if mode == "raw": return gray + if mode == "clahe": return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray) + if mode == "adaptive": den = cv2.GaussianBlur(gray, (3, 3), 0) - return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11) + return cv2.adaptiveThreshold( + den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, 35, 11 + ) + + if mode == "otsu": + den = cv2.GaussianBlur(gray, (3, 3), 0) + _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + return th + + if mode == "invert": + return 255 - gray + return gray +def rotate_image_keep_bounds(img, angle_deg): + h, w = img.shape[:2] + c = (w / 2, h / 2) + M = cv2.getRotationMatrix2D(c, angle_deg, 1.0) + cos = abs(M[0, 0]) + sin = abs(M[0, 1]) + + new_w = int((h * sin) + (w * cos)) + new_h = int((h * cos) + (w * sin)) + + M[0, 2] += (new_w / 2) - c[0] + M[1, 2] += (new_h / 2) - c[1] + + return cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderValue=255) + def run_ocr_on_array(reader, arr): tmp = "_tmp_ocr.png" cv2.imwrite(tmp, arr) @@ -165,35 +222,104 @@ def run_ocr_on_array(reader, arr): if os.path.exists(tmp): os.remove(tmp) -def reread_crop(image, bbox, reader, upscale=2.5, pad=18): +def rebuild_text_from_ocr_result(res): + if not res: + return "" + + norm = [] + for item in res: + if len(item) != 3: + continue + bbox, txt, conf = item + if not txt or not txt.strip(): + continue + b = quad_bbox(bbox) + xc = (b[0] + b[2]) / 2.0 + yc = (b[1] + b[3]) / 2.0 + h = max(1.0, b[3] - b[1]) + norm.append((b, txt, conf, xc, yc, h)) + + if not norm: + return "" + + med_h = float(np.median([x[5] for x in norm])) + row_tol = max(6.0, med_h * 0.75) + + norm.sort(key=lambda z: z[4]) # y + rows = [] + for it in norm: + placed = False + for r in rows: + if abs(it[4] - r["yc"]) <= row_tol: + r["m"].append(it) + r["yc"] = float(np.mean([k[4] for k in r["m"]])) + placed = True + break + if not placed: + rows.append({"yc": it[4], "m": [it]}) + + rows.sort(key=lambda r: r["yc"]) + + lines = [] + for r in rows: + mem = sorted(r["m"], key=lambda z: z[3]) # x + line = normalize_text(" ".join(x[1] for x in mem)) + if line: + lines.append(line) + + return normalize_text(" ".join(lines)) + +def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22): ih, iw = image.shape[:2] x1, y1, x2, y2 = bbox - x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad)) - x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad)) + x1 = max(0, int(x1 - pad)) + y1 = max(0, int(y1 - pad)) + x2 = min(iw, int(x2 + pad)) + y2 = min(ih, int(y2 + pad)) crop = image[y1:y2, x1:x2] if crop.size == 0: - return None + return None, 0.0 - up = cv2.resize(crop, (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), interpolation=cv2.INTER_CUBIC) + up = cv2.resize( + crop, + (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), + interpolation=cv2.INTER_CUBIC + ) - best = None - for mode in ("raw", "clahe", "adaptive"): + modes = ["raw", "clahe", "adaptive", "otsu", "invert"] + angles = [0.0, 1.5, -1.5] + + best_text, best_score = "", 0.0 + + for mode in modes: proc = preprocess_variant(up, mode) - res = run_ocr_on_array(reader, proc) - if not res: - continue - res.sort(key=lambda r: (r[0][0][1], r[0][0][0])) - lines = [normalize_text(t) for _, t, _ in res if t.strip()] - merged = re.sub(r"\s{2,}", " ", " ".join(lines)).strip() - s = ocr_quality_score(merged) - if best is None or s > best[0]: - best = (s, merged) - return best[1] if best else None + if len(proc.shape) == 2: + proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR) + else: + proc3 = proc + + for a in angles: + rot = rotate_image_keep_bounds(proc3, a) + if len(rot.shape) == 3: + rot_in = cv2.cvtColor(rot, cv2.COLOR_BGR2GRAY) + else: + rot_in = rot + + res = run_ocr_on_array(reader, rot_in) + txt = rebuild_text_from_ocr_result(res) + sc = ocr_candidate_score(txt) + + if sc > best_score: + best_text, best_score = txt, sc + + if not best_text: + return None, 0.0 + return best_text, best_score # ───────────────────────────────────────────── -# LINES + YELLOW BOXES +# LINES + YELLOW BOXES # ───────────────────────────────────────────── def build_lines_from_indices(indices, ocr): if not indices: @@ -233,14 +359,13 @@ def build_lines_from_indices(indices, ocr): return lines - def build_line_boxes_from_indices(indices, ocr): """ Robust yellow-box generation with punctuation attachment: - row grouping - chunking by x gap - attach tiny punctuation/special tokens to nearest chunk - - coverage guarantee + - token coverage guarantee """ if not indices: return [] @@ -264,13 +389,13 @@ def build_line_boxes_from_indices(indices, ocr): pad = max(1, int(round(med_h * 0.12))) def is_punct_like(t): - raw = t.strip() + raw = (t or "").strip() if raw == "": return True punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw)) return punct_ratio >= 0.5 or len(raw) <= 2 - # 1) rows + # 1) row grouping items_sorted = sorted(items, key=lambda x: x["yc"]) rows = [] for it in items_sorted: @@ -296,7 +421,7 @@ def build_line_boxes_from_indices(indices, ocr): normal = mem punct = [] - # 2) chunk normal tokens + # 2) chunk normal by x-gap chunks = [] cur = [normal[0]] for t in normal[1:]: @@ -310,18 +435,17 @@ def build_line_boxes_from_indices(indices, ocr): cur = [t] chunks.append(cur) - # 3) attach punctuation tokens + # 3) attach punct tokens to nearest chunk for p in punct: pb = p["b"] pxc, pyc = p["xc"], p["yc"] - best_k = -1 best_score = 1e18 + for k, ch in enumerate(chunks): ub = boxes_union_xyxy([x["b"] for x in ch]) cx = (ub[0] + ub[2]) / 2.0 cy = (ub[1] + ub[3]) / 2.0 - dx = abs(pxc - cx) dy = abs(pyc - cy) score = dx + 1.8 * dy @@ -339,22 +463,21 @@ def build_line_boxes_from_indices(indices, ocr): else: chunks.append([p]) - # 4) chunk boxes + # 4) emit chunk boxes for ch in chunks: ub = boxes_union_xyxy([x["b"] for x in ch]) if ub: x1, y1, x2, y2 = ub out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad)) - # 5) guarantee all tokens included + # 5) guarantee every token is inside some yellow box token_boxes = [it["b"] for it in items] def inside(tb, lb): return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3] for tb in token_boxes: - ok = any(inside(tb, lb) for lb in out_boxes) - if not ok: + if not any(inside(tb, lb) for lb in out_boxes): x1, y1, x2, y2 = tb out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad)) @@ -366,8 +489,8 @@ def build_line_boxes_from_indices(indices, ocr): ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1]) ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3]) inter = max(0, ix2 - ix1) * max(0, iy2 - iy1) - a1 = max(1, (b[2]-b[0])*(b[3]-b[1])) - a2 = max(1, (m[2]-m[0])*(m[3]-m[1])) + a1 = max(1, (b[2] - b[0]) * (b[3] - b[1])) + a2 = max(1, (m[2] - m[0]) * (m[3] - m[1])) iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0 if iou > 0.72: merged[i] = boxes_union_xyxy([b, m]) @@ -381,7 +504,7 @@ def build_line_boxes_from_indices(indices, ocr): # ───────────────────────────────────────────── -# GROUPING +# GROUPING # ───────────────────────────────────────────── def auto_gap(image_path, base=18, ref_w=750): img = cv2.imread(image_path) @@ -426,7 +549,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): for i in range(n): groups.setdefault(find(i), []).append(i) - sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs))) + sorted_groups = sorted( + groups.values(), + key=lambda idxs: ( + min(boxes[i][1] for i in idxs), + min(boxes[i][0] for i in idxs) + ) + ) bubbles = {} bubble_boxes = {} @@ -436,6 +565,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): ih, iw = image_shape[:2] for bid, idxs in enumerate(sorted_groups, start=1): idxs = sorted(idxs, key=lambda k: boxes[k][1]) + lines = build_lines_from_indices(idxs, ocr) quads = [ocr[k][0] for k in idxs] ub = boxes_union_xyxy([quad_bbox(q) for q in quads]) @@ -443,8 +573,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): continue x1, y1, x2, y2 = ub - x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding) - x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding) + x1 = max(0, x1 - bbox_padding) + y1 = max(0, y1 - bbox_padding) + x2 = min(iw, x2 + bbox_padding) + y2 = min(ih, y2 + bbox_padding) bubbles[bid] = lines bubble_boxes[bid] = (x1, y1, x2, y2) @@ -455,23 +587,24 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): # ───────────────────────────────────────────── -# DEBUG +# DEBUG # ───────────────────────────────────────────── def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"): img = cv2.imread(image_path) if img is None: return - # token quads + # OCR token quads for bbox, txt, conf in ocr: pts = np.array(bbox, dtype=np.int32) cv2.polylines(img, [pts], True, (180, 180, 180), 1) - # bubble boxes + yellow line boxes + # Bubble + line boxes for bid, bb in bubble_boxes.items(): x1, y1, x2, y2 = bb cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2) - cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2) + cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2) idxs = bubble_indices.get(bid, []) line_boxes = build_line_boxes_from_indices(idxs, ocr) @@ -485,7 +618,7 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path= # ───────────────────────────────────────────── -# EXPORT +# EXPORT # ───────────────────────────────────────────── def estimate_reading_order(bbox_dict, mode="ltr"): items = [] @@ -494,7 +627,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"): cy = (y1 + y2) / 2.0 items.append((bid, cx, cy)) - items.sort(key=lambda t: t[2]) + items.sort(key=lambda t: t[2]) # top to bottom rows = [] tol = 90 @@ -517,7 +650,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"): return {bid: i + 1 for i, bid in enumerate(order)} - def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape): out = {} @@ -536,11 +668,15 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m out[str(bid)] = { "x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1), "reading_order": int(reading_map.get(bid, bid)), - "quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes], - "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads], + "quad_bboxes": [ + {"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])} + for b in qboxes + ], + "quads": [ + [[int(p[0]), int(p[1])] for p in q] for q in quads + ], "text_bbox": xyxy_to_xywh(text_union), - # yellow geometry "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy], "line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None, "line_union_area": int(line_union_area), @@ -551,7 +687,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m # ───────────────────────────────────────────── -# MAIN +# MAIN # ───────────────────────────────────────────── def translate_manga_text( image_path, @@ -606,6 +742,7 @@ def translate_manga_text( skipped += 1 continue + # reduce false positives in very top strip if qb[1] < int(ih * TOP_BAND_RATIO): if conf < 0.70 and len(t) >= 5: skipped += 1 @@ -633,14 +770,28 @@ def translate_manga_text( translator = GoogleTranslator(source=source_lang, target=target_lang) + # robust bubble text cleanup clean_lines = {} for bid, lines in bubbles.items(): - txt = normalize_text(" ".join(lines)) - q = ocr_quality_score(txt) - if q < quality_threshold: - reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18) - if reread: - txt = normalize_text(reread) + base_txt = normalize_text(" ".join(lines)) + base_sc = ocr_candidate_score(base_txt) + + # only robust reread on low quality + if base_sc < quality_threshold: + rr_txt, rr_sc = reread_crop_robust( + image, + bubble_boxes[bid], + reader, + upscale=3.0, + pad=22 + ) + if rr_txt and rr_sc > base_sc + 0.06: + txt = rr_txt + else: + txt = base_txt + else: + txt = base_txt + clean_lines[bid] = apply_glossary(txt) reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode) @@ -657,18 +808,24 @@ def translate_manga_text( src = clean_lines[bid].strip() if not src: continue - flags = [] + flags = [] try: tgt = translator.translate(src) or "" except Exception as e: tgt = f"[Translation error: {e}]" + flags.append("TRANSLATION_ERROR") tgt = apply_glossary(postprocess_translation_general(tgt)).upper() src_u = src.upper() - out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}") - print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}") + out_lines.append( + f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}" + ) + print( + f"#{bid:<7} {reading_map.get(bid,bid):<6} " + f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}" + ) translated_count += 1 out_lines.append(divider) @@ -691,13 +848,13 @@ def translate_manga_text( print(f"Saved: {export_to_file}") print(f"Saved: {export_bubbles_to}") if debug: - print("Saved: debug_clusters.png (special chars included in yellow boxes)") + print("Saved: debug_clusters.png") if __name__ == "__main__": translate_manga_text( - image_path="002-page.png", - source_lang="en", + image_path="001-page.png", + source_lang="it", target_lang="ca", confidence_threshold=0.12, min_text_length=1, diff --git a/pipeline.py b/pipeline.py index 0ad6015..9625c56 100644 --- a/pipeline.py +++ b/pipeline.py @@ -2,60 +2,76 @@ """ pipeline.py ─────────────────────────────────────────────────────────────── -Translation-only pipeline for Dandadan_059_2022_Digital +Translation + render pipeline Flow per page: - 1. Run translate_manga_text() → output.txt + bubbles.json - 2. Copy original image to workdir for reference + 1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG) + 2) render_translations() -> page_translated.png + 3) Pack CBZ with originals + rendered pages + text outputs -Folder structure produced: - Dandadan_059_2022_Digital_1r0n/ +Folder structure: + / + ├── 000.png + ├── 001.png └── translated/ - ├── 00/ - │ ├── output.txt ← translations to review - │ ├── bubbles.json ← bubble boxes - │ └── debug_clusters.png ← cluster debug (if DEBUG=True) - ├── 01/ + ├── 000/ + │ ├── output.txt + │ ├── bubbles.json + │ ├── page_translated.png + │ └── debug_clusters.png (optional) + ├── 001/ │ └── ... └── ... - Dandadan_059_translated.cbz ← original pages + translations - zipped for reference +CBZ: + - pages/ + - rendered/_translated.png + - translations/_output.txt """ import os import sys -import shutil import zipfile import importlib.util from pathlib import Path # ───────────────────────────────────────────── -# CONFIG — edit these as needed +# CONFIG # ───────────────────────────────────────────── -CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n" -OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz" -SOURCE_LANG = "en" -TARGET_LANG = "ca" +CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n" +OUTPUT_CBZ = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz" -# manga-translator.py settings +SOURCE_LANG = "en" +TARGET_LANG = "ca" + +# translator (NEW signature-compatible) CONFIDENCE_THRESHOLD = 0.10 -MIN_TEXT_LENGTH = 2 -CLUSTER_EPS = "auto" -PROXIMITY_PX = 80 +MIN_TEXT_LENGTH = 1 +GAP_PX = "auto" # was cluster/proximity in old version FILTER_SFX = True -QUALITY_THRESHOLD = 0.5 -UPSCALE_FACTOR = 2.5 -BBOX_PADDING = 5 +QUALITY_THRESHOLD = 0.50 +READING_MODE = "ltr" DEBUG = True +# renderer +RENDER_ENABLED = True +RENDER_OUTPUT_NAME = "page_translated.png" + +# optional custom font list for renderer +FONT_CANDIDATES = [ + "fonts/ComicNeue-Regular.ttf", + "fonts/ComicRelief-Regular.ttf" +] + # ───────────────────────────────────────────── # DYNAMIC MODULE LOADER # ───────────────────────────────────────────── def load_module(name, filepath): - spec = importlib.util.spec_from_file_location(name, filepath) + spec = importlib.util.spec_from_file_location(name, filepath) + if spec is None or spec.loader is None: + raise FileNotFoundError(f"Cannot load spec for {filepath}") module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module @@ -65,10 +81,10 @@ def load_module(name, filepath): # HELPERS # ───────────────────────────────────────────── def sorted_pages(chapter_dir): - exts = {".jpg", ".jpeg", ".png", ".webp"} + exts = {".jpg", ".jpeg", ".png", ".webp"} pages = [ p for p in Path(chapter_dir).iterdir() - if p.suffix.lower() in exts + if p.is_file() and p.suffix.lower() in exts ] return sorted(pages, key=lambda p: p.stem) @@ -80,82 +96,97 @@ def make_page_workdir(chapter_dir, page_stem): def pack_cbz(chapter_dir, translated_dir, output_cbz): - """ - Packs into CBZ: - - All original pages (from chapter_dir root) - - All output.txt (one per page subfolder) - Sorted by page stem for correct reading order. - """ - exts = {".jpg", ".jpeg", ".png", ".webp"} - pages = sorted( + exts = {".jpg", ".jpeg", ".png", ".webp"} + + pages = sorted( [p for p in Path(chapter_dir).iterdir() - if p.suffix.lower() in exts], + if p.is_file() and p.suffix.lower() in exts], key=lambda p: p.stem ) - txts = sorted( + + txts = sorted( translated_dir.rglob("output.txt"), key=lambda p: p.parent.name ) + rendered = sorted( + translated_dir.rglob(RENDER_OUTPUT_NAME), + key=lambda p: p.parent.name + ) + if not pages: print("⚠️ No original pages found — CBZ not created.") return - with zipfile.ZipFile(output_cbz, "w", - compression=zipfile.ZIP_STORED) as zf: - # Original pages + with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf: + # original pages for img in pages: arcname = f"pages/{img.name}" zf.write(img, arcname) print(f" 🖼 {arcname}") - # Translation text files + # rendered pages + for rp in rendered: + arcname = f"rendered/{rp.parent.name}_translated.png" + zf.write(rp, arcname) + print(f" 🎨 {arcname}") + + # text outputs for txt in txts: arcname = f"translations/{txt.parent.name}_output.txt" zf.write(txt, arcname) print(f" 📄 {arcname}") - print(f"\n✅ CBZ saved → {output_cbz} " - f"({len(pages)} page(s), {len(txts)} translation(s))") + print( + f"\n✅ CBZ saved → {output_cbz} " + f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)" + ) # ───────────────────────────────────────────── # PER-PAGE PIPELINE # ───────────────────────────────────────────── -def process_page(page_path, workdir, translator_module): +def process_page(page_path, workdir, translator_module, renderer_module): """ - Runs translator for a single page. - All output files land in workdir. - Returns True on success, False on failure. + Runs translator + renderer for one page. + All generated files are written inside workdir. """ - print(f"\n{'─'*60}") - print(f" PAGE: {page_path.name}") - print(f"{'─'*60}") + print(f"\n{'─' * 70}") + print(f"PAGE: {page_path.name}") + print(f"{'─' * 70}") orig_dir = os.getcwd() try: - # chdir into workdir so debug_clusters.png, - # temp files etc. all land there os.chdir(workdir) + # 1) translate translator_module.translate_manga_text( - image_path = str(page_path.resolve()), - source_lang = SOURCE_LANG, - target_lang = TARGET_LANG, - confidence_threshold = CONFIDENCE_THRESHOLD, - export_to_file = "output.txt", - export_bubbles_to = "bubbles.json", - min_text_length = MIN_TEXT_LENGTH, - cluster_eps = CLUSTER_EPS, - proximity_px = PROXIMITY_PX, - filter_sound_effects = FILTER_SFX, - quality_threshold = QUALITY_THRESHOLD, - upscale_factor = UPSCALE_FACTOR, - bbox_padding = BBOX_PADDING, - debug = DEBUG, + image_path= str(page_path.resolve()), + source_lang=SOURCE_LANG, + target_lang=TARGET_LANG, + confidence_threshold=CONFIDENCE_THRESHOLD, + min_text_length=MIN_TEXT_LENGTH, + gap_px=GAP_PX, + filter_sound_effects=FILTER_SFX, + quality_threshold=QUALITY_THRESHOLD, + export_to_file="output.txt", + export_bubbles_to="bubbles.json", + reading_mode=READING_MODE, + debug=DEBUG ) + print(" ✅ translator done") + + # 2) render + if RENDER_ENABLED: + renderer_module.render_translations( + input_image=str(page_path.resolve()), + output_image=RENDER_OUTPUT_NAME, + translations_file="output.txt", + bubbles_file="bubbles.json", + font_candidates=FONT_CANDIDATES + ) + print(" ✅ renderer done") - print(f" ✅ Translated → {workdir}") return True except Exception as e: @@ -170,16 +201,20 @@ def process_page(page_path, workdir, translator_module): # MAIN # ───────────────────────────────────────────── def main(): - # ── Load translator module ──────────────────────────────────── - print("Loading manga-translator.py...") + print("Loading modules...") + try: - translator = load_module( - "manga_translator", "manga-translator.py") - except FileNotFoundError as e: - print(f"❌ Could not load module: {e}") + translator = load_module("manga_translator", "manga-translator.py") + except Exception as e: + print(f"❌ Could not load manga-translator.py: {e}") + sys.exit(1) + + try: + renderer = load_module("manga_renderer", "manga-renderer.py") + except Exception as e: + print(f"❌ Could not load manga-renderer.py: {e}") sys.exit(1) - # ── Discover pages ──────────────────────────────────────────── pages = sorted_pages(CHAPTER_DIR) if not pages: print(f"❌ No images found in: {CHAPTER_DIR}") @@ -187,33 +222,31 @@ def main(): print(f"\n📖 Chapter : {CHAPTER_DIR}") print(f" Pages : {len(pages)}") - print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}\n") + print(f" Source : {SOURCE_LANG} → Target: {TARGET_LANG}") + print(f" Render : {'ON' if RENDER_ENABLED else 'OFF'}\n") - # ── Process each page ───────────────────────────────────────── translated_dir = Path(CHAPTER_DIR) / "translated" succeeded = [] - failed = [] + failed = [] for i, page_path in enumerate(pages, start=1): - print(f"\n[{i}/{len(pages)}] {page_path.name}") + print(f"[{i}/{len(pages)}] {page_path.name}") workdir = make_page_workdir(CHAPTER_DIR, page_path.stem) - ok = process_page(page_path, workdir, translator) + ok = process_page(page_path, workdir, translator, renderer) if ok: succeeded.append(page_path.name) else: failed.append(page_path.name) - # ── Summary ─────────────────────────────────────────────────── - print(f"\n{'═'*60}") - print(f" PIPELINE COMPLETE") - print(f" ✅ {len(succeeded)} page(s) succeeded") + print(f"\n{'═' * 70}") + print("PIPELINE COMPLETE") + print(f"✅ {len(succeeded)} page(s) succeeded") if failed: - print(f" ❌ {len(failed)} page(s) failed:") + print(f"❌ {len(failed)} page(s) failed:") for f in failed: - print(f" • {f}") - print(f"{'═'*60}\n") + print(f" • {f}") + print(f"{'═' * 70}\n") - # ── Pack CBZ ────────────────────────────────────────────────── print("Packing CBZ...") pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)