Added some fixes

2026-04-14 20:08:51 +02:00
parent 0069da706b
commit f95b7d32d4
5 changed files with 359 additions and 171 deletions
--- a/fonts/ComicNeue-Regular.ttf
+++ b/fonts/ComicNeue-Regular.ttf
--- a/fonts/ComicRelief-Bold.ttf
+++ b/fonts/ComicRelief-Bold.ttf
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -10,10 +10,8 @@ from PIL import Image, ImageDraw, ImageFont
 #  CONFIG
 # ─────────────────────────────────────────────
 DEFAULT_FONT_CANDIDATES = [
-    "fonts/AnimeAce2_reg.ttf",
-    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
-    "fonts/NotoSans-Regular.ttf",
+    "fonts/ComicNeue-Regular.ttf",
 ]
 DEFAULT_FONT_COLOR = (0, 0, 0)
 DEFAULT_STROKE_COLOR = (255, 255, 255)
@@ -501,7 +499,7 @@ def render_translations(

 if __name__ == "__main__":
    render_translations(
-        input_image="002-page.png",
+        input_image="001-page.png",
        output_image="page_translated.png",
        translations_file="output.txt",
        bubbles_file="bubbles.json",
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import re
 import os
 import json
@@ -8,7 +11,7 @@ from deep_translator import GoogleTranslator


 # ─────────────────────────────────────────────
-#  CONFIG
+# CONFIG
 # ─────────────────────────────────────────────
 GLOSSARY = {
    "ANYA": "ANYA",
@@ -32,17 +35,17 @@ TITLE_PATTERNS = [

 NOISE_PATTERNS = [
    r"^[^a-zA-Z0-9\?!.]+$",
-    r"^BOX[0-9A-Z]*$",
+    r"^BOX[0-9A-Z#\s]*$",
 ]

 TOP_BAND_RATIO = 0.08


 # ─────────────────────────────────────────────
-#  TEXT HELPERS
+# TEXT HELPERS
 # ─────────────────────────────────────────────
-def normalize_text(text):
-    t = text.strip().upper()
+def normalize_text(text: str) -> str:
+    t = (text or "").strip().upper()
    t = t.replace("“", "\"").replace("”", "\"")
    t = t.replace("’", "'").replace("‘", "'")
    t = t.replace("…", "...")
@@ -54,13 +57,13 @@ def normalize_text(text):
    t = re.sub(r",\?", "?", t)
    return t.strip()

-def apply_glossary(text):
-    out = text
+def apply_glossary(text: str) -> str:
+    """Replace whole-word, case-insensitive occurrences of GLOSSARY keys with their mapped values.
+
+    Keys are applied longest first, so a longer key is substituted before any
+    shorter key it contains. None or empty input yields "".
+    """
+    out = text or ""
    for k in sorted(GLOSSARY.keys(), key=len, reverse=True):
        out = re.sub(rf"\b{re.escape(k)}\b", GLOSSARY[k], out, flags=re.IGNORECASE)
    return out

-def postprocess_translation_general(text):
+def postprocess_translation_general(text: str) -> str:
    t = normalize_text(text)
    t = re.sub(r"\s{2,}", " ", t).strip()
    t = re.sub(r"([!?]){3,}", r"\1\1", t)
@@ -69,23 +72,23 @@ def postprocess_translation_general(text):


 # ─────────────────────────────────────────────
-#  FILTERS
+# FILTERS
 # ─────────────────────────────────────────────
-def is_sound_effect(text):
-    cleaned = re.sub(r"[^a-z]", "", text.strip().lower())
+def is_sound_effect(text: str) -> bool:
+    """Return True if text, reduced to its lowercase a-z letters, fully matches a SOUND_EFFECT_PATTERNS regex."""
+    # Everything except a-z is dropped first, so punctuation, digits and spaces never affect the match.
+    cleaned = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    return any(re.fullmatch(p, cleaned, re.IGNORECASE) for p in SOUND_EFFECT_PATTERNS)

-def is_title_text(text):
-    t = text.strip().lower()
+def is_title_text(text: str) -> bool:
+    """Return True if the stripped, lowercased text fully matches any TITLE_PATTERNS regex."""
+    t = (text or "").strip().lower()
    return any(re.fullmatch(p, t, re.IGNORECASE) for p in TITLE_PATTERNS)

-def is_noise_text(text):
-    t = text.strip()
+def is_noise_text(text: str) -> bool:
+    """Return True if the stripped text fully matches any NOISE_PATTERNS regex.
+
+    Unlike the other filters, matching is case-sensitive (no re.IGNORECASE).
+    """
+    t = (text or "").strip()
    return any(re.fullmatch(p, t) for p in NOISE_PATTERNS)


 # ─────────────────────────────────────────────
-#  GEOMETRY
+# GEOMETRY
 # ─────────────────────────────────────────────
 def quad_bbox(quad):
    xs = [p[0] for p in quad]
@@ -127,9 +130,9 @@ def overlap_or_near(a, b, gap=0):


 # ─────────────────────────────────────────────
-#  QUALITY
+# QUALITY / SCORING
 # ─────────────────────────────────────────────
-def ocr_quality_score(text):
+def ocr_quality_score(text: str) -> float:
    if not text or len(text) < 2:
        return 0.0
    alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text))
@@ -141,21 +144,75 @@ def ocr_quality_score(text):
    bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
    return max(0.0, min(1.0, alpha_ratio - penalty + bonus))

+def ocr_candidate_score(text: str) -> float:
+    """Heuristic plausibility score in [0.0, 1.0] for an OCR text candidate.
+
+    The score is a weighted mix of character-class ratios over the stripped
+    text (alphabetic 0.62, whitespace 0.10, common punctuation 0.20), minus
+    0.45 times the ratio of characters outside word/space/common punctuation,
+    minus fixed penalties. Empty or whitespace-only input scores 0.0.
+    """
+    if not text:
+        return 0.0
+    t = text.strip()
+    n = len(t)
+    if n == 0:
+        return 0.0
+
+    # Ratios are all relative to the stripped length n.
+    alpha = sum(c.isalpha() for c in t) / n
+    spaces = sum(c.isspace() for c in t) / n
+    punct_ok = sum(c in ".,!?'-:;()[]\"" for c in t) / n
+    bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", t)) / n
+
+    penalty = 0.0
+    # A single ASCII capital standing alone as a word.
+    if re.search(r"\b[A-Z]\b", t):
+        penalty += 0.05
+    # A run of two or more digits.
+    if re.search(r"[0-9]{2,}", t):
+        penalty += 0.08
+    # Any two-character sequence repeated three times in a row (e.g. "ababab").
+    if re.search(r"(..)\1\1", t):
+        penalty += 0.08
+
+    score = (0.62 * alpha) + (0.10 * spaces) + (0.20 * punct_ok) - (0.45 * bad) - penalty
+    return max(0.0, min(1.0, score))
+

 # ─────────────────────────────────────────────
-#  OCR RE-READ
+# OCR MULTI-PASS
 # ─────────────────────────────────────────────
 def preprocess_variant(crop_bgr, mode):
+    """Convert a BGR crop to grayscale and apply the preprocessing named by mode.
+
+    Handled modes: "raw" (plain grayscale), "clahe" (CLAHE contrast
+    equalisation), "adaptive" (blur + Gaussian adaptive threshold), "otsu"
+    (blur + Otsu threshold) and "invert" (255 - gray). Any other mode falls
+    through to plain grayscale.
+    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
+
    if mode == "raw":
        return gray
+
    if mode == "clahe":
        return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
+
    if mode == "adaptive":
        den = cv2.GaussianBlur(gray, (3, 3), 0)
-        return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11)
+        return cv2.adaptiveThreshold(
+            den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY, 35, 11
+        )
+
+    if mode == "otsu":
+        den = cv2.GaussianBlur(gray, (3, 3), 0)
+        # With THRESH_OTSU the threshold argument (0) is ignored and computed from the image.
+        _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        return th
+
+    if mode == "invert":
+        return 255 - gray
+
    return gray

+def rotate_image_keep_bounds(img, angle_deg):
+    """Rotate img around its center, enlarging the canvas so no content is cropped.
+
+    Args:
+        img: image array; only img.shape[:2] is read, so both single-channel
+            and multi-channel arrays are accepted.
+        angle_deg: rotation angle in degrees, passed to cv2.getRotationMatrix2D.
+
+    Returns:
+        The rotated image on a white canvas of the enlarged size.
+    """
+    h, w = img.shape[:2]
+    c = (w / 2, h / 2)
+    M = cv2.getRotationMatrix2D(c, angle_deg, 1.0)
+    cos = abs(M[0, 0])
+    sin = abs(M[0, 1])
+
+    # Bounding box of the rotated w x h rectangle.
+    new_w = int((h * sin) + (w * cos))
+    new_h = int((h * cos) + (w * sin))
+
+    # Shift the transform so the old center maps to the new canvas center.
+    M[0, 2] += (new_w / 2) - c[0]
+    M[1, 2] += (new_h / 2) - c[1]
+
+    # A scalar borderValue fills only the first channel (255 -> (255, 0, 0, 0)),
+    # which paints the exposed corners blue on BGR input. Give every channel
+    # 255 so the border is white for 1-, 3- and 4-channel images alike.
+    return cv2.warpAffine(
+        img, M, (new_w, new_h),
+        flags=cv2.INTER_CUBIC,
+        borderMode=cv2.BORDER_CONSTANT,
+        borderValue=(255, 255, 255, 255),
+    )
+
 def run_ocr_on_array(reader, arr):
    tmp = "_tmp_ocr.png"
    cv2.imwrite(tmp, arr)
@@ -165,35 +222,104 @@ def run_ocr_on_array(reader, arr):
        if os.path.exists(tmp):
            os.remove(tmp)

-def reread_crop(image, bbox, reader, upscale=2.5, pad=18):
+def rebuild_text_from_ocr_result(res):
+    """Rebuild one reading-order string from raw OCR detections.
+
+    res is iterated as (bbox, text, conf) triples; entries of any other length
+    or with blank text are ignored. Detections are grouped into rows by
+    vertical center, rows are ordered top to bottom and tokens within a row
+    left to right, then everything is joined with spaces and passed through
+    normalize_text. Returns "" when nothing usable remains.
+    """
+    if not res:
+        return ""
+
+    norm = []
+    for item in res:
+        if len(item) != 3:
+            continue
+        bbox, txt, conf = item
+        if not txt or not txt.strip():
+            continue
+        # b is used below as (x_min, y_min, x_max, y_max).
+        b = quad_bbox(bbox)
+        xc = (b[0] + b[2]) / 2.0
+        yc = (b[1] + b[3]) / 2.0
+        h = max(1.0, b[3] - b[1])
+        norm.append((b, txt, conf, xc, yc, h))
+
+    if not norm:
+        return ""
+
+    # Row tolerance scales with the median token height, with a 6 px floor.
+    med_h = float(np.median([x[5] for x in norm]))
+    row_tol = max(6.0, med_h * 0.75)
+
+    norm.sort(key=lambda z: z[4])  # y
+    rows = []
+    # Greedy pass in y order: a token joins the first row whose running mean
+    # center is within row_tol, otherwise it starts a new row.
+    for it in norm:
+        placed = False
+        for r in rows:
+            if abs(it[4] - r["yc"]) <= row_tol:
+                r["m"].append(it)
+                r["yc"] = float(np.mean([k[4] for k in r["m"]]))
+                placed = True
+                break
+        if not placed:
+            rows.append({"yc": it[4], "m": [it]})
+
+    # Row centers drift as members are added, so re-sort before emitting.
+    rows.sort(key=lambda r: r["yc"])
+
+    lines = []
+    for r in rows:
+        mem = sorted(r["m"], key=lambda z: z[3])  # x
+        line = normalize_text(" ".join(x[1] for x in mem))
+        if line:
+            lines.append(line)
+
+    return normalize_text(" ".join(lines))
+
+def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22):
+    """Re-OCR a padded, upscaled crop of image under several preprocessings and small rotations.
+
+    The bbox (x1, y1, x2, y2) is grown by pad pixels, clamped to the image and
+    resized by upscale. Every mode in modes is combined with every angle in
+    angles (5 x 3 OCR runs); each result is rebuilt into text and scored with
+    ocr_candidate_score, and the strictly highest score wins.
+
+    Returns (text, score), or (None, 0.0) when the crop is empty or no
+    candidate scores above 0.0.
+    """
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
-    x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad))
-    x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad))
+    x1 = max(0, int(x1 - pad))
+    y1 = max(0, int(y1 - pad))
+    x2 = min(iw, int(x2 + pad))
+    y2 = min(ih, int(y2 + pad))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
-        return None
+        return None, 0.0

-    up = cv2.resize(crop, (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), interpolation=cv2.INTER_CUBIC)
+    up = cv2.resize(
+        crop,
+        (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)),
+        interpolation=cv2.INTER_CUBIC
+    )

-    best = None
-    for mode in ("raw", "clahe", "adaptive"):
+    modes = ["raw", "clahe", "adaptive", "otsu", "invert"]
+    angles = [0.0, 1.5, -1.5]
+
+    best_text, best_score = "", 0.0
+
+    for mode in modes:
        proc = preprocess_variant(up, mode)
-        res = run_ocr_on_array(reader, proc)
-        if not res:
-            continue
-        res.sort(key=lambda r: (r[0][0][1], r[0][0][0]))
-        lines = [normalize_text(t) for _, t, _ in res if t.strip()]
-        merged = re.sub(r"\s{2,}", " ", " ".join(lines)).strip()
-        s = ocr_quality_score(merged)
-        if best is None or s > best[0]:
-            best = (s, merged)

-    return best[1] if best else None
+        # NOTE(review): every preprocess_variant branch shown returns a
+        # single-channel array, so this widens to BGR only for the rotation and
+        # is narrowed back to gray below; confirm the round trip is needed.
+        if len(proc.shape) == 2:
+            proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR)
+        else:
+            proc3 = proc
+
+        for a in angles:
+            rot = rotate_image_keep_bounds(proc3, a)
+            if len(rot.shape) == 3:
+                rot_in = cv2.cvtColor(rot, cv2.COLOR_BGR2GRAY)
+            else:
+                rot_in = rot
+
+            res = run_ocr_on_array(reader, rot_in)
+            txt = rebuild_text_from_ocr_result(res)
+            sc = ocr_candidate_score(txt)
+
+            # Strict comparison: the first candidate to reach a score keeps it on ties.
+            if sc > best_score:
+                best_text, best_score = txt, sc
+
+    if not best_text:
+        return None, 0.0
+    return best_text, best_score


 # ─────────────────────────────────────────────
-#  LINES + YELLOW BOXES
+# LINES + YELLOW BOXES
 # ─────────────────────────────────────────────
 def build_lines_from_indices(indices, ocr):
    if not indices:
@@ -233,14 +359,13 @@ def build_lines_from_indices(indices, ocr):

    return lines

-
 def build_line_boxes_from_indices(indices, ocr):
    """
    Robust yellow-box generation with punctuation attachment:
    - row grouping
    - chunking by x gap
    - attach tiny punctuation/special tokens to nearest chunk
-    - coverage guarantee
+    - token coverage guarantee
    """
    if not indices:
        return []
@@ -264,13 +389,13 @@ def build_line_boxes_from_indices(indices, ocr):
    pad = max(1, int(round(med_h * 0.12)))

    def is_punct_like(t):
-        raw = t.strip()
+        raw = (t or "").strip()
        if raw == "":
            return True
        punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
        return punct_ratio >= 0.5 or len(raw) <= 2

-    # 1) rows
+    # 1) row grouping
    items_sorted = sorted(items, key=lambda x: x["yc"])
    rows = []
    for it in items_sorted:
@@ -296,7 +421,7 @@ def build_line_boxes_from_indices(indices, ocr):
            normal = mem
            punct = []

-        # 2) chunk normal tokens
+        # 2) chunk normal by x-gap
        chunks = []
        cur = [normal[0]]
        for t in normal[1:]:
@@ -310,18 +435,17 @@ def build_line_boxes_from_indices(indices, ocr):
                cur = [t]
        chunks.append(cur)

-        # 3) attach punctuation tokens
+        # 3) attach punct tokens to nearest chunk
        for p in punct:
            pb = p["b"]
            pxc, pyc = p["xc"], p["yc"]
-
            best_k = -1
            best_score = 1e18
+
            for k, ch in enumerate(chunks):
                ub = boxes_union_xyxy([x["b"] for x in ch])
                cx = (ub[0] + ub[2]) / 2.0
                cy = (ub[1] + ub[3]) / 2.0
-
                dx = abs(pxc - cx)
                dy = abs(pyc - cy)
                score = dx + 1.8 * dy
@@ -339,22 +463,21 @@ def build_line_boxes_from_indices(indices, ocr):
            else:
                chunks.append([p])

-        # 4) chunk boxes
+        # 4) emit chunk boxes
        for ch in chunks:
            ub = boxes_union_xyxy([x["b"] for x in ch])
            if ub:
                x1, y1, x2, y2 = ub
                out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))

-    # 5) guarantee all tokens included
+    # 5) guarantee every token is inside some yellow box
    token_boxes = [it["b"] for it in items]

    def inside(tb, lb):
        return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]

    for tb in token_boxes:
-        ok = any(inside(tb, lb) for lb in out_boxes)
-        if not ok:
+        if not any(inside(tb, lb) for lb in out_boxes):
            x1, y1, x2, y2 = tb
            out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))

@@ -366,8 +489,8 @@ def build_line_boxes_from_indices(indices, ocr):
            ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
            ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
            inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
-            a1 = max(1, (b[2]-b[0])*(b[3]-b[1]))
-            a2 = max(1, (m[2]-m[0])*(m[3]-m[1]))
+            a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
+            a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
            iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
            if iou > 0.72:
                merged[i] = boxes_union_xyxy([b, m])
@@ -381,7 +504,7 @@ def build_line_boxes_from_indices(indices, ocr):


 # ─────────────────────────────────────────────
-#  GROUPING
+# GROUPING
 # ─────────────────────────────────────────────
 def auto_gap(image_path, base=18, ref_w=750):
    img = cv2.imread(image_path)
@@ -426,7 +549,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
    for i in range(n):
        groups.setdefault(find(i), []).append(i)

-    sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs)))
+    sorted_groups = sorted(
+        groups.values(),
+        key=lambda idxs: (
+            min(boxes[i][1] for i in idxs),
+            min(boxes[i][0] for i in idxs)
+        )
+    )

    bubbles = {}
    bubble_boxes = {}
@@ -436,6 +565,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
    ih, iw = image_shape[:2]
    for bid, idxs in enumerate(sorted_groups, start=1):
        idxs = sorted(idxs, key=lambda k: boxes[k][1])
+
        lines = build_lines_from_indices(idxs, ocr)
        quads = [ocr[k][0] for k in idxs]
        ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
@@ -443,8 +573,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
            continue

        x1, y1, x2, y2 = ub
-        x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
-        x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding)
+        x1 = max(0, x1 - bbox_padding)
+        y1 = max(0, y1 - bbox_padding)
+        x2 = min(iw, x2 + bbox_padding)
+        y2 = min(ih, y2 + bbox_padding)

        bubbles[bid] = lines
        bubble_boxes[bid] = (x1, y1, x2, y2)
@@ -455,23 +587,24 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):


 # ─────────────────────────────────────────────
-#  DEBUG
+# DEBUG
 # ─────────────────────────────────────────────
 def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
    img = cv2.imread(image_path)
    if img is None:
        return

-    # token quads
+    # OCR token quads
    for bbox, txt, conf in ocr:
        pts = np.array(bbox, dtype=np.int32)
        cv2.polylines(img, [pts], True, (180, 180, 180), 1)

-    # bubble boxes + yellow line boxes
+    # Bubble + line boxes
    for bid, bb in bubble_boxes.items():
        x1, y1, x2, y2 = bb
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
-        cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
+        cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)

        idxs = bubble_indices.get(bid, [])
        line_boxes = build_line_boxes_from_indices(idxs, ocr)
@@ -485,7 +618,7 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path=


 # ─────────────────────────────────────────────
-#  EXPORT
+# EXPORT
 # ─────────────────────────────────────────────
 def estimate_reading_order(bbox_dict, mode="ltr"):
    items = []
@@ -494,7 +627,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
        cy = (y1 + y2) / 2.0
        items.append((bid, cx, cy))

-    items.sort(key=lambda t: t[2])
+    items.sort(key=lambda t: t[2])  # top to bottom

    rows = []
    tol = 90
@@ -517,7 +650,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"):

    return {bid: i + 1 for i, bid in enumerate(order)}

-
 def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
    out = {}

@@ -536,11 +668,15 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
        out[str(bid)] = {
            "x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
            "reading_order": int(reading_map.get(bid, bid)),
-            "quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes],
-            "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
+            "quad_bboxes": [
+                {"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
+                for b in qboxes
+            ],
+            "quads": [
+                [[int(p[0]), int(p[1])] for p in q] for q in quads
+            ],
            "text_bbox": xyxy_to_xywh(text_union),

-            # yellow geometry
            "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
            "line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
            "line_union_area": int(line_union_area),
@@ -551,7 +687,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m


 # ─────────────────────────────────────────────
-#  MAIN
+# MAIN
 # ─────────────────────────────────────────────
 def translate_manga_text(
    image_path,
@@ -606,6 +742,7 @@ def translate_manga_text(
            skipped += 1
            continue

+        # reduce false positives in very top strip
        if qb[1] < int(ih * TOP_BAND_RATIO):
            if conf < 0.70 and len(t) >= 5:
                skipped += 1
@@ -633,14 +770,28 @@ def translate_manga_text(

    translator = GoogleTranslator(source=source_lang, target=target_lang)

+    # robust bubble text cleanup
    clean_lines = {}
    for bid, lines in bubbles.items():
-        txt = normalize_text(" ".join(lines))
-        q = ocr_quality_score(txt)
-        if q < quality_threshold:
-            reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18)
-            if reread:
-                txt = normalize_text(reread)
+        base_txt = normalize_text(" ".join(lines))
+        base_sc = ocr_candidate_score(base_txt)
+
+        # only robust reread on low quality
+        if base_sc < quality_threshold:
+            rr_txt, rr_sc = reread_crop_robust(
+                image,
+                bubble_boxes[bid],
+                reader,
+                upscale=3.0,
+                pad=22
+            )
+            if rr_txt and rr_sc > base_sc + 0.06:
+                txt = rr_txt
+            else:
+                txt = base_txt
+        else:
+            txt = base_txt
+
        clean_lines[bid] = apply_glossary(txt)

    reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
@@ -657,18 +808,24 @@ def translate_manga_text(
        src = clean_lines[bid].strip()
        if not src:
            continue
-        flags = []

+        flags = []
        try:
            tgt = translator.translate(src) or ""
        except Exception as e:
            tgt = f"[Translation error: {e}]"
+            flags.append("TRANSLATION_ERROR")

        tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
        src_u = src.upper()

-        out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}")
-        print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}")
+        out_lines.append(
+            f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
+        )
+        print(
+            f"#{bid:<7} {reading_map.get(bid,bid):<6} "
+            f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
+        )
        translated_count += 1

    out_lines.append(divider)
@@ -691,13 +848,13 @@ def translate_manga_text(
    print(f"Saved: {export_to_file}")
    print(f"Saved: {export_bubbles_to}")
    if debug:
-        print("Saved: debug_clusters.png (special chars included in yellow boxes)")
+        print("Saved: debug_clusters.png")


 if __name__ == "__main__":
    translate_manga_text(
-        image_path="002-page.png",
-        source_lang="en",
+        image_path="001-page.png",
+        source_lang="it",
        target_lang="ca",
        confidence_threshold=0.12,
        min_text_length=1,
--- a/pipeline.py
+++ b/pipeline.py
@@ -2,60 +2,76 @@
 """
 pipeline.py
 ───────────────────────────────────────────────────────────────
-Translation-only pipeline for Dandadan_059_2022_Digital
+Translation + render pipeline

 Flow per page:
-  1. Run translate_manga_text()  → output.txt + bubbles.json
-  2. Copy original image to workdir for reference
+  1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
+  2) render_translations()  -> page_translated.png
+  3) After all pages: pack CBZ with originals + rendered pages + text outputs

-Folder structure produced:
-  Dandadan_059_2022_Digital_1r0n/
+Folder structure:
+  <CHAPTER_DIR>/
+  ├── 000.png
+  ├── 001.png
  └── translated/
-      ├── 00/
-      │   ├── output.txt            ← translations to review
-      │   ├── bubbles.json          ← bubble boxes
-      │   └── debug_clusters.png    ← cluster debug (if DEBUG=True)
-      ├── 01/
+      ├── 000/
+      │   ├── output.txt
+      │   ├── bubbles.json
+      │   ├── page_translated.png
+      │   └── debug_clusters.png (optional)
+      ├── 001/
      │   └── ...
      └── ...

-  Dandadan_059_translated.cbz       ← original pages + translations
-                                       zipped for reference
+CBZ:
+  - pages/<original pages>
+  - rendered/<page_stem>_translated.png
+  - translations/<page_stem>_output.txt
 """

 import os
 import sys
-import shutil
 import zipfile
 import importlib.util
 from pathlib import Path


 # ─────────────────────────────────────────────
-#  CONFIG  — edit these as needed
+#  CONFIG
 # ─────────────────────────────────────────────
-CHAPTER_DIR  = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
-OUTPUT_CBZ   = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
-SOURCE_LANG  = "en"
-TARGET_LANG  = "ca"
+CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
+OUTPUT_CBZ  = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"

-# manga-translator.py settings
+SOURCE_LANG = "en"
+TARGET_LANG = "ca"
+
+# translator (NEW signature-compatible)
 CONFIDENCE_THRESHOLD = 0.10
-MIN_TEXT_LENGTH      = 2
-CLUSTER_EPS          = "auto"
-PROXIMITY_PX         = 80
+MIN_TEXT_LENGTH      = 1
+GAP_PX               = "auto"      # was cluster/proximity in old version
 FILTER_SFX           = True
-QUALITY_THRESHOLD    = 0.5
-UPSCALE_FACTOR       = 2.5
-BBOX_PADDING         = 5
+QUALITY_THRESHOLD    = 0.50
+READING_MODE         = "ltr"
 DEBUG                = True

+# renderer
+RENDER_ENABLED       = True
+RENDER_OUTPUT_NAME   = "page_translated.png"
+
+# optional custom font list for renderer
+FONT_CANDIDATES = [
+    "fonts/ComicNeue-Regular.ttf",
+    "fonts/ComicRelief-Regular.ttf"
+]
+

 # ─────────────────────────────────────────────
 #  DYNAMIC MODULE LOADER
 # ─────────────────────────────────────────────
 def load_module(name, filepath):
+    """Load and execute the Python file at filepath as a module called name.
+
+    Needed because the script file names contain hyphens and cannot be
+    imported with a plain import statement.
+
+    Raises:
+        FileNotFoundError: if no import spec or loader can be built for filepath.
+    """
-    spec   = importlib.util.spec_from_file_location(name, filepath)
+    spec = importlib.util.spec_from_file_location(name, filepath)
+    if spec is None or spec.loader is None:
+        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
@@ -65,10 +81,10 @@ def load_module(name, filepath):
 #  HELPERS
 # ─────────────────────────────────────────────
 def sorted_pages(chapter_dir):
+    """Return the image files directly inside chapter_dir, sorted by file stem.
+
+    Only regular files with a .jpg, .jpeg, .png or .webp suffix (any case) are
+    kept; subdirectories are not searched.
+    """
-    exts  = {".jpg", ".jpeg", ".png", ".webp"}
+    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        p for p in Path(chapter_dir).iterdir()
-        if p.suffix.lower() in exts
+        if p.is_file() and p.suffix.lower() in exts
    ]
+    # Stems compare as strings, so page numbers must be zero-padded ("010" vs "2") to sort numerically.
    return sorted(pages, key=lambda p: p.stem)

@@ -80,82 +96,97 @@ def make_page_workdir(chapter_dir, page_stem):


 def pack_cbz(chapter_dir, translated_dir, output_cbz):
-    """
-    Packs into CBZ:
-      - All original pages  (from chapter_dir root)
-      - All output.txt      (one per page subfolder)
-    Sorted by page stem for correct reading order.
-    """
+    """
+    Write output_cbz as an uncompressed (ZIP_STORED) archive containing:
+      - pages/<file name>                     every image file directly in chapter_dir
+      - rendered/<page folder>_translated.png every RENDER_OUTPUT_NAME under translated_dir
+      - translations/<page folder>_output.txt every output.txt under translated_dir
+    translated_dir must provide rglob (it is used as a pathlib.Path).
+    Prints a warning and returns without creating the archive when no
+    original pages are found.
+    """
-    exts   = {".jpg", ".jpeg", ".png", ".webp"}
-    pages  = sorted(
+    exts = {".jpg", ".jpeg", ".png", ".webp"}
+
+    pages = sorted(
        [p for p in Path(chapter_dir).iterdir()
-         if p.suffix.lower() in exts],
+         if p.is_file() and p.suffix.lower() in exts],
        key=lambda p: p.stem
    )
-    txts   = sorted(
+
+    txts = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name
    )

+    rendered = sorted(
+        translated_dir.rglob(RENDER_OUTPUT_NAME),
+        key=lambda p: p.parent.name
+    )
+
    if not pages:
        print("⚠️  No original pages found — CBZ not created.")
        return

-    with zipfile.ZipFile(output_cbz, "w",
-                         compression=zipfile.ZIP_STORED) as zf:
-        # Original pages
+    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
+        # original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f"  🖼  {arcname}")

-        # Translation text files
+        # rendered pages
+        for rp in rendered:
+            # The per-page work folder name identifies the page inside the archive.
+            arcname = f"rendered/{rp.parent.name}_translated.png"
+            zf.write(rp, arcname)
+            print(f"  🎨 {arcname}")
+
+        # text outputs
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f"  📄 {arcname}")

-    print(f"\n✅ CBZ saved → {output_cbz}  "
-          f"({len(pages)} page(s), {len(txts)} translation(s))")
+    print(
+        f"\n✅ CBZ saved → {output_cbz} "
+        f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)"
+    )


 # ─────────────────────────────────────────────
 #  PER-PAGE PIPELINE
 # ─────────────────────────────────────────────
-def process_page(page_path, workdir, translator_module):
+def process_page(page_path, workdir, translator_module, renderer_module):
    """
-    Runs translator for a single page.
-    All output files land in workdir.
-    Returns True on success, False on failure.
+    Runs translator + renderer for one page.
+    All generated files are written inside workdir.
    """
-    print(f"\n{'─'*60}")
-    print(f"  PAGE: {page_path.name}")
-    print(f"{'─'*60}")
+    print(f"\n{'─' * 70}")
+    print(f"PAGE: {page_path.name}")
+    print(f"{'─' * 70}")

    orig_dir = os.getcwd()
    try:
-        # chdir into workdir so debug_clusters.png,
-        # temp files etc. all land there
        os.chdir(workdir)

+        # 1) translate
        translator_module.translate_manga_text(
-            image_path           = str(page_path.resolve()),
-            source_lang          = SOURCE_LANG,
-            target_lang          = TARGET_LANG,
-            confidence_threshold = CONFIDENCE_THRESHOLD,
-            export_to_file       = "output.txt",
-            export_bubbles_to    = "bubbles.json",
-            min_text_length      = MIN_TEXT_LENGTH,
-            cluster_eps          = CLUSTER_EPS,
-            proximity_px         = PROXIMITY_PX,
-            filter_sound_effects = FILTER_SFX,
-            quality_threshold    = QUALITY_THRESHOLD,
-            upscale_factor       = UPSCALE_FACTOR,
-            bbox_padding         = BBOX_PADDING,
-            debug                = DEBUG,
+            image_path= str(page_path.resolve()),
+            source_lang=SOURCE_LANG,
+            target_lang=TARGET_LANG,
+            confidence_threshold=CONFIDENCE_THRESHOLD,
+            min_text_length=MIN_TEXT_LENGTH,
+            gap_px=GAP_PX,
+            filter_sound_effects=FILTER_SFX,
+            quality_threshold=QUALITY_THRESHOLD,
+            export_to_file="output.txt",
+            export_bubbles_to="bubbles.json",
+            reading_mode=READING_MODE,
+            debug=DEBUG
        )
+        print("  ✅ translator done")
+
+        # 2) render
+        if RENDER_ENABLED:
+            renderer_module.render_translations(
+                input_image=str(page_path.resolve()),
+                output_image=RENDER_OUTPUT_NAME,
+                translations_file="output.txt",
+                bubbles_file="bubbles.json",
+                font_candidates=FONT_CANDIDATES
+            )
+            print("  ✅ renderer done")

-        print(f"  ✅ Translated → {workdir}")
        return True

    except Exception as e:
@@ -170,16 +201,20 @@ def process_page(page_path, workdir, translator_module):
 #  MAIN
 # ─────────────────────────────────────────────
 def main():
-    # ── Load translator module ────────────────────────────────────
-    print("Loading manga-translator.py...")
+    print("Loading modules...")
+
    try:
-        translator = load_module(
-            "manga_translator", "manga-translator.py")
-    except FileNotFoundError as e:
-        print(f"❌ Could not load module: {e}")
+        translator = load_module("manga_translator", "manga-translator.py")
+    except Exception as e:
+        print(f"❌ Could not load manga-translator.py: {e}")
+        sys.exit(1)
+
+    try:
+        renderer = load_module("manga_renderer", "manga-renderer.py")
+    except Exception as e:
+        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)

-    # ── Discover pages ────────────────────────────────────────────
    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
@@ -187,33 +222,31 @@ def main():

    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f"   Pages   : {len(pages)}")
-    print(f"   Source  : {SOURCE_LANG}  →  Target: {TARGET_LANG}\n")
+    print(f"   Source  : {SOURCE_LANG} → Target: {TARGET_LANG}")
+    print(f"   Render  : {'ON' if RENDER_ENABLED else 'OFF'}\n")

-    # ── Process each page ─────────────────────────────────────────
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
-    failed    = []
+    failed = []

    for i, page_path in enumerate(pages, start=1):
-        print(f"\n[{i}/{len(pages)}] {page_path.name}")
+        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
-        ok      = process_page(page_path, workdir, translator)
+        ok = process_page(page_path, workdir, translator, renderer)
        if ok:
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)

-    # ── Summary ───────────────────────────────────────────────────
-    print(f"\n{'═'*60}")
-    print(f"  PIPELINE COMPLETE")
-    print(f"  ✅ {len(succeeded)} page(s) succeeded")
+    print(f"\n{'═' * 70}")
+    print("PIPELINE COMPLETE")
+    print(f"✅ {len(succeeded)} page(s) succeeded")
    if failed:
-        print(f"  ❌ {len(failed)} page(s) failed:")
+        print(f"❌ {len(failed)} page(s) failed:")
        for f in failed:
-            print(f"     • {f}")
-    print(f"{'═'*60}\n")
+            print(f"   • {f}")
+    print(f"{'═' * 70}\n")

-    # ── Pack CBZ ──────────────────────────────────────────────────
    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)