Added some fixes: multi-pass OCR re-read, None-safe text helpers, and a render step in the pipeline

2026-04-14 20:08:51 +02:00
parent 0069da706b
commit f95b7d32d4
5 changed files with 359 additions and 171 deletions
--- a/fonts/ComicNeue-Regular.ttf
+++ b/fonts/ComicNeue-Regular.ttf
--- a/fonts/ComicRelief-Bold.ttf
+++ b/fonts/ComicRelief-Bold.ttf
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -10,10 +10,8 @@ from PIL import Image, ImageDraw, ImageFont
 #  CONFIG
 # ─────────────────────────────────────────────
 DEFAULT_FONT_CANDIDATES = [
    "fonts/AnimeAce2_reg.ttf",
    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
-    "fonts/NotoSans-Regular.ttf",
+    "fonts/ComicNeue-Regular.ttf",
 ]
 DEFAULT_FONT_COLOR = (0, 0, 0)
 DEFAULT_STROKE_COLOR = (255, 255, 255)
@@ -501,7 +499,7 @@ def render_translations(
 if __name__ == "__main__":
    render_translations(
-        input_image="002-page.png",
+        input_image="001-page.png",
        output_image="page_translated.png",
        translations_file="output.txt",
        bubbles_file="bubbles.json",
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -1,3 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import re
 import os
 import json
@@ -8,7 +11,7 @@ from deep_translator import GoogleTranslator
 # ─────────────────────────────────────────────
-#  CONFIG
+# CONFIG
 # ─────────────────────────────────────────────
 GLOSSARY = {
    "ANYA": "ANYA",
@@ -32,17 +35,17 @@ TITLE_PATTERNS = [
 NOISE_PATTERNS = [
    r"^[^a-zA-Z0-9\?!.]+$",
-    r"^BOX[0-9A-Z]*$",
+    r"^BOX[0-9A-Z#\s]*$",
 ]
 TOP_BAND_RATIO = 0.08
 # ─────────────────────────────────────────────
-#  TEXT HELPERS
+# TEXT HELPERS
 # ─────────────────────────────────────────────
-def normalize_text(text):
+def normalize_text(text: str) -> str:
-    t = text.strip().upper()
+    t = (text or "").strip().upper()
    t = t.replace("“", "\"").replace("”", "\"")
    t = t.replace("’", "'").replace("‘", "'")
    t = t.replace("…", "...")
@@ -54,13 +57,13 @@ def normalize_text(text):
    t = re.sub(r",\?", "?", t)
    return t.strip()
-def apply_glossary(text):
+def apply_glossary(text: str) -> str:
-    out = text
+    out = text or ""
    for k in sorted(GLOSSARY.keys(), key=len, reverse=True):
        out = re.sub(rf"\b{re.escape(k)}\b", GLOSSARY[k], out, flags=re.IGNORECASE)
    return out
-def postprocess_translation_general(text):
+def postprocess_translation_general(text: str) -> str:
    t = normalize_text(text)
    t = re.sub(r"\s{2,}", " ", t).strip()
    t = re.sub(r"([!?]){3,}", r"\1\1", t)
@@ -69,23 +72,23 @@ def postprocess_translation_general(text):
 # ─────────────────────────────────────────────
-#  FILTERS
+# FILTERS
 # ─────────────────────────────────────────────
-def is_sound_effect(text):
+def is_sound_effect(text: str) -> bool:
-    cleaned = re.sub(r"[^a-z]", "", text.strip().lower())
+    cleaned = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    return any(re.fullmatch(p, cleaned, re.IGNORECASE) for p in SOUND_EFFECT_PATTERNS)
-def is_title_text(text):
+def is_title_text(text: str) -> bool:
-    t = text.strip().lower()
+    t = (text or "").strip().lower()
    return any(re.fullmatch(p, t, re.IGNORECASE) for p in TITLE_PATTERNS)
-def is_noise_text(text):
+def is_noise_text(text: str) -> bool:
-    t = text.strip()
+    t = (text or "").strip()
    return any(re.fullmatch(p, t) for p in NOISE_PATTERNS)
 # ─────────────────────────────────────────────
-#  GEOMETRY
+# GEOMETRY
 # ─────────────────────────────────────────────
 def quad_bbox(quad):
    xs = [p[0] for p in quad]
@@ -127,9 +130,9 @@ def overlap_or_near(a, b, gap=0):
 # ─────────────────────────────────────────────
-#  QUALITY
+# QUALITY / SCORING
 # ─────────────────────────────────────────────
-def ocr_quality_score(text):
+def ocr_quality_score(text: str) -> float:
    if not text or len(text) < 2:
        return 0.0
    alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text))
@@ -141,21 +144,75 @@ def ocr_quality_score(text):
    bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0
    return max(0.0, min(1.0, alpha_ratio - penalty + bonus))
 def ocr_candidate_score(text: str) -> float:
    """Heuristically rate how much an OCR candidate looks like real text.

    The score rewards letters, inner whitespace and common punctuation,
    subtracts for characters outside that set, and applies fixed penalties
    for isolated capital letters, digit runs and a two-character chunk
    repeated three times in a row.

    Args:
        text: Candidate string; ``None`` or an empty string is accepted.

    Returns:
        A value clamped to ``[0.0, 1.0]``; ``0.0`` for empty or
        whitespace-only input.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return 0.0
    length = len(stripped)

    # Single pass to tally the three "good" character classes.
    letter_count = 0
    space_count = 0
    punct_count = 0
    for ch in stripped:
        letter_count += ch.isalpha()
        space_count += ch.isspace()
        punct_count += ch in ".,!?'-:;()[]\""

    alpha_ratio = letter_count / length
    space_ratio = space_count / length
    punct_ratio = punct_count / length
    junk_ratio = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"]", stripped)) / length

    # (pattern, cost) pairs, evaluated in order so the float sum is stable.
    penalty_rules = (
        (r"\b[A-Z]\b", 0.05),
        (r"[0-9]{2,}", 0.08),
        (r"(..)\1\1", 0.08),
    )
    penalty = 0.0
    for pattern, cost in penalty_rules:
        if re.search(pattern, stripped):
            penalty += cost

    raw = (0.62 * alpha_ratio) + (0.10 * space_ratio) + (0.20 * punct_ratio) - (0.45 * junk_ratio) - penalty
    return max(0.0, min(1.0, raw))
 # ─────────────────────────────────────────────
-#  OCR RE-READ
+# OCR MULTI-PASS
 # ─────────────────────────────────────────────
 def preprocess_variant(crop_bgr, mode):
    """Return a single-channel preprocessing variant of a BGR crop for OCR.

    Args:
        crop_bgr: Image crop passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY`` (so it is expected to be a BGR image).
        mode: One of ``"raw"``, ``"clahe"``, ``"adaptive"``, ``"otsu"`` or
            ``"invert"``. Any other value falls through to the plain
            grayscale image.

    Returns:
        The grayscale image after the selected transformation.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
    if mode == "raw":
        return gray
    if mode == "clahe":
        # Local contrast equalisation on 8x8 tiles.
        return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
    if mode == "adaptive":
        # Light blur first, then a Gaussian adaptive threshold
        # (block size 35, constant 11).
        den = cv2.GaussianBlur(gray, (3, 3), 0)
-        return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11)
+        return cv2.adaptiveThreshold(
            den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 35, 11
        )
    if mode == "otsu":
        # Global threshold; the 0 is a placeholder because Otsu picks the level.
        den = cv2.GaussianBlur(gray, (3, 3), 0)
        _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th
    if mode == "invert":
        # Photographic negative of the grayscale crop.
        return 255 - gray
    # Unknown mode: behave like "raw".
    return gray
 def rotate_image_keep_bounds(img, angle_deg):
    """Rotate ``img`` about its centre, growing the canvas so nothing is cropped.

    Args:
        img: Image array whose first two dimensions are (height, width);
            may be single-channel or multi-channel.
        angle_deg: Rotation angle in degrees, passed straight to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image on a canvas sized to the rotated bounding box.
        Areas not covered by the source are filled with white.
    """
    h, w = img.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])
    # Bounding box of the rotated rectangle.
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))
    # Shift the transform so the rotated image is centred on the new canvas.
    M[0, 2] += (new_w / 2) - center[0]
    M[1, 2] += (new_h / 2) - center[1]
    # A scalar border value is expanded to (255, 0, 0, 0), which paints the
    # corners blue on 3-channel images. Give every channel 255 so the fill is
    # white for both grayscale and colour inputs.
    return cv2.warpAffine(
        img, M, (new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderValue=(255, 255, 255),
    )
 def run_ocr_on_array(reader, arr):
    tmp = "_tmp_ocr.png"
    cv2.imwrite(tmp, arr)
@@ -165,35 +222,104 @@ def run_ocr_on_array(reader, arr):
        if os.path.exists(tmp):
            os.remove(tmp)
-def reread_crop(image, bbox, reader, upscale=2.5, pad=18):
+def rebuild_text_from_ocr_result(res):
    """Reassemble OCR detections into one normalized string in reading order.

    Detections are grouped into rows by the vertical centre of their bounding
    box, rows are ordered top to bottom, and tokens inside a row are ordered
    left to right.

    Args:
        res: Iterable of ``(bbox, txt, conf)`` triples. Items that do not have
            exactly three elements, or whose text is empty/blank, are ignored.

    Returns:
        The joined text after ``normalize_text``; ``""`` when ``res`` is empty
        or contains no usable item.
    """
    if not res:
        return ""
    norm = []
    for item in res:
        if len(item) != 3:
            continue
        bbox, txt, conf = item
        if not txt or not txt.strip():
            continue
        # b is indexed as (x1, y1, x2, y2) below; presumably that is what
        # quad_bbox returns - verify against its definition.
        b = quad_bbox(bbox)
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        norm.append((b, txt, conf, xc, yc, h))
    if not norm:
        return ""
    # Row tolerance scales with the median token height, with a 6 px floor.
    med_h = float(np.median([x[5] for x in norm]))
    row_tol = max(6.0, med_h * 0.75)
    norm.sort(key=lambda z: z[4])  # by vertical centre (yc)
    rows = []
    for it in norm:
        placed = False
        for r in rows:
            # Join the first row whose running centre is close enough.
            if abs(it[4] - r["yc"]) <= row_tol:
                r["m"].append(it)
                # Keep the row centre as the mean of its members.
                r["yc"] = float(np.mean([k[4] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it[4], "m": [it]})
    rows.sort(key=lambda r: r["yc"])
    lines = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z[3])  # by horizontal centre (xc)
        line = normalize_text(" ".join(x[1] for x in mem))
        if line:
            lines.append(line)
    return normalize_text(" ".join(lines))
 def reread_crop_robust(image, bbox, reader, upscale=3.0, pad=22):
    ih, iw = image.shape[:2]
    x1, y1, x2, y2 = bbox
-    x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad))
+    x1 = max(0, int(x1 - pad))
-    x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad))
+    y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad))
    y2 = min(ih, int(y2 + pad))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
-        return None
+        return None, 0.0
-    up = cv2.resize(crop, (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), interpolation=cv2.INTER_CUBIC)
+    up = cv2.resize(
        crop,
        (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)),
        interpolation=cv2.INTER_CUBIC
    )
-    best = None
+    modes = ["raw", "clahe", "adaptive", "otsu", "invert"]
-    for mode in ("raw", "clahe", "adaptive"):
+    angles = [0.0, 1.5, -1.5]
    best_text, best_score = "", 0.0
    for mode in modes:
        proc = preprocess_variant(up, mode)
        res = run_ocr_on_array(reader, proc)
        if not res:
            continue
        res.sort(key=lambda r: (r[0][0][1], r[0][0][0]))
        lines = [normalize_text(t) for _, t, _ in res if t.strip()]
        merged = re.sub(r"\s{2,}", " ", " ".join(lines)).strip()
        s = ocr_quality_score(merged)
        if best is None or s > best[0]:
            best = (s, merged)
-    return best[1] if best else None
+        if len(proc.shape) == 2:
            proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR)
        else:
            proc3 = proc
        for a in angles:
            rot = rotate_image_keep_bounds(proc3, a)
            if len(rot.shape) == 3:
                rot_in = cv2.cvtColor(rot, cv2.COLOR_BGR2GRAY)
            else:
                rot_in = rot
            res = run_ocr_on_array(reader, rot_in)
            txt = rebuild_text_from_ocr_result(res)
            sc = ocr_candidate_score(txt)
            if sc > best_score:
                best_text, best_score = txt, sc
    if not best_text:
        return None, 0.0
    return best_text, best_score
 # ─────────────────────────────────────────────
-#  LINES + YELLOW BOXES
+# LINES + YELLOW BOXES
 # ─────────────────────────────────────────────
 def build_lines_from_indices(indices, ocr):
    if not indices:
@@ -233,14 +359,13 @@ def build_lines_from_indices(indices, ocr):
    return lines
 def build_line_boxes_from_indices(indices, ocr):
    """
    Robust yellow-box generation with punctuation attachment:
    - row grouping
    - chunking by x gap
    - attach tiny punctuation/special tokens to nearest chunk
-    - coverage guarantee
+    - token coverage guarantee
    """
    if not indices:
        return []
@@ -264,13 +389,13 @@ def build_line_boxes_from_indices(indices, ocr):
    pad = max(1, int(round(med_h * 0.12)))
    def is_punct_like(t):
-        raw = t.strip()
+        raw = (t or "").strip()
        if raw == "":
            return True
        punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
        return punct_ratio >= 0.5 or len(raw) <= 2
-    # 1) rows
+    # 1) row grouping
    items_sorted = sorted(items, key=lambda x: x["yc"])
    rows = []
    for it in items_sorted:
@@ -296,7 +421,7 @@ def build_line_boxes_from_indices(indices, ocr):
            normal = mem
            punct = []
-        # 2) chunk normal tokens
+        # 2) chunk normal by x-gap
        chunks = []
        cur = [normal[0]]
        for t in normal[1:]:
@@ -310,18 +435,17 @@ def build_line_boxes_from_indices(indices, ocr):
                cur = [t]
        chunks.append(cur)
-        # 3) attach punctuation tokens
+        # 3) attach punct tokens to nearest chunk
        for p in punct:
            pb = p["b"]
            pxc, pyc = p["xc"], p["yc"]
            best_k = -1
            best_score = 1e18
            for k, ch in enumerate(chunks):
                ub = boxes_union_xyxy([x["b"] for x in ch])
                cx = (ub[0] + ub[2]) / 2.0
                cy = (ub[1] + ub[3]) / 2.0
                dx = abs(pxc - cx)
                dy = abs(pyc - cy)
                score = dx + 1.8 * dy
@@ -339,22 +463,21 @@ def build_line_boxes_from_indices(indices, ocr):
            else:
                chunks.append([p])
-        # 4) chunk boxes
+        # 4) emit chunk boxes
        for ch in chunks:
            ub = boxes_union_xyxy([x["b"] for x in ch])
            if ub:
                x1, y1, x2, y2 = ub
                out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
-    # 5) guarantee all tokens included
+    # 5) guarantee every token is inside some yellow box
    token_boxes = [it["b"] for it in items]
    def inside(tb, lb):
        return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
    for tb in token_boxes:
-        ok = any(inside(tb, lb) for lb in out_boxes)
+        if not any(inside(tb, lb) for lb in out_boxes):
        if not ok:
            x1, y1, x2, y2 = tb
            out_boxes.append((x1 - pad, y1 - pad, x2 + pad, y2 + pad))
@@ -366,8 +489,8 @@ def build_line_boxes_from_indices(indices, ocr):
            ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
            ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
            inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
-            a1 = max(1, (b[2]-b[0])*(b[3]-b[1]))
+            a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
-            a2 = max(1, (m[2]-m[0])*(m[3]-m[1]))
+            a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
            iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
            if iou > 0.72:
                merged[i] = boxes_union_xyxy([b, m])
@@ -381,7 +504,7 @@ def build_line_boxes_from_indices(indices, ocr):
 # ─────────────────────────────────────────────
-#  GROUPING
+# GROUPING
 # ─────────────────────────────────────────────
 def auto_gap(image_path, base=18, ref_w=750):
    img = cv2.imread(image_path)
@@ -426,7 +549,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
    for i in range(n):
        groups.setdefault(find(i), []).append(i)
-    sorted_groups = sorted(groups.values(), key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs)))
+    sorted_groups = sorted(
        groups.values(),
        key=lambda idxs: (
            min(boxes[i][1] for i in idxs),
            min(boxes[i][0] for i in idxs)
        )
    )
    bubbles = {}
    bubble_boxes = {}
@@ -436,6 +565,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
    ih, iw = image_shape[:2]
    for bid, idxs in enumerate(sorted_groups, start=1):
        idxs = sorted(idxs, key=lambda k: boxes[k][1])
        lines = build_lines_from_indices(idxs, ocr)
        quads = [ocr[k][0] for k in idxs]
        ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
@@ -443,8 +573,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
            continue
        x1, y1, x2, y2 = ub
-        x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
+        x1 = max(0, x1 - bbox_padding)
-        x2 = min(iw, x2 + bbox_padding); y2 = min(ih, y2 + bbox_padding)
+        y1 = max(0, y1 - bbox_padding)
        x2 = min(iw, x2 + bbox_padding)
        y2 = min(ih, y2 + bbox_padding)
        bubbles[bid] = lines
        bubble_boxes[bid] = (x1, y1, x2, y2)
@@ -455,23 +587,24 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
 # ─────────────────────────────────────────────
-#  DEBUG
+# DEBUG
 # ─────────────────────────────────────────────
 def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
    img = cv2.imread(image_path)
    if img is None:
        return
-    # token quads
+    # OCR token quads
    for bbox, txt, conf in ocr:
        pts = np.array(bbox, dtype=np.int32)
        cv2.polylines(img, [pts], True, (180, 180, 180), 1)
-    # bubble boxes + yellow line boxes
+    # Bubble + line boxes
    for bid, bb in bubble_boxes.items():
        x1, y1, x2, y2 = bb
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
-        cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
+        cv2.putText(img, f"BOX#{bid}", (x1 + 2, y1 + 16),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
        idxs = bubble_indices.get(bid, [])
        line_boxes = build_line_boxes_from_indices(idxs, ocr)
@@ -485,7 +618,7 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path=
 # ─────────────────────────────────────────────
-#  EXPORT
+# EXPORT
 # ─────────────────────────────────────────────
 def estimate_reading_order(bbox_dict, mode="ltr"):
    items = []
@@ -494,7 +627,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
        cy = (y1 + y2) / 2.0
        items.append((bid, cx, cy))
-    items.sort(key=lambda t: t[2])
+    items.sort(key=lambda t: t[2])  # top to bottom
    rows = []
    tol = 90
@@ -517,7 +650,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
    return {bid: i + 1 for i, bid in enumerate(order)}
 def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
    out = {}
@@ -536,11 +668,15 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
        out[str(bid)] = {
            "x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
            "reading_order": int(reading_map.get(bid, bid)),
-            "quad_bboxes": [{"x": int(b[0]), "y": int(b[1]), "w": int(b[2]-b[0]), "h": int(b[3]-b[1])} for b in qboxes],
+            "quad_bboxes": [
-            "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
+                {"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
                for b in qboxes
            ],
            "quads": [
                [[int(p[0]), int(p[1])] for p in q] for q in quads
            ],
            "text_bbox": xyxy_to_xywh(text_union),
            # yellow geometry
            "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
            "line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
            "line_union_area": int(line_union_area),
@@ -551,7 +687,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
 # ─────────────────────────────────────────────
-#  MAIN
+# MAIN
 # ─────────────────────────────────────────────
 def translate_manga_text(
    image_path,
@@ -606,6 +742,7 @@ def translate_manga_text(
            skipped += 1
            continue
        # reduce false positives in very top strip
        if qb[1] < int(ih * TOP_BAND_RATIO):
            if conf < 0.70 and len(t) >= 5:
                skipped += 1
@@ -633,14 +770,28 @@ def translate_manga_text(
    translator = GoogleTranslator(source=source_lang, target=target_lang)
    # robust bubble text cleanup
    clean_lines = {}
    for bid, lines in bubbles.items():
-        txt = normalize_text(" ".join(lines))
+        base_txt = normalize_text(" ".join(lines))
-        q = ocr_quality_score(txt)
+        base_sc = ocr_candidate_score(base_txt)
-        if q < quality_threshold:
+
-            reread = reread_crop(image, bubble_boxes[bid], reader, upscale=2.5, pad=18)
+        # only robust reread on low quality
-            if reread:
+        if base_sc < quality_threshold:
-                txt = normalize_text(reread)
+            rr_txt, rr_sc = reread_crop_robust(
                image,
                bubble_boxes[bid],
                reader,
                upscale=3.0,
                pad=22
            )
            if rr_txt and rr_sc > base_sc + 0.06:
                txt = rr_txt
            else:
                txt = base_txt
        else:
            txt = base_txt
        clean_lines[bid] = apply_glossary(txt)
    reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
@@ -657,18 +808,24 @@ def translate_manga_text(
        src = clean_lines[bid].strip()
        if not src:
            continue
        flags = []
        flags = []
        try:
            tgt = translator.translate(src) or ""
        except Exception as e:
            tgt = f"[Translation error: {e}]"
            flags.append("TRANSLATION_ERROR")
        tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
        src_u = src.upper()
-        out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}")
+        out_lines.append(
-        print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}")
+            f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
        )
        print(
            f"#{bid:<7} {reading_map.get(bid,bid):<6} "
            f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
        )
        translated_count += 1
    out_lines.append(divider)
@@ -691,13 +848,13 @@ def translate_manga_text(
    print(f"Saved: {export_to_file}")
    print(f"Saved: {export_bubbles_to}")
    if debug:
-        print("Saved: debug_clusters.png (special chars included in yellow boxes)")
+        print("Saved: debug_clusters.png")
 if __name__ == "__main__":
    translate_manga_text(
-        image_path="002-page.png",
+        image_path="001-page.png",
-        source_lang="en",
+        source_lang="it",
        target_lang="ca",
        confidence_threshold=0.12,
        min_text_length=1,
--- a/pipeline.py
+++ b/pipeline.py
@@ -2,60 +2,76 @@
 """
 pipeline.py
 ───────────────────────────────────────────────────────────────
-Translation-only pipeline for Dandadan_059_2022_Digital
+Translation + render pipeline
 Flow per page:
-  1. Run translate_manga_text()  → output.txt + bubbles.json
+  1) translate_manga_text() -> output.txt + bubbles.json (+ debug_clusters.png if DEBUG)
-  2. Copy original image to workdir for reference
+  2) render_translations()  -> page_translated.png
  3) Pack CBZ with originals + rendered pages + text outputs
-Folder structure produced:
+Folder structure:
-  Dandadan_059_2022_Digital_1r0n/
+  <CHAPTER_DIR>/
  ├── 000.png
  ├── 001.png
  └── translated/
-      ├── 00/
+      ├── 000/
-      │   ├── output.txt            ← translations to review
+      │   ├── output.txt
-      │   ├── bubbles.json          ← bubble boxes
+      │   ├── bubbles.json
-      │   └── debug_clusters.png    ← cluster debug (if DEBUG=True)
+      │   ├── page_translated.png
-      ├── 01/
+      │   └── debug_clusters.png (optional)
      ├── 001/
      │   └── ...
      └── ...
-  Dandadan_059_translated.cbz       ← original pages + translations
+CBZ:
-                                       zipped for reference
+  - pages/<original pages>
  - rendered/<page_stem>_translated.png
  - translations/<page_stem>_output.txt
 """
 import os
 import sys
 import shutil
 import zipfile
 import importlib.util
 from pathlib import Path
 # ─────────────────────────────────────────────
-#  CONFIG  — edit these as needed
+#  CONFIG
 # ─────────────────────────────────────────────
-CHAPTER_DIR  = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n"
+CHAPTER_DIR = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n"
-OUTPUT_CBZ   = "/Users/guillemhernandezsola/Downloads/Spy_x_Family_076_2023_Digital_1r0n_translated.cbz"
+OUTPUT_CBZ  = "/Users/guillemhernandezsola/Downloads/Dandadan_059_2022_Digital_1r0n_translated.cbz"
 SOURCE_LANG  = "en"
 TARGET_LANG  = "ca"
-# manga-translator.py settings
+SOURCE_LANG = "en"
 TARGET_LANG = "ca"
 # translator (NEW signature-compatible)
 CONFIDENCE_THRESHOLD = 0.10
-MIN_TEXT_LENGTH      = 2
+MIN_TEXT_LENGTH      = 1
-CLUSTER_EPS          = "auto"
+GAP_PX               = "auto"      # was cluster/proximity in old version
 PROXIMITY_PX         = 80
 FILTER_SFX           = True
-QUALITY_THRESHOLD    = 0.5
+QUALITY_THRESHOLD    = 0.50
-UPSCALE_FACTOR       = 2.5
+READING_MODE         = "ltr"
 BBOX_PADDING         = 5
 DEBUG                = True
 # renderer
 RENDER_ENABLED       = True
 RENDER_OUTPUT_NAME   = "page_translated.png"
 # optional custom font list for renderer
 FONT_CANDIDATES = [
    "fonts/ComicNeue-Regular.ttf",
    "fonts/ComicRelief-Regular.ttf"
 ]
 # ─────────────────────────────────────────────
 #  DYNAMIC MODULE LOADER
 # ─────────────────────────────────────────────
 def load_module(name, filepath):
    """Load a Python source file as a module object and execute it.

    Args:
        name: Module name to give the loaded module.
        filepath: Path to the ``.py`` file to load.

    Returns:
        The executed module.

    Raises:
        FileNotFoundError: If no import spec (or no loader) can be created
            for ``filepath``.
    """
-    spec   = importlib.util.spec_from_file_location(name, filepath)
+    spec = importlib.util.spec_from_file_location(name, filepath)
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load spec for {filepath}")
    module = importlib.util.module_from_spec(spec)
    # Runs the file's top-level code inside the new module namespace.
    spec.loader.exec_module(module)
    return module
@@ -65,10 +81,10 @@ def load_module(name, filepath):
 #  HELPERS
 # ─────────────────────────────────────────────
 def sorted_pages(chapter_dir):
    """Return the image files directly inside ``chapter_dir``, sorted by stem.

    Only entries whose lower-cased suffix is .jpg, .jpeg, .png or .webp are
    kept. Sorting compares stems as strings, so ordering is lexicographic
    (e.g. "10" sorts before "2" unless names are zero-padded).
    """
-    exts  = {".jpg", ".jpeg", ".png", ".webp"}
+    exts = {".jpg", ".jpeg", ".png", ".webp"}
    pages = [
        p for p in Path(chapter_dir).iterdir()
-        if p.suffix.lower() in exts
+        if p.is_file() and p.suffix.lower() in exts
    ]
    return sorted(pages, key=lambda p: p.stem)
@@ -80,82 +96,97 @@ def make_page_workdir(chapter_dir, page_stem):
 def pack_cbz(chapter_dir, translated_dir, output_cbz):
-    """
+    exts = {".jpg", ".jpeg", ".png", ".webp"}
-    Packs into CBZ:
+
-      - All original pages  (from chapter_dir root)
+    pages = sorted(
      - All output.txt      (one per page subfolder)
    Sorted by page stem for correct reading order.
    """
    exts   = {".jpg", ".jpeg", ".png", ".webp"}
    pages  = sorted(
        [p for p in Path(chapter_dir).iterdir()
-         if p.suffix.lower() in exts],
+         if p.is_file() and p.suffix.lower() in exts],
        key=lambda p: p.stem
    )
-    txts   = sorted(
+
    txts = sorted(
        translated_dir.rglob("output.txt"),
        key=lambda p: p.parent.name
    )
    rendered = sorted(
        translated_dir.rglob(RENDER_OUTPUT_NAME),
        key=lambda p: p.parent.name
    )
    if not pages:
        print("⚠️  No original pages found — CBZ not created.")
        return
-    with zipfile.ZipFile(output_cbz, "w",
+    with zipfile.ZipFile(output_cbz, "w", compression=zipfile.ZIP_STORED) as zf:
-                         compression=zipfile.ZIP_STORED) as zf:
+        # original pages
        # Original pages
        for img in pages:
            arcname = f"pages/{img.name}"
            zf.write(img, arcname)
            print(f"  🖼  {arcname}")
-        # Translation text files
+        # rendered pages
        for rp in rendered:
            arcname = f"rendered/{rp.parent.name}_translated.png"
            zf.write(rp, arcname)
            print(f"  🎨 {arcname}")
        # text outputs
        for txt in txts:
            arcname = f"translations/{txt.parent.name}_output.txt"
            zf.write(txt, arcname)
            print(f"  📄 {arcname}")
-    print(f"\n✅ CBZ saved → {output_cbz}  "
+    print(
-          f"({len(pages)} page(s), {len(txts)} translation(s))")
+        f"\n✅ CBZ saved → {output_cbz} "
        f"({len(pages)} original, {len(rendered)} rendered, {len(txts)} text)"
    )
 # ─────────────────────────────────────────────
 #  PER-PAGE PIPELINE
 # ─────────────────────────────────────────────
-def process_page(page_path, workdir, translator_module):
+def process_page(page_path, workdir, translator_module, renderer_module):
    """
-    Runs translator for a single page.
+    Runs translator + renderer for one page.
-    All output files land in workdir.
+    All generated files are written inside workdir.
    Returns True on success, False on failure.
    """
-    print(f"\n{'─'*60}")
+    print(f"\n{'─' * 70}")
-    print(f"  PAGE: {page_path.name}")
+    print(f"PAGE: {page_path.name}")
-    print(f"{'─'*60}")
+    print(f"{'─' * 70}")
    orig_dir = os.getcwd()
    try:
        # chdir into workdir so debug_clusters.png,
        # temp files etc. all land there
        os.chdir(workdir)
        # 1) translate
        translator_module.translate_manga_text(
-            image_path           = str(page_path.resolve()),
+            image_path= str(page_path.resolve()),
-            source_lang          = SOURCE_LANG,
+            source_lang=SOURCE_LANG,
-            target_lang          = TARGET_LANG,
+            target_lang=TARGET_LANG,
-            confidence_threshold = CONFIDENCE_THRESHOLD,
+            confidence_threshold=CONFIDENCE_THRESHOLD,
-            export_to_file       = "output.txt",
+            min_text_length=MIN_TEXT_LENGTH,
-            export_bubbles_to    = "bubbles.json",
+            gap_px=GAP_PX,
-            min_text_length      = MIN_TEXT_LENGTH,
+            filter_sound_effects=FILTER_SFX,
-            cluster_eps          = CLUSTER_EPS,
+            quality_threshold=QUALITY_THRESHOLD,
-            proximity_px         = PROXIMITY_PX,
+            export_to_file="output.txt",
-            filter_sound_effects = FILTER_SFX,
+            export_bubbles_to="bubbles.json",
-            quality_threshold    = QUALITY_THRESHOLD,
+            reading_mode=READING_MODE,
-            upscale_factor       = UPSCALE_FACTOR,
+            debug=DEBUG
            bbox_padding         = BBOX_PADDING,
            debug                = DEBUG,
        )
        print("  ✅ translator done")
        # 2) render
        if RENDER_ENABLED:
            renderer_module.render_translations(
                input_image=str(page_path.resolve()),
                output_image=RENDER_OUTPUT_NAME,
                translations_file="output.txt",
                bubbles_file="bubbles.json",
                font_candidates=FONT_CANDIDATES
            )
            print("  ✅ renderer done")
        print(f"  ✅ Translated → {workdir}")
        return True
    except Exception as e:
@@ -170,16 +201,20 @@ def process_page(page_path, workdir, translator_module):
 #  MAIN
 # ─────────────────────────────────────────────
 def main():
-    # ── Load translator module ────────────────────────────────────
+    print("Loading modules...")
-    print("Loading manga-translator.py...")
+
    try:
-        translator = load_module(
+        translator = load_module("manga_translator", "manga-translator.py")
-            "manga_translator", "manga-translator.py")
+    except Exception as e:
-    except FileNotFoundError as e:
+        print(f"❌ Could not load manga-translator.py: {e}")
-        print(f"❌ Could not load module: {e}")
+        sys.exit(1)
    try:
        renderer = load_module("manga_renderer", "manga-renderer.py")
    except Exception as e:
        print(f"❌ Could not load manga-renderer.py: {e}")
        sys.exit(1)
    # ── Discover pages ────────────────────────────────────────────
    pages = sorted_pages(CHAPTER_DIR)
    if not pages:
        print(f"❌ No images found in: {CHAPTER_DIR}")
@@ -187,33 +222,31 @@ def main():
    print(f"\n📖 Chapter : {CHAPTER_DIR}")
    print(f"   Pages   : {len(pages)}")
-    print(f"   Source  : {SOURCE_LANG}  →  Target: {TARGET_LANG}\n")
+    print(f"   Source  : {SOURCE_LANG} → Target: {TARGET_LANG}")
    print(f"   Render  : {'ON' if RENDER_ENABLED else 'OFF'}\n")
    # ── Process each page ─────────────────────────────────────────
    translated_dir = Path(CHAPTER_DIR) / "translated"
    succeeded = []
-    failed    = []
+    failed = []
    for i, page_path in enumerate(pages, start=1):
-        print(f"\n[{i}/{len(pages)}] {page_path.name}")
+        print(f"[{i}/{len(pages)}] {page_path.name}")
        workdir = make_page_workdir(CHAPTER_DIR, page_path.stem)
-        ok      = process_page(page_path, workdir, translator)
+        ok = process_page(page_path, workdir, translator, renderer)
        if ok:
            succeeded.append(page_path.name)
        else:
            failed.append(page_path.name)
-    # ── Summary ───────────────────────────────────────────────────
+    print(f"\n{'═' * 70}")
-    print(f"\n{'═'*60}")
+    print("PIPELINE COMPLETE")
-    print(f"  PIPELINE COMPLETE")
+    print(f"✅ {len(succeeded)} page(s) succeeded")
    print(f"  ✅ {len(succeeded)} page(s) succeeded")
    if failed:
-        print(f"  ❌ {len(failed)} page(s) failed:")
+        print(f"❌ {len(failed)} page(s) failed:")
        for f in failed:
-            print(f"     • {f}")
+            print(f"   • {f}")
-    print(f"{'═'*60}\n")
+    print(f"{'═' * 70}\n")
    # ── Pack CBZ ──────────────────────────────────────────────────
    print("Packing CBZ...")
    pack_cbz(CHAPTER_DIR, translated_dir, OUTPUT_CBZ)