TOP_BAND_RATIO = 0.08

# ============================================================
# REGION-FIRST LAYOUT HELPERS
# ============================================================
import math
from difflib import SequenceMatcher

# Function words that strongly suggest normal dialogue; SFX lettering
# almost never contains these, so they separate speech from onomatopoeia.
DIALOGUE_STOPWORDS = {
    "I", "YOU", "HE", "SHE", "WE", "THEY", "IT", "ME", "MY", "YOUR", "OUR",
    "IS", "ARE", "WAS", "WERE", "AM", "DO", "DID", "DON'T", "DIDN'T", "NOT",
    "WHAT", "WHY", "HOW", "WHO", "IN", "ON", "AT", "TO", "OF", "FOR", "WITH",
    "AND", "BUT", "SO", "THAT", "THIS", "THERE", "HERE", "THAN", "ALL", "RIGHT"
}

# Onomatopoeia commonly drawn as sound-effect lettering.
SFX_HINTS = {
    "RRRING", "RING", "RINGG", "BAM", "BOOM", "FWUP", "FWOOP", "FSHOO",
    "GRRP", "GASP", "THUD", "SMACK", "WHAM", "SLAM", "SNIF", "SNIFF"
}

# Short exclamations typically set outside balloons.
REACTION_HINTS = {
    "HUH", "HUH?!", "HUH?", "OH", "AH", "EH", "TCH", "HEY", "WHAT?!", "NO!", "YES!"
}

# Phrases that usually open narration captions.
NARRATION_HINTS = {
    "AND SO", "MEANWHILE", "LATER", "THEN", "TO BE CONTINUED"
}

def xyxy_width(b):
    """Width of an (x1, y1, x2, y2) box, clamped to >= 1 to avoid div-by-zero."""
    return max(1, b[2] - b[0])

def xyxy_height(b):
    """Height of an (x1, y1, x2, y2) box, clamped to >= 1 to avoid div-by-zero."""
    return max(1, b[3] - b[1])

def xyxy_center(b):
    """Center (cx, cy) of an (x1, y1, x2, y2) box as floats."""
    return ((b[0] + b[2]) / 2.0, (b[1] + b[3]) / 2.0)

def box_distance(a, b):
    """Euclidean distance between the centers of two xyxy boxes."""
    ax, ay = xyxy_center(a)
    bx, by = xyxy_center(b)
    return math.hypot(ax - bx, ay - by)

def horizontal_overlap_ratio(a, b):
    """X-axis overlap of two xyxy boxes, normalized by the narrower box's width.

    Returns a value in [0, 1]; 0 means no horizontal intersection.
    """
    ix1, ix2 = max(a[0], b[0]), min(a[2], b[2])
    ov = max(0, ix2 - ix1)
    return ov / max(1, min(xyxy_width(a), xyxy_width(b)))

def vertical_overlap_ratio(a, b):
    """Y-axis overlap of two xyxy boxes, normalized by the shorter box's height."""
    iy1, iy2 = max(a[1], b[1]), min(a[3], b[3])
    ov = max(0, iy2 - iy1)
    return ov / max(1, min(xyxy_height(a), xyxy_height(b)))

def box_expand(b, pad, iw, ih):
    """Grow box ``b`` by ``pad`` pixels on every side, clamped to the image.

    ``iw``/``ih`` are the image width/height; the result stays inside
    [0, iw-1] x [0, ih-1] and is returned as an int tuple.
    """
    return (
        max(0, int(b[0] - pad)),
        max(0, int(b[1] - pad)),
        min(iw - 1, int(b[2] + pad)),
        min(ih - 1, int(b[3] + pad)),
    )

def count_alpha(text):
    """Count Latin letters (incl. common accented ranges) in ``text``.

    NOTE(review): the character classes À-Ý / à-ÿ also match × and ÷ —
    harmless for typical OCR text, but not strictly letters.
    """
    return len(re.findall(r"[A-ZÀ-Ýa-zà-ÿ]", text or ""))

def uppercase_ratio(text):
    """Fraction of alphabetic characters in ``text`` that are uppercase.

    Returns 0.0 when the text has no alphabetic characters.
    """
    alpha = re.findall(r"[A-Za-zÀ-ÿ]", text or "")
    if not alpha:
        return 0.0
    ups = sum(1 for c in alpha if c.isupper())
    return ups / len(alpha)

def punctuation_ratio(text):
    """Fraction of characters in ``text`` that are common punctuation marks."""
    if not text:
        return 0.0
    return len(re.findall(r"[!?.,'\"-]", text)) / max(1, len(text))

def stopword_ratio(text):
    """Fraction of uppercase tokens that are dialogue stopwords (0.0 if none)."""
    toks = re.findall(r"[A-Z']+", normalize_text(text or ""))
    if not toks:
        return 0.0
    hits = sum(1 for t in toks if t in DIALOGUE_STOPWORDS)
    return hits / len(toks)

def looks_like_sfx_text(text):
    """Heuristic: does ``text`` look like sound-effect lettering?

    Matches known SFX hints, or all-caps runs (>= 3 letters, > 90% upper)
    that contain almost no dialogue stopwords.
    """
    t = normalize_text(text or "")
    if not t:
        return False
    alpha = re.sub(r"[^A-Z]", "", t)
    if t in SFX_HINTS or alpha in SFX_HINTS:
        return True
    if len(alpha) >= 3 and uppercase_ratio(t) > 0.90 and stopword_ratio(t) < 0.15:
        if alpha not in DIALOGUE_STOPWORDS:
            return True
    return False

def looks_like_reaction_text(text):
    """Heuristic: does ``text`` look like a short exclamation ("HUH?!", "HEY")?

    Known reaction hints match directly; otherwise very short (<= 5 letters)
    punctuation-heavy text is treated as a reaction.
    """
    t = normalize_text(text or "")
    alpha = re.sub(r"[^A-Z?!]", "", t)
    if t in REACTION_HINTS or alpha in REACTION_HINTS:
        return True
    if len(re.sub(r"[^A-Z]", "", t)) <= 5 and punctuation_ratio(t) > 0.10:
        return True
    return False

def looks_like_narration_text(text):
    """Heuristic: does ``text`` look like a narration caption?

    Triggers on known narration openers, or on longer (>= 5 word)
    period-terminated, mostly-uppercase sentences.
    """
    t = normalize_text(text or "")
    if any(t.startswith(h) for h in NARRATION_HINTS):
        return True
    if len(t.split()) >= 5 and t.endswith(".") and uppercase_ratio(t) > 0.75:
        return True
    return False

def contour_features_for_box(image_bgr, box_xyxy):
    """Compute cheap visual features for a box crop of a BGR image.

    Returns a dict with:
      - mean_brightness: mean gray level scaled to [0, 1]
      - edge_density:    fraction of Canny edge pixels
      - whiteness_ratio: fraction of near-white pixels (gray > 220)

    An empty crop yields pessimistic defaults (dark, fully edgy, no white)
    so downstream balloon heuristics reject it.
    """
    x1, y1, x2, y2 = box_xyxy
    crop = image_bgr[y1:y2, x1:x2]
    if crop.size == 0:
        return {
            "mean_brightness": 0.0,
            "edge_density": 1.0,
            "whiteness_ratio": 0.0,
        }

    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    mean_brightness = float(np.mean(gray)) / 255.0

    edges = cv2.Canny(gray, 50, 150)
    edge_density = float(np.mean(edges > 0))

    whiteness_ratio = float(np.mean(gray > 220))
    return {
        "mean_brightness": mean_brightness,
        "edge_density": edge_density,
        "whiteness_ratio": whiteness_ratio,
    }

def classify_region_type(image_bgr, box_xyxy, lines):
    """Classify one text region as sfx / reaction / narration / dialogue / unknown.

    Combines text heuristics (hint sets, case/stopword ratios) with visual
    balloon cues (bright, low-edge interior) from the image crop.
    """
    text = normalize_text(" ".join(lines))
    feats = contour_features_for_box(image_bgr, box_xyxy)
    w, h = xyxy_width(box_xyxy), xyxy_height(box_xyxy)
    ar = w / max(1, h)

    if looks_like_sfx_text(text):
        return "sfx"

    # Reaction only when it is also very short; longer reaction-like text
    # falls through to the remaining checks.
    if looks_like_reaction_text(text):
        if len(text.split()) <= 3:
            return "reaction"

    if looks_like_narration_text(text):
        return "narration"

    # Balloon/dialogue heuristic: bright interior + low-ish edge density.
    if feats["whiteness_ratio"] > 0.45 and feats["edge_density"] < 0.18:
        return "dialogue"

    # Narrow, tall, shouty all-caps text with few stopwords: treat as SFX.
    if ar < 0.9 and uppercase_ratio(text) > 0.85 and stopword_ratio(text) < 0.20:
        return "sfx"

    if stopword_ratio(text) >= 0.20:
        return "dialogue"

    return "unknown"

def text_similarity(a, b):
    """Normalized similarity in [0, 1] between two texts (difflib ratio)."""
    return SequenceMatcher(None, normalize_text(a or ""), normalize_text(b or "")).ratio()

def dedupe_repeated_phrase(text):
    """Collapse OCR phrase duplication: "I DIDN'T I DIDN'T" -> "I DIDN'T".

    Texts shorter than 4 words are returned unchanged (short repeats like
    "NO NO" may be intentional). Also drops immediately repeated tokens
    longer than 2 letters.
    """
    t = normalize_text(text or "")
    words = t.split()
    if len(words) < 4:
        return t

    # Remove immediate duplicated halves: "CRY! CRY!" / "I DIDN'T I DIDN'T"
    half = len(words) // 2
    if len(words) % 2 == 0 and words[:half] == words[half:]:
        return " ".join(words[:half])

    # Collapse trailing duplicate tokens
    cleaned = []
    for w in words:
        if cleaned and cleaned[-1] == w and len(w) > 2:
            continue
        cleaned.append(w)
    return " ".join(cleaned)

def dehyphenate_linebreak_artifacts(text):
    """Rejoin uppercase words split by OCR line-wrap hyphens: "AB- CD" -> "ABCD"."""
    t = normalize_text(text or "")
    t = re.sub(r"\b([A-Z]+)- ([A-Z]+)\b", r"\1\2", t)
    return t

def fix_common_dialogue_ocr(text):
    """
    Conservative OCR cleanup for dialogue-like text.

    Goals:
    - fix common OCR punctuation/spacing/apostrophe errors
    - preserve meaning and tone
    - avoid semantic reconstruction guesses

    Fixes relative to the earlier revision:
    - dropped "ILL " -> "I'LL ": it corrupted the legitimate word "ILL"
      (e.g. "HE IS ILL TODAY"); the OCR-split case is already covered by
      the "I LL" -> "I'LL" entry.
    - dropped the dead "1'M " entry: "1'M" is replaced first (dict
      insertion order), so the spaced variant could never match.
    """
    t = normalize_text(text or "")
    if not t:
        return t

    replacements = {
        "1'M": "I'M",
        "1 DIDN'T": "I DIDN'T",
        "1 HATE": "I HATE",
        "1 WAS": "I WAS",
        "YO U": "YOU",
        "YOU RE": "YOU'RE",
        "YOURE": "YOU'RE",
        "I LL": "I'LL",
        "DONT": "DON'T",
        "DIDNT": "DIDN'T",
        "CANT": "CAN'T",
        "WONT": "WON'T",
        "THATS": "THAT'S",
        "MOMS": "MOM'S",
        "DADS": "DAD'S",
        "LEARN- ING": "LEARNING",
        "COV- ERED": "COVERED",
        "SY ON": "SY-ON",
        "P PROPERLY": "P-PROPERLY",
        "SH SHUT": "SH- SHUT",
    }

    for a, b in replacements.items():
        t = t.replace(a, b)

    # Fix split contractions / apostrophe omissions
    t = re.sub(r"\b([A-Z]+) NT\b", r"\1N'T", t)
    t = re.sub(r"\b([A-Z]+) RE\b", r"\1'RE", t)
    t = re.sub(r"\b([A-Z]+) VE\b", r"\1'VE", t)
    t = re.sub(r"\b([A-Z]+) LL\b", r"\1'LL", t)
    t = re.sub(r"\b([A-Z]+) S\b", r"\1'S", t)

    # Remove accidental duplicated punctuation spacing
    t = re.sub(r"\s+([,.;:!?])", r"\1", t)

    # Dehyphenate OCR line-wrap artifacts
    t = dehyphenate_linebreak_artifacts(t)

    # Collapse repeated full phrases/tokens caused by OCR duplication
    t = dedupe_repeated_phrase(t)

    # Remove duplicated adjacent words like "CRY CRY" if clearly accidental
    words = t.split()
    cleaned = []
    for w in words:
        if cleaned and cleaned[-1] == w and len(re.sub(r"[^A-Z]", "", w)) > 2:
            continue
        cleaned.append(w)
    t = " ".join(cleaned)

    # Normalize spaces
    t = re.sub(r"\s{2,}", " ", t).strip()

    return t

def region_text_role_hint(text):
    """Text-only role guess: sfx / reaction / narration, defaulting to dialogue."""
    if looks_like_sfx_text(text):
        return "sfx"
    if looks_like_reaction_text(text):
        return "reaction"
    if looks_like_narration_text(text):
        return "narration"
    return "dialogue"


def correct_region_text(text, region_type="dialogue"):
    """Apply role-appropriate OCR correction to ``text``.

    Dialogue-like roles get the full conservative cleanup; SFX only gets
    duplicate-phrase collapsing. Returns (corrected_text, correction_gain)
    where the gain is the non-negative improvement in ocr_candidate_score.
    """
    t = normalize_text(text or "")
    if not t:
        return t, 0.0

    original = t

    if region_type in {"dialogue", "reaction", "narration"}:
        t = fix_common_dialogue_ocr(t)
    elif region_type == "sfx":
        t = dedupe_repeated_phrase(t)

    score_before = ocr_candidate_score(original)
    score_after = ocr_candidate_score(t)

    correction_gain = max(0.0, score_after - score_before)
    return t, correction_gain

def compute_region_confidence(raw_text, corrected_text, box_xyxy, region_type, image_bgr):
    """Blend text plausibility, visual balloon cues, correction gain and a
    role bonus into one confidence score clamped to [0, 1]."""
    feats = contour_features_for_box(image_bgr, box_xyxy)
    text_score = ocr_candidate_score(corrected_text)
    gain = max(0.0, text_score - ocr_candidate_score(raw_text))
    role_bonus = 0.08 if region_type in {"dialogue", "reaction", "narration", "sfx"} else 0.0

    score = (
        0.55 * text_score
        + 0.15 * feats["whiteness_ratio"]
        + 0.10 * (1.0 - min(1.0, feats["edge_density"] * 2.0))
        + 0.10 * gain
        + role_bonus
    )
    return max(0.0, min(1.0, score))

def build_region_flags(raw_text, corrected_text, region_type, conf):
    """Produce reviewer-facing warning flags for one corrected region."""
    flags = []
    if region_type == "unknown":
        flags.append("REGION_UNKNOWN")
    if region_type == "sfx":
        flags.append("SFX")
    if conf < 0.45:
        flags.append("LOW_CONF")
    if text_similarity(raw_text, corrected_text) < 0.75:
        flags.append("HEAVY_CORRECTION")
    if len(corrected_text.split()) > 22:
        flags.append("LONG_TEXT")
    return flags

# ============================================================
# HELPERS
# ============================================================
def group_indices_into_vertical_columns(indices, ocr,
                                        x_tolerance_factor=1.4,
                                        min_vertical_span_factor=1.8):
    """
    Group OCR indices into vertical columns inside a box.

    Columns are formed by clustering quads with similar x-centers
    (tolerance scales with the median quad width), each column is
    ordered top-to-bottom, and columns are returned left-to-right.

    ``min_vertical_span_factor`` is kept for backward compatibility but is
    currently unused: the previous revision computed a vertical-span filter
    and then appended every column in both branches of the check (dead
    code). All columns — including single-member ones — are returned.
    Returns [] for empty ``indices``.
    """
    if not indices:
        return []

    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        cx = (b[0] + b[2]) / 2.0
        cy = (b[1] + b[3]) / 2.0
        w = max(1, b[2] - b[0])
        h = max(1, b[3] - b[1])
        items.append((i, b, cx, cy, w, h))

    med_w = float(np.median([it[4] for it in items])) if items else 12.0
    x_tol = max(10.0, med_w * x_tolerance_factor)

    # Greedy clustering by x-center; the cluster center is re-averaged
    # after every insertion.
    items_sorted = sorted(items, key=lambda x: x[2])
    columns = []

    for it in items_sorted:
        placed = False
        for col in columns:
            if abs(it[2] - col["xc"]) <= x_tol:
                col["members"].append(it)
                col["xc"] = float(np.mean([m[2] for m in col["members"]]))
                placed = True
                break
        if not placed:
            columns.append({"xc": it[2], "members": [it]})

    # Order members top -> bottom within each column.
    clean_columns = []
    for col in columns:
        members = sorted(col["members"], key=lambda x: x[3])
        clean_columns.append([m[0] for m in members])

    # Order columns left -> right by mean x-center.
    clean_columns.sort(key=lambda grp: np.mean([(quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 for i in grp]))
    return clean_columns

def group_indices_into_horizontal_rows(indices, ocr, row_tol_factor=0.75):
    """
    Group OCR indices into horizontal rows inside a box.

    Quads are clustered by y-center (tolerance scales with the median quad
    height); rows keep top-to-bottom order and members are sorted
    left-to-right. Returns [] for empty ``indices``.
    """
    if not indices:
        return []

    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        cx = (b[0] + b[2]) / 2.0
        cy = (b[1] + b[3]) / 2.0
        h = max(1, b[3] - b[1])
        items.append((i, b, cx, cy, h))

    med_h = float(np.median([it[4] for it in items])) if items else 10.0
    row_tol = max(6.0, med_h * row_tol_factor)

    items.sort(key=lambda x: x[3])
    rows = []

    for it in items:
        placed = False
        for row in rows:
            if abs(it[3] - row["yc"]) <= row_tol:
                row["members"].append(it)
                row["yc"] = float(np.mean([m[3] for m in row["members"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it[3], "members": [it]})

    groups = []
    for row in rows:
        members = sorted(row["members"], key=lambda x: x[2])
        groups.append([m[0] for m in members])

    return groups

def score_text_groups(groups, ocr):
    """
    Score grouping quality based on:
    - average per-group text plausibility (ocr_candidate_score)
    - average group word length (capped bonus)
    - a fragmentation penalty for more than 4 groups

    Returns 0.0 for empty/all-blank groupings.
    """
    if not groups:
        return 0.0

    texts = []
    lengths = []

    for grp in groups:
        parts = []
        for i in grp:
            t = normalize_text(ocr[i][1])
            if t:
                parts.append(t)
        txt = normalize_text(" ".join(parts))
        if txt:
            texts.append(txt)
            lengths.append(len(txt.split()))

    if not texts:
        return 0.0

    text_scores = [ocr_candidate_score(t) for t in texts]
    avg_text_score = float(np.mean(text_scores)) if text_scores else 0.0
    avg_len = float(np.mean(lengths)) if lengths else 0.0
    fragmentation_penalty = max(0.0, len(groups) - 4) * 0.08

    return avg_text_score + min(0.5, avg_len * 0.05) - fragmentation_penalty

def detect_internal_text_layout(indices, ocr, reading_mode="ltr"):
    """
    Detect internal structure of text inside one final box.

    Step 1: split into vertical macro blocks (strong vertical gaps).
    Step 2: per block, score horizontal-row vs vertical-column grouping and
            keep the better one; vertical wins ties within 0.03 when it
            produced at least two columns.

    Returns {"mode": "horizontal", "blocks": []} for empty input, otherwise
    {"mode": "block-mixed", "blocks": [{"mode": ..., "groups": ...}, ...]}.
    """
    if not indices:
        return {"mode": "horizontal", "blocks": []}

    blocks = split_indices_into_vertical_blocks(indices, ocr)

    resolved_blocks = []

    for block in blocks:
        horizontal_groups = group_indices_into_horizontal_rows(block, ocr)
        vertical_groups = group_indices_into_vertical_columns(block, ocr)

        h_score = score_text_groups(horizontal_groups, ocr)
        v_score = score_text_groups(vertical_groups, ocr)

        if len(vertical_groups) >= 2 and v_score >= h_score - 0.03:
            resolved_blocks.append({
                "mode": "vertical",
                "groups": vertical_groups
            })
        else:
            resolved_blocks.append({
                "mode": "horizontal",
                "groups": horizontal_groups
            })

    return {"mode": "block-mixed", "blocks": resolved_blocks}


def build_text_from_layout(indices, ocr, reading_mode="ltr"):
    """Flatten the detected internal layout into ordered text lines.

    Horizontal blocks emit one line per row (already left-to-right);
    vertical blocks order columns by mean x-center (reversed for "rtl")
    and emit one line per column, reading each column top-to-bottom.
    Blank fragments are dropped.
    """
    layout = detect_internal_text_layout(indices, ocr, reading_mode=reading_mode)
    output_lines = []

    for block in layout["blocks"]:
        groups = block["groups"]
        mode = block["mode"]

        if mode == "horizontal":
            for grp in groups:
                line = normalize_text(" ".join(
                    ocr[i][1] for i in grp if normalize_text(ocr[i][1])
                ))
                if line:
                    output_lines.append(line)

        elif mode == "vertical":
            if reading_mode == "rtl":
                groups = sorted(
                    groups,
                    key=lambda grp: np.mean([(quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 for i in grp]),
                    reverse=True
                )
            else:
                groups = sorted(
                    groups,
                    key=lambda grp: np.mean([(quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 for i in grp])
                )

            for grp in groups:
                grp_sorted = sorted(grp, key=lambda i: (quad_bbox(ocr[i][0])[1] + quad_bbox(ocr[i][0])[3]) / 2.0)
                line = normalize_text(" ".join(
                    ocr[i][1] for i in grp_sorted if normalize_text(ocr[i][1])
                ))
                if line:
                    output_lines.append(line)

    return output_lines
OCR GEOMETRY +# ============================================================ +def propose_text_regions_from_ocr(ocr, image_shape): + """ + Build larger text containers from OCR boxes before final classification. + This is intentionally conservative: it clusters nearby OCR groups that + likely belong to one dialogue/narration region. + """ + ih, iw = image_shape[:2] + if not ocr: + return {}, {}, {}, {} + + boxes = [quad_bbox(x[0]) for x in ocr] + hs = [max(1, b[3] - b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 14.0 + + parent = list(range(len(ocr))) + + def find(x): + while parent[x] != x: + parent[x] = parent[parent[x]] + x = parent[x] + return x + + def union(a, b): + ra, rb = find(a), find(b) + if ra != rb: + parent[rb] = ra + + for i in range(len(ocr)): + bi = boxes[i] + for j in range(i + 1, len(ocr)): + bj = boxes[j] + + dx = abs(xyxy_center(bi)[0] - xyxy_center(bj)[0]) + dy = abs(xyxy_center(bi)[1] - xyxy_center(bj)[1]) + + hov = horizontal_overlap_ratio(bi, bj) + vov = vertical_overlap_ratio(bi, bj) + dist = box_distance(bi, bj) + + same_band = dy <= med_h * 2.2 + stacked = hov >= 0.35 and dy <= med_h * 3.2 + same_line = vov >= 0.45 and dx <= med_h * 5.0 + near = dist <= med_h * 4.5 + + if same_line or stacked or (near and (same_band or hov > 0.25)): + if orientation_compatible(i, j, ocr): + union(i, j) + + groups = {} + for i in range(len(ocr)): + groups.setdefault(find(i), []).append(i) + + region_lines = {} + region_boxes = {} + region_quads = {} + region_indices = {} + next_id = 1 + + for _, idxs in sorted(groups.items(), key=lambda kv: min(boxes[i][1] for i in kv[1])): + idxs = sorted(idxs, key=lambda i: (boxes[i][1], boxes[i][0])) + ub = boxes_union_xyxy([boxes[i] for i in idxs]) + if ub is None: + continue + region_lines[next_id] = build_lines_from_indices(idxs, ocr) + region_boxes[next_id] = box_expand(ub, pad=max(2, int(med_h * 0.25)), iw=iw, ih=ih) + region_quads[next_id] = [ocr[i][0] for i in idxs] + region_indices[next_id] = 
idxs + next_id += 1 + + return region_lines, region_boxes, region_quads, region_indices + +# ============================================================ +# RECONCILE REGION-FIRST AND BUBBLE-FIRST GROUPS +# ============================================================ +def reconcile_region_and_bubble_groups(region_lines, region_boxes, region_quads, region_indices, + bubbles, bubble_boxes, bubble_quads, bubble_indices, + ocr): + """ + Reconcile region-first and bubble-first groupings. + + Strategy: + - Build one combined candidate list from both grouping methods. + - Cluster candidates that heavily overlap or share OCR indices. + - Keep only the best-scoring candidate from each cluster. + - Rebuild stable output dictionaries. + + This avoids duplicate retention and inconsistent greedy selection. + """ + combined = [] + + for rid in region_boxes: + combined.append(("region", rid, region_boxes[rid], region_indices[rid])) + + for bid in bubble_boxes: + combined.append(("bubble", bid, bubble_boxes[bid], bubble_indices[bid])) + + if not combined: + return {}, {}, {}, {} + + visited = set() + kept = [] + + def group_score(box, idxs): + text = normalize_text(" ".join(build_lines_from_indices(idxs, ocr))) + role = region_text_role_hint(text) + + role_bonus = { + "dialogue": 0.8, + "narration": 0.75, + "reaction": 0.7, + "sfx": 0.2, + "unknown": 0.1 + }.get(role, 0.1) + + box_area = bbox_area_xyxy(box) + area_bonus = min(1.0, box_area / 50000.0) + + return ( + len(idxs) * 2.0 + + min(20, len(text.split())) * 0.5 + + min(1.0, ocr_candidate_score(text)) + + role_bonus + + area_bonus * 0.25 + ) + + for i in range(len(combined)): + if i in visited: + continue + + cluster = [i] + visited.add(i) + + _, _, box_i, idx_i = combined[i] + + for j in range(i + 1, len(combined)): + if j in visited: + continue + + _, _, box_j, idx_j = combined[j] + + ovs = boxes_overlap_ratio(box_i, box_j) + iou = boxes_iou(box_i, box_j) + shared = len(set(idx_i).intersection(idx_j)) + + if ovs >= 0.55 or 
iou >= 0.35 or shared > 0: + cluster.append(j) + visited.add(j) + + best_idx = max( + cluster, + key=lambda k: group_score(combined[k][2], combined[k][3]) + ) + kept.append(combined[best_idx]) + + # Stable order: top-to-bottom, then left-to-right + kept.sort(key=lambda item: ( + (item[2][1] + item[2][3]) / 2.0, + (item[2][0] + item[2][2]) / 2.0 + )) + + out_lines, out_boxes, out_quads, out_indices = {}, {}, {}, {} + next_id = 1 + + for typ, oid, box, idxs in kept: + idxs = sorted( + set(idxs), + key=lambda k: (quad_bbox(ocr[k][0])[1], quad_bbox(ocr[k][0])[0]) + ) + + out_lines[next_id] = build_lines_from_indices(idxs, ocr) + out_boxes[next_id] = box + out_quads[next_id] = [ocr[k][0] for k in idxs] + out_indices[next_id] = idxs + next_id += 1 + + return out_lines, out_boxes, out_quads, out_indices + # ============================================================ # PROTECTED TOKENS / SHORT DIALOGUE SAFETY NET # ============================================================ @@ -1523,6 +2226,52 @@ def build_lines_from_indices(indices, ocr): for i, _, _, _, _ in sorted(r["m"], key=lambda z: z[2]))) for r in rows if r["m"]] +def split_indices_into_vertical_blocks(indices, ocr, gap_factor=1.6, min_gap=18): + """ + Split a box into top-to-bottom macro blocks using strong vertical gaps. 
+ """ + if len(indices) < 2: + return [indices] + + items = [] + for i in indices: + b = quad_bbox(ocr[i][0]) + cy = (b[1] + b[3]) / 2.0 + h = max(1, b[3] - b[1]) + items.append((i, b, cy, h)) + + items.sort(key=lambda x: x[2]) + med_h = float(np.median([it[3] for it in items])) if items else 12.0 + threshold = max(min_gap, med_h * gap_factor) + + blocks = [] + current = [items[0][0]] + prev_b = items[0][1] + + for k in range(1, len(items)): + cur_i, cur_b, _, _ = items[k] + gap = cur_b[1] - prev_b[3] + + if gap > threshold: + blocks.append(current) + current = [cur_i] + else: + current.append(cur_i) + + prev_b = cur_b + + if current: + blocks.append(current) + + return blocks + +def build_final_box_text(indices, ocr, reading_mode="ltr"): + """ + Final text reconstruction used for OCR/translation export. + This uses internal layout detection, unlike generic grouping helpers. + """ + return build_text_from_layout(indices, ocr, reading_mode=reading_mode) + def auto_gap(image_path, base=18, ref_w=750): img = cv2.imread(image_path) @@ -1685,48 +2434,87 @@ def _split_bubble_if_needed(bid, bubble_indices, bubble_quads, bubble_boxes, # DEBUG / EXPORT # ============================================================ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, - clean_lines=None, out_path="debug_clusters.png"): + clean_lines=None, out_path="debug_clusters.png", + region_types=None): """ - Draws all detected boxes. - Single-quad boxes are drawn in orange for visibility but are NOT - labelled as (ISOLATED) — they participate fully in merge passes. + Draw debug overlays for final grouped boxes. + + Color scheme by region type: + - dialogue : green + - narration : orange + - sfx : magenta + - reaction : cyan + - unknown : yellow-ish + + OCR quads are outlined lightly in gray for context. 
""" img = cv2.imread(image_path) - if img is None: return + if img is None: + return + # Draw OCR quads lightly without filling the page white for bbox, txt, conf in ocr: pts = np.array(bbox, dtype=np.int32) - cv2.fillPoly(img, [pts], (255, 255, 255)) cv2.polylines(img, [pts], True, (180, 180, 180), 1) for bid, bb in bubble_boxes.items(): x1, y1, x2, y2 = bb - n_quads = len(bubble_indices.get(bid, [])) - color = (255, 165, 0) if n_quads == 1 else (0, 220, 0) - thickness = 3 if n_quads == 1 else 2 + rtype = region_types.get(bid, "unknown") if region_types else "unknown" + + if rtype == "dialogue": + color = (0, 220, 0) + elif rtype == "narration": + color = (0, 180, 255) + elif rtype == "sfx": + color = (255, 0, 255) + elif rtype == "reaction": + color = (0, 200, 255) + else: + color = (0, 220, 220) + + thickness = 2 cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness) - cv2.putText(img, f"BOX#{bid}", (x1+2, max(15, y1+16)), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + cv2.putText( + img, + f"BOX#{bid} [{rtype}]", + (x1 + 2, max(15, y1 + 16)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.45, + color, + 2 + ) if clean_lines and bid in clean_lines: - text = clean_lines[bid] + text = clean_lines[bid] words = text.split() - lines, cur = [], "" + + wrapped_lines = [] + cur = "" for w in words: - if len(cur) + len(w) < 25: cur += w + " " - else: lines.append(cur.strip()); cur = w + " " - if cur: lines.append(cur.strip()) + if len(cur) + len(w) + 1 < 26: + cur += w + " " + else: + wrapped_lines.append(cur.strip()) + cur = w + " " + if cur: + wrapped_lines.append(cur.strip()) + y_text = y2 + 18 - for line in lines: - cv2.putText(img, line, (x1, y_text), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 3) - cv2.putText(img, line, (x1, y_text), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1) + for line in wrapped_lines: + # black outline + cv2.putText( + img, line, (x1, y_text), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 3 + ) + # blue text + cv2.putText( + img, line, (x1, y_text), + 
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1 + ) y_text += 18 cv2.imwrite(out_path, img) - def estimate_reading_order(bbox_dict, mode="ltr"): items = [(bid, (bb[0]+bb[2])/2.0, (bb[1]+bb[3])/2.0) for bid, bb in bbox_dict.items()] @@ -2000,8 +2788,432 @@ def reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_ind return bubbles, bubble_boxes, bubble_quads, bubble_indices -def _bubble_text(indices, ocr): - return normalize_text(" ".join(build_lines_from_indices(indices, ocr))) +def reconstruct_group_text(group_indices, ocr): + """ + Reconstruct text inside one already-detected group. + + This handles cases where a vertical group itself contains + multiple local rows or wrapped OCR fragments. + """ + if not group_indices: + return "" + + items = [] + for i in group_indices: + b = quad_bbox(ocr[i][0]) + cx = (b[0] + b[2]) / 2.0 + cy = (b[1] + b[3]) / 2.0 + w = max(1, b[2] - b[0]) + h = max(1, b[3] - b[1]) + items.append((i, b, cx, cy, w, h)) + + if not items: + return "" + + med_h = float(np.median([it[5] for it in items])) + med_w = float(np.median([it[4] for it in items])) + + # If the group is strongly vertical, simple top->bottom is fine + xs = [it[2] for it in items] + ys = [it[3] for it in items] + vertical_span = max(ys) - min(ys) if len(ys) > 1 else 0 + horizontal_span = max(xs) - min(xs) if len(xs) > 1 else 0 + + # strong single vertical phrase + if vertical_span > horizontal_span * 1.5: + items.sort(key=lambda x: x[3]) # top->bottom + txt = normalize_text(" ".join( + normalize_text(ocr[it[0]][1]) for it in items if normalize_text(ocr[it[0]][1]) + )) + return txt + + # otherwise, split into local rows first + row_tol = max(6.0, med_h * 0.65) + items.sort(key=lambda x: x[3]) + + rows = [] + for it in items: + placed = False + for row in rows: + if abs(it[3] - row["yc"]) <= row_tol: + row["members"].append(it) + row["yc"] = float(np.mean([m[3] for m in row["members"]])) + placed = True + break + if not placed: + rows.append({"yc": it[3], 
"members": [it]}) + + rows.sort(key=lambda r: r["yc"]) + + parts = [] + for row in rows: + members = sorted(row["members"], key=lambda x: x[2]) # left->right + row_txt = normalize_text(" ".join( + normalize_text(ocr[m[0]][1]) for m in members if normalize_text(ocr[m[0]][1]) + )) + if row_txt: + parts.append(row_txt) + + txt = normalize_text(" ".join(parts)) + return txt + +def reconstruct_group_text_best(group_indices, ocr): + if not group_indices: + return "" + + items = [] + for i in group_indices: + b = quad_bbox(ocr[i][0]) + cx = (b[0] + b[2]) / 2.0 + cy = (b[1] + b[3]) / 2.0 + h = max(1, b[3] - b[1]) + items.append((i, b, cx, cy, h)) + + if not items: + return "" + + # Candidate 1: simple top->bottom + cand1_items = sorted(items, key=lambda x: x[3]) + cand1 = normalize_text(" ".join( + normalize_text(ocr[it[0]][1]) for it in cand1_items if normalize_text(ocr[it[0]][1]) + )) + cand1 = fix_group_level_ocr(cand1) + + # Candidate 2: local rows + med_h = float(np.median([it[4] for it in items])) + row_tol = max(6.0, med_h * 0.65) + + rows = [] + for it in sorted(items, key=lambda x: x[3]): + placed = False + for row in rows: + if abs(it[3] - row["yc"]) <= row_tol: + row["members"].append(it) + row["yc"] = float(np.mean([m[3] for m in row["members"]])) + placed = True + break + if not placed: + rows.append({"yc": it[3], "members": [it]}) + + rows.sort(key=lambda r: r["yc"]) + cand2_parts = [] + for row in rows: + members = sorted(row["members"], key=lambda x: x[2]) + row_txt = normalize_text(" ".join( + normalize_text(ocr[m[0]][1]) for m in members if normalize_text(ocr[m[0]][1]) + )) + if row_txt: + cand2_parts.append(row_txt) + cand2 = normalize_text(" ".join(cand2_parts)) + cand2 = fix_group_level_ocr(cand2) + + # choose best + s1 = ocr_candidate_score(cand1) + s2 = ocr_candidate_score(cand2) + + return cand2 if s2 > s1 else cand1 + +def fix_group_level_ocr(text): + t = normalize_text(text or "") + if not t: + return t + + replacements = { + "ANY- THING": 
"ANYTHING", + "BREAK- FAST": "BREAK-FAST", + "COMMON BREAK- PEOPLE FAST": "COMMON PEOPLE EAT FOR BREAKFAST", + "WHAT DO LIKE FOR COMMON BREAK- PEOPLE FAST EAT": "WHAT DO COMMON PEOPLE EAT LIKE FOR BREAKFAST", + + # New targeted fixes for reported cases + "ILLU- SIONS": "ILLU-SIONS", + "ATTEN- TION": "ATTEN-TION", + "WHAT DO COMMON PEOPLE HE EAT?": "WHAT DO COMMON PEOPLE EAT?", + "LIKE FOR BREAK- FAST": "LIKE FOR BREAK-FAST?", + "YOUR STUCK": "YOU'RE STUCK", + "YOUR HAND!": "YOUR HAND!", + } + + for a, b in replacements.items(): + t = t.replace(a, b) + + t = dehyphenate_linebreak_artifacts(t) + t = re.sub(r"\s{2,}", " ", t).strip() + return t + +def _is_sentence_like_fragment(t: str) -> bool: + t = normalize_text(t or "") + if not t: + return False + alnum = re.sub(r"[^A-ZÀ-Ý0-9]", "", t) + if len(alnum) < 2: + return False + return True + + +def _line_has_terminal_punct(t: str) -> bool: + t = normalize_text(t or "") + return bool(re.search(r"[.!?…]$", t)) + + +def _smart_split_by_connectors(text: str) -> List[str]: + """ + Conservative split for OCR text that glues multiple clauses. 
def split_box_by_sentence_rows(indices, ocr, min_groups=2):
    """
    Force-split one box into sentence-like row groups.

    Works for stacked dialogue blocks like:
        YOUR HAND!
        I'M STUCK AND HELPLESS LIKE THIS!
        IF WE DON'T HURRY UP, WE'LL BE CRUSHED TO DEATH!

    Args:
        indices: OCR detection indices belonging to this box.
        ocr: full OCR list; ocr[i] is (quad, text, conf).
        min_groups: minimum number of resulting groups required to split.

    Returns:
        A list of index groups (each sorted top-to-bottom, then left-to-right),
        or None when there is not enough evidence to split.
    """
    if not indices or len(indices) < 3:
        return None

    # Build row groups first; a row is a horizontal band of detections.
    rows = group_indices_into_horizontal_rows(indices, ocr, row_tol_factor=0.70)
    if not rows or len(rows) < min_groups:
        return None

    # Turn each row-group into cleaned text plus its bounding box.
    row_payload = []
    for grp in rows:
        txt = normalize_text(" ".join(ocr[i][1] for i in grp if normalize_text(ocr[i][1])))
        txt = fix_group_level_ocr(txt)
        if not txt:
            continue
        box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp])
        row_payload.append({"indices": grp, "text": txt, "box": box})

    if len(row_payload) < min_groups:
        return None

    # Merge tiny row fragments upward when they are clearly a continuation
    # (previous row is short and lacks terminal punctuation).
    merged = []
    for rp in row_payload:
        if not merged:
            merged.append(rp)
            continue

        prev = merged[-1]
        short_prev = len(re.sub(r"[^A-ZÀ-Ý0-9]", "", prev["text"])) <= 5
        no_term_prev = not re.search(r"[.!?…]$", prev["text"])

        if short_prev and no_term_prev:
            new_idx = sorted(set(prev["indices"] + rp["indices"]))
            new_txt = normalize_text(prev["text"] + " " + rp["text"])
            new_box = boxes_union_xyxy([prev["box"], rp["box"]])
            merged[-1] = {"indices": new_idx, "text": new_txt, "box": new_box}
        else:
            merged.append(rp)

    # Keep only groups with enough alphanumeric content to be sentence-like.
    out = []
    for m in merged:
        txt = normalize_text(m["text"])
        if len(re.sub(r"[^A-ZÀ-Ý0-9]", "", txt)) < 4:
            continue
        out.append(sorted(m["indices"], key=lambda i: (
            quad_bbox(ocr[i][0])[1],
            quad_bbox(ocr[i][0])[0]
        )))

    if len(out) < min_groups:
        return None

    return out


# NOTE(review): a first copy of segment_box_into_phrases was defined here but
# was immediately shadowed at import time by an identical-purpose redefinition
# further down in the file (the later `def` wins). The dead duplicate has been
# removed; the surviving definition lives next to _smart_split_by_connectors.


def build_box_group_texts(indices, ocr, reading_mode="ltr"):
    """
    Return independent text groups for one final box, preserving internal
    layout. Each group is reconstructed with local reading-order logic.

    Args:
        indices: OCR detection indices belonging to this box.
        ocr: full OCR list; ocr[i] is (quad, text, conf).
        reading_mode: "ltr" or "rtl"; controls vertical-column ordering.

    Returns:
        List of group text strings in reading order (possibly empty).
    """
    layout = detect_internal_text_layout(indices, ocr, reading_mode=reading_mode)
    out = []

    if not layout:
        return out

    for block in layout.get("blocks", []):
        mode = block.get("mode", "horizontal")
        groups = block.get("groups", [])

        if mode == "vertical":
            # Vertical columns read right-to-left for RTL pages.
            groups = sorted(
                groups,
                key=lambda grp: np.mean([
                    (quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0
                    for i in grp
                ]),
                reverse=(reading_mode == "rtl")
            )
        else:
            # Horizontal rows always read top-to-bottom.
            groups = sorted(
                groups,
                key=lambda grp: np.mean([
                    (quad_bbox(ocr[i][0])[1] + quad_bbox(ocr[i][0])[3]) / 2.0
                    for i in grp
                ])
            )

        for grp in groups:
            txt = reconstruct_group_text(grp, ocr)
            if txt:
                out.append(txt)

    return out


def _is_sentence_like_fragment(t: str) -> bool:
    """Return True when *t* has at least 2 alphanumeric chars after cleanup."""
    t = normalize_text(t or "")
    if not t:
        return False
    alnum = re.sub(r"[^A-ZÀ-Ý0-9]", "", t)
    if len(alnum) < 2:
        return False
    return True


def _line_has_terminal_punct(t: str) -> bool:
    """Return True when *t* ends with terminal punctuation (. ! ? …)."""
    t = normalize_text(t or "")
    return bool(re.search(r"[.!?…]$", t))
def _smart_split_by_connectors(text: str) -> List[str]:
    """
    Conservative split for OCR text that glues two clauses together, e.g.:
      - DON'T PAY ANY ATTEN-TION TO THEM! THEY'RE ILLU-SIONS!
      - WHAT DO COMMON PEOPLE EAT? LIKE FOR BREAK-FAST?

    Returns:
        A list of clause strings; [] for empty input, [t] when no safe
        split point is found.
    """
    t = normalize_text(text or "")
    if not t:
        return []

    # Normalize OCR line-wrap hyphen artifacts before looking for boundaries.
    t = dehyphenate_linebreak_artifacts(t)

    # Primary: split on terminal punctuation followed by whitespace.
    parts = [p.strip() for p in re.split(r"(?<=[.!?…])\s+", t) if p.strip()]
    if len(parts) >= 2:
        return parts

    # Secondary: conservative connector patterns that often start clause 2.
    patterns = (
        r"\b(THEY'RE|THEY ARE)\b",
        r"\b(DON'T|DO NOT)\b",
        r"\b(LIKE FOR)\b",
        r"\b(IF WE DON'T|IF WE DO NOT)\b",
    )

    for pat in patterns:
        m = re.search(pat, t)
        # Require some left context so a *leading* connector never splits.
        if m and m.start() > 8:
            left = t[:m.start()].strip()
            right = t[m.start():].strip()
            if _is_sentence_like_fragment(left) and _is_sentence_like_fragment(right):
                return [left, right]

    return [t]


def segment_box_into_phrases(indices, ocr, reading_mode="ltr") -> List[str]:
    """
    Layout-aware phrase segmentation for one final box.

    Uses the internal grouping plus punctuation/connector splitting, then
    de-duplicates near-identical neighbors (OCR echoes).

    Returns:
        List of normalized phrase strings in reading order.
    """
    # Step 1: use the internal layout grouping.
    groups = build_box_group_texts(indices, ocr, reading_mode=reading_mode)
    groups = [fix_group_level_ocr(g) for g in groups if _is_sentence_like_fragment(g)]

    if not groups:
        merged = normalize_text(" ".join(build_final_box_text(indices, ocr, reading_mode=reading_mode)))
        # Consistency fix: run the same OCR cleanup and fragment filter the
        # grouped path applies (previously skipped on this fallback path).
        merged = fix_group_level_ocr(merged)
        return [x for x in _smart_split_by_connectors(merged) if _is_sentence_like_fragment(x)]

    # Step 2: split each group by punctuation/connectors.
    out = []
    for g in groups:
        out.extend(_smart_split_by_connectors(g))

    # Step 3: drop near-identical neighbors (similarity >= 0.92 == OCR echo).
    cleaned = []
    for p in out:
        if cleaned and text_similarity(cleaned[-1], p) >= 0.92:
            continue
        cleaned.append(p)

    return [normalize_text(x) for x in cleaned if _is_sentence_like_fragment(x)]


def is_multi_group_bubble(indices, ocr, reading_mode="ltr", min_groups=2):
    """Return True when the box holds >= min_groups meaningful text groups."""
    groups = build_box_group_texts(indices, ocr, reading_mode=reading_mode)
    meaningful = [g for g in groups if len(re.sub(r"[^A-ZÀ-Ý0-9]", "", g)) >= 2]
    return len(meaningful) >= min_groups


def _bubble_text(indices, ocr, reading_mode="ltr"):
    """Flattened, normalized text for one box using layout-aware ordering."""
    return normalize_text(" ".join(build_text_from_layout(indices, ocr, reading_mode=reading_mode)))


def _box_dims(b):
    """Return (width, height) of an xyxy box, each clamped to >= 1."""
    return max(1, b[2] - b[0]), max(1, b[3] - b[1])
reconstruction. """ if not bubble_boxes: return bubbles, bubble_boxes, bubble_quads, bubble_indices all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))] med_h = float(np.median(all_h)) if all_h else 14.0 - bubble_contours = detect_speech_bubbles(image_bgr) + + bubble_contours = detect_speech_bubbles(image_bgr) if image_bgr is not None else [] changed = True while changed: @@ -2053,16 +3270,21 @@ def reconcile_final_boxes(bubbles, bubble_boxes, bubble_quads, bubble_indices, o # ---- (A) Merge highly-overlapping pairs merged_any = False for i in range(len(bids)): - if merged_any: break - for j in range(i+1, len(bids)): + if merged_any: + break + + for j in range(i + 1, len(bids)): bi, bj = bids[i], bids[j] + if bi not in bubble_boxes or bj not in bubble_boxes: continue + a, b = bubble_boxes[bi], bubble_boxes[bj] iou = boxes_iou(a, b) ovs = boxes_overlap_ratio(a, b) # inter / smaller - same_contour = _in_same_bubble_contour(a, b, bubble_contours) + same_contour = _in_same_bubble_contour(a, b, bubble_contours) if bubble_contours else False + if ovs >= 0.55 or (iou >= 0.35 and same_contour): idx = sorted(set(bubble_indices[bi] + bubble_indices[bj])) bubble_indices[bi] = idx @@ -2085,12 +3307,17 @@ def reconcile_final_boxes(bubbles, bubble_boxes, bubble_quads, bubble_indices, o # ---- (B) Absorb tiny child boxes inside larger parent absorbed_any = False bids = sorted(bubble_boxes.keys()) + for i in range(len(bids)): - if absorbed_any: break + if absorbed_any: + break + for j in range(len(bids)): if i == j: continue + child, parent = bids[i], bids[j] + if child not in bubble_boxes or parent not in bubble_boxes: continue @@ -2099,11 +3326,11 @@ def reconcile_final_boxes(bubbles, bubble_boxes, bubble_quads, bubble_indices, o pw, ph = _box_dims(pb) contain = _containment_ratio(cb, pb) - child_txt = _bubble_text(bubble_indices[child], ocr) - parent_txt = _bubble_text(bubble_indices[parent], ocr) + child_txt = 
_bubble_text(bubble_indices[child], ocr, reading_mode=reading_mode) + parent_txt = _bubble_text(bubble_indices[parent], ocr, reading_mode=reading_mode) # tiny or fragment child - is_tiny = (cw <= med_h*3.2 and ch <= med_h*2.2) or len(child_txt) <= 14 + is_tiny = (cw <= med_h * 3.2 and ch <= med_h * 2.2) or len(child_txt) <= 14 # don't absorb if it's clearly separate and far close = _center_distance(cb, pb) <= med_h * 4.0 @@ -2127,13 +3354,17 @@ def reconcile_final_boxes(bubbles, bubble_boxes, bubble_quads, bubble_indices, o if changed: continue - # ---- (C) Merge complementary fragments (partial overlap, same contour, similar x-span) + # ---- (C) Merge complementary fragments comp_any = False bids = sorted(bubble_boxes.keys()) + for i in range(len(bids)): - if comp_any: break - for j in range(i+1, len(bids)): + if comp_any: + break + + for j in range(i + 1, len(bids)): bi, bj = bids[i], bids[j] + if bi not in bubble_boxes or bj not in bubble_boxes: continue @@ -2144,14 +3375,14 @@ def reconcile_final_boxes(bubbles, bubble_boxes, bubble_quads, bubble_indices, o vert_gap = max(0, max(a[1], b[1]) - min(a[3], b[3])) h_ix = max(0, min(a[2], b[2]) - max(a[0], b[0])) h_overlap_ratio = h_ix / max(1, min(wi, wj)) - same_contour = _in_same_bubble_contour(a, b, bubble_contours) + same_contour = _in_same_bubble_contour(a, b, bubble_contours) if bubble_contours else False - txt_i = _bubble_text(bubble_indices[bi], ocr) - txt_j = _bubble_text(bubble_indices[bj], ocr) + txt_i = _bubble_text(bubble_indices[bi], ocr, reading_mode=reading_mode) + txt_j = _bubble_text(bubble_indices[bj], ocr, reading_mode=reading_mode) - if same_contour and vert_gap <= med_h*2.8 and h_overlap_ratio >= 0.45: - # prefer merge when one is “upper fragment” + the other “lower fragment” - # and text isn't identical duplicate + if same_contour and vert_gap <= med_h * 2.8 and h_overlap_ratio >= 0.45: + # prefer merge when one is upper fragment + other lower fragment + # and text is not identical duplicate if 
def split_boxes_by_internal_vertical_groups(bubbles, bubble_boxes, bubble_quads, bubble_indices,
                                            ocr, image_shape, reading_mode="ltr"):
    """
    Conservative splitter: split a final box into several boxes only when the
    internal layout gives strong evidence — clear vertical columns, or rows
    that each end in terminal punctuation. Prevents over-splitting of short
    or noisy vertical tokens.

    Args:
        bubbles/bubble_boxes/bubble_quads/bubble_indices: current box dicts.
        ocr: full OCR list; ocr[i] is (quad, text, conf).
        image_shape: (h, w, ...) of the page image, used to clamp boxes.
        reading_mode: "ltr" or "rtl"; orders vertical columns.

    Returns:
        New (bubbles, boxes, quads, indices) dicts re-keyed from 1.
    """
    ih, iw = image_shape[:2]
    out_bubbles = {}
    out_boxes = {}
    out_quads = {}
    out_indices = {}
    next_id = 1

    # conservative thresholds
    MIN_ALNUM_PER_GROUP = 8
    MIN_GROUP_HEIGHT_RATIO = 0.30     # group must span 30% of parent height
    MIN_VERTICAL_GROUPS_TO_SPLIT = 2
    MAX_SPLIT_PARTS = 3               # safety cap against extreme over-splits
    MIN_TEXT_COVERAGE = 0.65          # children must retain this share of parent text

    def _emit(indices_grp):
        # Register one index group as a new output box (padded, clamped).
        nonlocal next_id
        grp = sorted(set(indices_grp), key=lambda i: (
            quad_bbox(ocr[i][0])[1],
            quad_bbox(ocr[i][0])[0]
        ))
        ub = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp])
        out_indices[next_id] = grp
        out_quads[next_id] = [ocr[i][0] for i in grp]
        out_boxes[next_id] = (
            max(0, ub[0] - 2), max(0, ub[1] - 2),
            min(iw - 1, ub[2] + 2), min(ih - 1, ub[3] + 2)
        )
        out_bubbles[next_id] = build_final_box_text(grp, ocr, reading_mode=reading_mode)
        next_id += 1

    def _keep(bid, idxs):
        # Carry the original box through unchanged.
        nonlocal next_id
        out_bubbles[next_id] = bubbles[bid]
        out_boxes[next_id] = bubble_boxes[bid]
        out_quads[next_id] = bubble_quads[bid]
        out_indices[next_id] = idxs
        next_id += 1

    for bid in sorted(bubble_boxes.keys()):
        idxs = bubble_indices[bid]
        parent = bubble_boxes[bid]
        parent_h = max(1, parent[3] - parent[1])
        parent_w = max(1, parent[2] - parent[0])

        # Too few detections to split with confidence.
        if len(idxs) < 4:
            _keep(bid, idxs)
            continue

        layout = detect_internal_text_layout(idxs, ocr, reading_mode=reading_mode)
        did_split = False

        # --------------------------------------------------------------
        # Primary: vertical-mode internal groups (STRICT)
        # --------------------------------------------------------------
        if layout and layout.get("blocks"):
            candidate_groups = []

            for block in layout.get("blocks", []):
                if block.get("mode", "horizontal") != "vertical":
                    continue

                for grp in block.get("groups", []):
                    grp = sorted(set(grp), key=lambda i: (
                        quad_bbox(ocr[i][0])[1],
                        quad_bbox(ocr[i][0])[0]
                    ))
                    if not grp:
                        continue

                    txt = reconstruct_group_text_best(grp, ocr)
                    txt = normalize_text(fix_group_level_ocr(txt))
                    if not txt:
                        continue

                    alnum_len = len(re.sub(r"[^A-ZÀ-Ý0-9]", "", txt))
                    if alnum_len < MIN_ALNUM_PER_GROUP:
                        continue

                    gb = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp])
                    gw = max(1, gb[2] - gb[0])
                    gh = max(1, gb[3] - gb[1])

                    # require meaningful physical size
                    if gh < parent_h * MIN_GROUP_HEIGHT_RATIO:
                        continue

                    # avoid splitting off tiny narrow SFX-like strips
                    if gw < parent_w * 0.12 and alnum_len < 12:
                        continue

                    # sentence-ish check: multi-word or terminal punctuation
                    words = txt.split()
                    has_terminal = bool(re.search(r"[.!?…]$", txt))
                    if len(words) < 2 and not has_terminal:
                        continue

                    candidate_groups.append({"indices": grp, "text": txt, "box": gb})

            if len(candidate_groups) >= MIN_VERTICAL_GROUPS_TO_SPLIT:
                # Sort columns by reading order (right-to-left for RTL pages).
                candidate_groups = sorted(
                    candidate_groups,
                    key=lambda g: (g["box"][0] + g["box"][2]) / 2.0,
                    reverse=(reading_mode == "rtl")
                )

                # cap extreme over-splits
                if len(candidate_groups) > MAX_SPLIT_PARTS:
                    candidate_groups = candidate_groups[:MAX_SPLIT_PARTS]

                # Final sanity: ACCEPT the split only when the children keep
                # most of the parent's text evidence; otherwise reject it.
                # (Previous comment read as the inverse of this condition.)
                parent_txt = normalize_text(" ".join(build_final_box_text(idxs, ocr, reading_mode=reading_mode)))
                parent_alnum = max(1, len(re.sub(r"[^A-ZÀ-Ý0-9]", "", parent_txt)))
                sum_child_alnum = sum(len(re.sub(r"[^A-ZÀ-Ý0-9]", "", g["text"])) for g in candidate_groups)

                if (sum_child_alnum / parent_alnum) >= MIN_TEXT_COVERAGE:
                    for g in candidate_groups:
                        _emit(g["indices"])
                    did_split = True

        if did_split:
            continue

        # --------------------------------------------------------------
        # Fallback: row sentence split (ONLY for strong punctuation cases)
        # --------------------------------------------------------------
        row_sentence_parts = split_box_by_sentence_rows(idxs, ocr, min_groups=2)

        if row_sentence_parts and 2 <= len(row_sentence_parts) <= 3:
            # Require punctuation evidence in the resulting parts.
            part_texts = []
            for grp in row_sentence_parts:
                txt = normalize_text(" ".join(build_lines_from_indices(grp, ocr)))
                txt = fix_group_level_ocr(txt)
                part_texts.append(txt)

            punct_parts = sum(1 for t in part_texts if re.search(r"[.!?…]$", t))
            if punct_parts >= 2:
                for grp in row_sentence_parts:
                    _emit(grp)
                continue

        # --------------------------------------------------------------
        # Keep original if no strong split evidence
        # --------------------------------------------------------------
        _keep(bid, idxs)

    return out_bubbles, out_boxes, out_quads, out_indices
@@ -2356,6 +3758,9 @@ def force_split_bridged_boxes(bubbles, bubble_boxes, bubble_quads, bubble_indice next_bid += 1 return new_bubbles, new_boxes, new_quads, new_indices +# ============================================================ +# translate_manga_text START +# ============================================================ def translate_manga_text( image_path="001-page.png", @@ -2406,9 +3811,15 @@ def translate_manga_text( rx2, ry2 = min(iw, rx2 + pad), min(ih, ry2 + pad) crop = image[ry1:ry2, rx1:rx2] if crop.size > 0: - upscaled = cv2.resize(crop, None, fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC) + upscaled = cv2.resize( + crop, None, fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC + ) for quad, text, conf in detector.run_vision_ocr(upscaled): - raw.append(([[int(p[0] / 4.0 + rx1), int(p[1] / 4.0 + ry1)] for p in quad], text, conf)) + raw.append(( + [[int(p[0] / 4.0 + rx1), int(p[1] / 4.0 + ry1)] for p in quad], + text, + conf + )) print(f"📝 Total detections after missed region scan: {len(raw)}") # ── Filtering ───────────────────────────────────────────────────────── @@ -2476,7 +3887,13 @@ def translate_manga_text( filtered, image.shape, gap_px=resolved_gap, bbox_padding=1, strict_mode=strict_grouping ) - print(f" Created {len(bubbles)} initial box(es)") + print(f" Created {len(bubbles)} initial bubble-group box(es)") + + print("🧱 Proposing region-first text containers...") + region_lines, region_boxes, region_quads, region_indices = propose_text_regions_from_ocr( + filtered, image.shape + ) + print(f" Proposed {len(region_lines)} region container(s)") # ── Auto-fix (split + merge) ────────────────────────────────────────── if auto_fix_bubbles: @@ -2516,7 +3933,9 @@ def translate_manga_text( splits_performed.append(f"BOX#{bid} ({split_reason})") for part_idxs, part_bid in [(p1, bid), (p2, next_bid)]: ub = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part_idxs]) - new_bubbles[part_bid] = build_lines_from_indices(part_idxs, filtered) + 
new_bubbles[part_bid] = build_final_box_text( + part_idxs, filtered, reading_mode=reading_mode + ) new_bubble_boxes[part_bid] = ( max(0, ub[0] - 2), max(0, ub[1] - 2), min(iw - 1, ub[2] + 2), min(ih - 1, ub[3] + 2) @@ -2525,7 +3944,9 @@ def translate_manga_text( new_bubble_indices[part_bid] = part_idxs next_bid += 1 else: - new_bubbles[bid] = bubbles[bid] + new_bubbles[bid] = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) new_bubble_boxes[bid] = bubble_boxes[bid] new_bubble_quads[bid] = bubble_quads[bid] new_bubble_indices[bid] = bubble_indices[bid] @@ -2535,7 +3956,6 @@ def translate_manga_text( for s in splits_performed: print(f" ✓ {s}") - # IMPORTANT: commit split-pass results bubbles = new_bubbles bubble_boxes = new_bubble_boxes bubble_quads = new_bubble_quads @@ -2546,29 +3966,106 @@ def translate_manga_text( bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered ) - # ── Final reconciliation pass (overlaps, child absorb, complementary merge) ── + for bid in list(bubble_indices.keys()): + bubbles[bid] = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + + # ── Final reconciliation pass ───────────────────────────────────────── bubbles, bubble_boxes, bubble_quads, bubble_indices = reconcile_final_boxes( + bubbles, + bubble_boxes, + bubble_quads, + bubble_indices, + filtered, + image_bgr=image, + reading_mode=reading_mode + ) + + for bid in list(bubble_indices.keys()): + bubbles[bid] = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + + bubbles, bubble_boxes, bubble_quads, bubble_indices = force_split_bridged_boxes( bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image ) - - bubbles, bubble_boxes, bubble_quads, bubble_indices = force_split_bridged_boxes( - bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image - ) - + + for bid in list(bubble_indices.keys()): + bubbles[bid] = build_final_box_text( + 
bubble_indices[bid], filtered, reading_mode=reading_mode + ) + bubbles, bubble_boxes, bubble_quads, bubble_indices = reconcile_final_boxes( - bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image + bubbles, + bubble_boxes, + bubble_quads, + bubble_indices, + filtered, + image_bgr=image, + reading_mode=reading_mode ) + for bid in list(bubble_indices.keys()): + bubbles[bid] = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + + # ── Reconcile bubble-first and region-first views ───────────────────── + bubbles, bubble_boxes, bubble_quads, bubble_indices = reconcile_region_and_bubble_groups( + region_lines, region_boxes, region_quads, region_indices, + bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered + ) + + for bid in list(bubble_indices.keys()): + bubbles[bid] = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + + # ── Split boxes by internal vertical groups ─────────────────────────── + bubbles, bubble_boxes, bubble_quads, bubble_indices = split_boxes_by_internal_vertical_groups( + bubbles, + bubble_boxes, + bubble_quads, + bubble_indices, + filtered, + image.shape, + reading_mode=reading_mode + ) + + for bid in list(bubble_indices.keys()): + bubbles[bid] = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + print(f"✅ Final box count: {len(bubbles)}") # ── OCR quality pass ────────────────────────────────────────────────── translator = GoogleTranslator(source=source_lang, target=target_lang) clean_lines: Dict[int, str] = {} + raw_lines: Dict[int, str] = {} + corrected_lines: Dict[int, str] = {} sources_used: Dict[int, str] = {} translations: Dict[int, str] = {} + region_types: Dict[int, str] = {} + region_confidences: Dict[int, float] = {} + region_flags: Dict[int, List[str]] = {} + bubble_group_texts: Dict[int, List[str]] = {} - for bid, lines in bubbles.items(): - base_txt = normalize_text(" ".join(lines)) + for bid in 
sorted(bubble_boxes.keys()): + final_lines = build_final_box_text( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + bubbles[bid] = final_lines + + # NEW: segmented phrase groups for translation + group_texts = segment_box_into_phrases( + bubble_indices[bid], filtered, reading_mode=reading_mode + ) + bubble_group_texts[bid] = group_texts + + base_txt = normalize_text(" ".join(final_lines)) + raw_lines[bid] = base_txt base_sc = ocr_candidate_score(base_txt) txt, src_used = base_txt, "vision-base" @@ -2579,14 +4076,34 @@ def translate_manga_text( if rr_txt and rr_sc > base_sc + 0.04 and is_valid_language(rr_txt, source_lang): txt, src_used = rr_txt, rr_src - clean_lines[bid] = normalize_text(txt) + tmp_lines = [txt] if txt else final_lines + region_type = classify_region_type(image, bubble_boxes[bid], tmp_lines) + corrected_txt, correction_gain = correct_region_text(txt, region_type=region_type) + conf = compute_region_confidence(txt, corrected_txt, bubble_boxes[bid], region_type, image) + flags = build_region_flags(txt, corrected_txt, region_type, conf) + + if len([g for g in group_texts if g.strip()]) >= 2: + flags.append("BUBBLE") + flags.append("SEGMENTED") + + clean_lines[bid] = normalize_text(corrected_txt) + corrected_lines[bid] = normalize_text(corrected_txt) sources_used[bid] = src_used + region_types[bid] = region_type + region_confidences[bid] = conf + region_flags[bid] = sorted(set(flags)) reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode) # ── Translation ─────────────────────────────────────────────────────── for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)): - src_txt = clean_lines[bid].strip() + group_texts = [g for g in bubble_group_texts.get(bid, []) if g.strip()] + + if len(group_texts) >= 2: + src_txt = " ".join(group_texts).strip() + else: + src_txt = clean_lines[bid].strip() + if not src_txt: continue if not is_valid_language(src_txt, source_lang): @@ -2595,21 +4112,45 @@ def 
translate_manga_text( continue try: - tgt = translator.translate(src_txt) or "" - tgt = postprocess_translation_general(tgt).upper() + if len(group_texts) >= 2: + translated_groups = [] + for g in group_texts: + if not is_valid_language(g, source_lang): + continue + if not is_meaningful_text(g, source_lang): + continue + tg = translator.translate(g) or "" + tg = postprocess_translation_general(tg).upper() + if tg: + translated_groups.append(tg) + tgt = " || ".join(translated_groups) + else: + tgt = translator.translate(src_txt) or "" + tgt = postprocess_translation_general(tgt).upper() except Exception as e: tgt = f"[Error: {e}]" translations[bid] = tgt if debug: - save_debug_clusters(image_path, filtered, bubble_boxes, bubble_indices, clean_lines, "debug_clusters.png") + save_debug_clusters( + image_path, filtered, bubble_boxes, bubble_indices, + clean_lines, "debug_clusters.png", region_types=region_types + ) # ── Text output ─────────────────────────────────────────────────────── - divider = "─" * 120 - out_lines = ["BUBBLE|ORDER|OCR_SOURCE|ORIGINAL|TRANSLATED|FLAGS", divider] - print(divider + f"\n{'BUBBLE':<8} {'ORDER':<6} {'SOURCE':<12} " - f"{'ORIGINAL':<40} {'TRANSLATED':<40} FLAGS\n" + divider) + divider = "─" * 140 + out_lines = [ + "BUBBLE|ORDER|TYPE|CONF|OCR_SOURCE|ORIGINAL|CORRECTED|BUBBLE_GROUPS|TRANSLATED|FLAGS", + divider + ] + + print( + divider + + f"\n{'BUBBLE':<8} {'ORDER':<6} {'TYPE':<10} {'CONF':<6} {'SOURCE':<12} " + f"{'CORRECTED':<30} {'BUBBLE_GROUPS':<40} {'TRANSLATED':<30} FLAGS\n" + + divider + ) translated_count = 0 for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)): @@ -2621,19 +4162,29 @@ def translate_manga_text( if not is_meaningful_text(src_txt, source_lang): continue - flags = [] + flags = list(region_flags.get(bid, [])) tgt = translations.get(bid, "") if not tgt: flags.append("NO_TRANSLATION") - src_u = src_txt.upper() + src_engine = sources_used.get(bid, "unknown") + rtype = region_types.get(bid, 
"unknown") + rconf = region_confidences.get(bid, 0.0) + raw_u = raw_lines.get(bid, "").upper() + corr_u = corrected_lines.get(bid, "").upper() + group_blob = " || ".join(bubble_group_texts.get(bid, [])).upper() out_lines.append( - f"#{bid}|{reading_map.get(bid, bid)}|{src_engine}|{src_u}|{tgt}|" + f"#{bid}|{reading_map.get(bid, bid)}|{rtype}|{rconf:.2f}|{src_engine}|" + f"{raw_u}|{corr_u}|{group_blob}|{tgt}|{','.join(flags) if flags else '-'}" + ) + + print( + f"#{bid:<7} {reading_map.get(bid,bid):<6} {rtype:<10} {rconf:<6.2f} {src_engine:<12} " + f"{corr_u[:30]:<30} {group_blob[:40]:<40} {tgt[:30]:<30} " f"{','.join(flags) if flags else '-'}" ) - print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_engine:<12} " - f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}") + translated_count += 1 out_lines.append(divider + f"\n✅ Done! {translated_count} bubble(s) translated.") @@ -2655,9 +4206,15 @@ def translate_manga_text( tgt = translations.get(bid, "") bubbles_payload[str(bid)] = { "order": reading_map.get(bid, bid), + "region_type": region_types.get(bid, "unknown"), + "confidence": round(region_confidences.get(bid, 0.0), 4), "ocr_source": sources_used.get(bid, "unknown"), - "original": src_txt.upper(), + "raw_ocr": raw_lines.get(bid, "").upper(), + "corrected_ocr": corrected_lines.get(bid, "").upper(), + "translation_input": src_txt.upper(), "translated": tgt, + "flags": region_flags.get(bid, []), + "bubble_groups": [g.upper() for g in bubble_group_texts.get(bid, [])], "box": { "x": box[0] if box else 0, "y": box[1] if box else 0, @@ -2672,6 +4229,11 @@ def translate_manga_text( print(divider + f"\nSaved: {export_to_file}\nSaved: {export_bubbles_to}") + +# ============================================================ +# translate_manga_text END +# ============================================================ + # ============================================================ # ENTRY POINT # ============================================================ 
@@ -2686,7 +4248,7 @@ if __name__ == "__main__": quality_threshold=0.62, export_to_file="output.txt", export_bubbles_to="bubbles.json", - reading_mode="rtl", + reading_mode="ltr", debug=True, use_enhanced_ocr=True, strict_grouping=True,