# ============================================================
# HELPERS
# ============================================================
def _quad_extents(quad):
    """Return ``(min_x, min_y, max_x, max_y)`` for a quad of ``[x, y]`` points."""
    xs = [p[0] for p in quad]
    ys = [p[1] for p in quad]
    return min(xs), min(ys), max(xs), max(ys)


def _cluster_by_gap(line_data, lo_key, hi_key, gap_threshold):
    """Group line records into runs separated by a gap wider than the threshold.

    Records are ordered by ``lo_key`` (stable sort) and a new cluster is
    started whenever the next record begins more than ``gap_threshold`` past
    the furthest ``hi_key`` seen so far in the current cluster.

    ``line_data`` must be non-empty.
    """
    ordered = sorted(line_data, key=lambda ld: ld[lo_key])
    clusters = [[ordered[0]]]
    # Track the running far edge of the cluster, not just the previous
    # record's edge: a long record may fully contain several shorter ones,
    # and measuring from the short one would report a gap that is not there.
    running_hi = ordered[0][hi_key]

    for ld in ordered[1:]:
        if ld[lo_key] - running_hi > gap_threshold:
            clusters.append([ld])
            running_hi = ld[hi_key]
        else:
            clusters[-1].append(ld)
            running_hi = max(running_hi, ld[hi_key])

    return clusters


def _split_boxes_by_axis_gap(out_boxes, out_indices, out_quads, out_lines,
                             ocr, gap_multiplier, lo_key, hi_key):
    """Shared implementation of the horizontal and vertical gap splitters.

    ``lo_key`` / ``hi_key`` select the axis (``"min_x"``/``"max_x"`` or
    ``"min_y"``/``"max_y"``). Returns
    ``(new_lines, new_boxes, new_quads, new_indices)``.
    """
    new_boxes, new_indices, new_quads, new_lines = {}, {}, {}, {}

    # Fresh ids start above every existing id, so a split box can never
    # collide with a box that is passed through under its original id.
    next_bid = max(out_boxes, default=0) + 1

    for bid, box in out_boxes.items():
        indices = out_indices[bid]
        lines = out_lines[bid]

        line_data = []
        if len(indices) >= 2:
            # NOTE(review): zip() stops at the shorter of indices/lines; this
            # assumes one text entry per OCR index - confirm against caller.
            for idx, text in zip(indices, lines):
                quad = ocr[idx][0]
                min_x, min_y, max_x, max_y = _quad_extents(quad)
                line_data.append({
                    "idx": idx, "text": text, "quad": quad,
                    "min_x": min_x, "max_x": max_x,
                    "min_y": min_y, "max_y": max_y,
                })

        clusters = []
        if len(line_data) >= 2:
            # The threshold scales with the average line height in this box,
            # for both axes.
            avg_height = sum(
                ld["max_y"] - ld["min_y"] for ld in line_data
            ) / len(line_data)
            clusters = _cluster_by_gap(
                line_data, lo_key, hi_key, avg_height * gap_multiplier
            )

        if len(clusters) < 2:
            # Nothing to split: keep the box and its original containers.
            new_boxes[bid] = box
            new_indices[bid] = indices
            new_quads[bid] = out_quads[bid]
            new_lines[bid] = lines
            continue

        for cluster in clusters:
            # Present each new box's lines top to bottom. For the vertical
            # split the cluster is already in this order (stable sort).
            cluster.sort(key=lambda ld: ld["min_y"])

            new_boxes[next_bid] = (
                min(ld["min_x"] for ld in cluster),
                min(ld["min_y"] for ld in cluster),
                max(ld["max_x"] for ld in cluster),
                max(ld["max_y"] for ld in cluster),
            )
            new_indices[next_bid] = [ld["idx"] for ld in cluster]
            new_quads[next_bid] = [ld["quad"] for ld in cluster]
            new_lines[next_bid] = [ld["text"] for ld in cluster]
            next_bid += 1

    return new_lines, new_boxes, new_quads, new_indices


def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
    """
    Splits a single bounding box into multiple boxes if there is a large horizontal
    gap between columns of text (e.g., side-by-side speech bubbles).

    Used by ``process_manga_page`` as Step 9.6, after the vertical gap split
    and before ``enforce_max_box_size``.

    Args:
        out_boxes: dict mapping box id -> ``(min_x, min_y, max_x, max_y)``.
        out_indices: dict mapping box id -> list of indices into ``ocr``.
        out_quads: dict mapping box id -> list of quads for that box.
        out_lines: dict mapping box id -> list of text lines, paired
            positionally with ``out_indices[bid]``.
        ocr: indexable OCR results; ``ocr[idx][0]`` is the quad of line
            ``idx``, a sequence of ``[x, y]`` points.
        gap_multiplier: a column break is declared when the horizontal gap
            exceeds ``gap_multiplier`` times the box's average line height.

    Returns:
        ``(new_lines, new_boxes, new_quads, new_indices)``. Boxes that are not
        split keep their id and original values; each split part gets a fresh
        id above ``max(out_boxes)``, with its lines ordered top to bottom.
    """
    return _split_boxes_by_axis_gap(
        out_boxes, out_indices, out_quads, out_lines, ocr,
        gap_multiplier, "min_x", "max_x",
    )


def split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5):
    """
    Splits a single bounding box into multiple boxes if there is a large vertical
    gap between the text lines.

    Used by ``process_manga_page`` as Step 9.5, after dedup and before the
    horizontal gap split.

    The gap before a line is measured from the lowest bottom edge seen so far
    in the current cluster, so a tall quad that vertically contains shorter
    ones does not cause a false split.

    Args:
        out_boxes: dict mapping box id -> ``(min_x, min_y, max_x, max_y)``.
        out_indices: dict mapping box id -> list of indices into ``ocr``.
        out_quads: dict mapping box id -> list of quads for that box.
        out_lines: dict mapping box id -> list of text lines, paired
            positionally with ``out_indices[bid]``.
        ocr: indexable OCR results; ``ocr[idx][0]`` is the quad of line
            ``idx``, a sequence of ``[x, y]`` points.
        gap_multiplier: a break is declared when the vertical gap exceeds
            ``gap_multiplier`` times the box's average line height.

    Returns:
        ``(new_lines, new_boxes, new_quads, new_indices)``. Boxes that are not
        split keep their id and original values; each split part gets a fresh
        id above ``max(out_boxes)``.
    """
    return _split_boxes_by_axis_gap(
        out_boxes, out_indices, out_quads, out_lines, ocr,
        gap_multiplier, "min_y", "max_y",
    )