Added some stuff

This commit is contained in:
Guillem Hernandez Sola
2026-04-23 18:34:13 +02:00
parent 2f61814971
commit 37bdc25bf6

View File

@@ -519,6 +519,186 @@ def build_region_flags(raw_text, corrected_text, region_type, conf):
# ============================================================
# HELPERS
# ============================================================
def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
    """
    Split boxes whose text lines form columns separated by a wide horizontal gap
    (e.g., side-by-side speech bubbles that were merged into one box).

    For every box, each entry of ``out_indices[bid]`` is paired positionally with
    the entry of ``out_lines[bid]`` at the same position, and its quad is read
    from ``ocr[idx][0]``. The paired lines are sorted left to right and grouped
    into columns: a new column starts whenever a line's left edge lies more than
    ``average line height * gap_multiplier`` to the right of the right-most edge
    seen so far in the current column.

    Args:
        out_boxes: dict mapping box ID -> box value (passed through untouched
            for boxes that are not split).
        out_indices: dict mapping box ID -> list of indices into ``ocr``.
        out_quads: dict mapping box ID -> quads (passed through untouched for
            boxes that are not split).
        out_lines: dict mapping box ID -> list of text lines.
        ocr: indexable collection; ``ocr[idx][0]`` must be an iterable of points
            where ``p[0]`` is x and ``p[1]`` is y.
        gap_multiplier: factor applied to the box's average line height to get
            the horizontal gap threshold.

    Returns:
        Tuple ``(new_lines, new_boxes, new_quads, new_indices)`` of dicts keyed
        by box ID. Note the order differs from the parameter order. Boxes that
        are not split keep their ID and original values. A split box is removed
        and each of its columns is stored under a fresh ID (starting at
        ``max(out_boxes) + 1``) with an ``(x_min, y_min, x_max, y_max)`` tuple
        computed from the column's quads; the quads of split boxes come from
        ``ocr``, not from ``out_quads``.
    """
    new_boxes = {}
    new_indices = {}
    new_quads = {}
    new_lines = {}
    # Fresh IDs start above every existing ID so split pieces never collide
    # with boxes that are carried over under their original ID.
    next_bid = max(out_boxes.keys()) + 1 if out_boxes else 1

    def keep_unchanged(bid, box, indices, lines):
        # Carry a box over exactly as it came in.
        new_boxes[bid] = box
        new_indices[bid] = indices
        new_quads[bid] = out_quads[bid]
        new_lines[bid] = lines

    for bid, box in out_boxes.items():
        indices = out_indices[bid]
        lines = out_lines[bid]

        if len(indices) < 2:
            # Nothing to split
            keep_unchanged(bid, box, indices, lines)
            continue

        # Extract bounding coordinates for each text line.
        # NOTE(review): zip() pairs indices and lines by position and stops at
        # the shorter of the two; if out_lines[bid] does not hold exactly one
        # text per OCR index, the unpaired entries are dropped whenever a split
        # happens -- confirm against the caller.
        line_data = []
        for idx, text in zip(indices, lines):
            quad = ocr[idx][0]
            xs = [p[0] for p in quad]
            ys = [p[1] for p in quad]
            line_data.append({
                "idx": idx, "text": text, "quad": quad,
                "min_x": min(xs), "max_x": max(xs),
                "min_y": min(ys), "max_y": max(ys),
            })

        if len(line_data) < 2:
            # Fewer than two paired lines (e.g. out_lines[bid] is empty):
            # there is nothing to split, and the average below would divide
            # by zero for an empty pairing.
            keep_unchanged(bid, box, indices, lines)
            continue

        # Average line height is used as a scale-independent gap threshold
        avg_height = sum(ld["max_y"] - ld["min_y"] for ld in line_data) / len(line_data)
        gap_threshold = avg_height * gap_multiplier

        # Sort lines horizontally (left to right)
        line_data.sort(key=lambda ld: ld["min_x"])

        # Group lines into columns based on horizontal proximity. The running
        # right edge (not just the previous line's) is tracked so a wide line
        # keeps narrower lines underneath it in the same column.
        columns = []
        current_column = [line_data[0]]
        current_max_x = line_data[0]["max_x"]
        for curr_line in line_data[1:]:
            horizontal_gap = curr_line["min_x"] - current_max_x
            if horizontal_gap > gap_threshold:
                # Gap is too large: close this column and start a new one.
                columns.append(current_column)
                current_column = [curr_line]
                current_max_x = curr_line["max_x"]
            else:
                current_column.append(curr_line)
                current_max_x = max(current_max_x, curr_line["max_x"])
        columns.append(current_column)

        if len(columns) == 1:
            # No split happened
            keep_unchanged(bid, box, indices, lines)
            continue

        # Box was split horizontally: create one new entry per column
        for col in columns:
            # Restore reading order (top to bottom) inside the column
            col.sort(key=lambda ld: ld["min_y"])
            c_quads = [ld["quad"] for ld in col]
            all_xs = [p[0] for q in c_quads for p in q]
            all_ys = [p[1] for q in c_quads for p in q]
            new_boxes[next_bid] = (min(all_xs), min(all_ys), max(all_xs), max(all_ys))
            new_indices[next_bid] = [ld["idx"] for ld in col]
            new_quads[next_bid] = c_quads
            new_lines[next_bid] = [ld["text"] for ld in col]
            next_bid += 1

    return new_lines, new_boxes, new_quads, new_indices
def split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5):
    """
    Split boxes whose text lines are separated by a large vertical gap.

    For every box, each entry of ``out_indices[bid]`` is paired positionally with
    the entry of ``out_lines[bid]`` at the same position, and its quad is read
    from ``ocr[idx][0]``. The paired lines are sorted top to bottom and grouped
    into clusters: a new cluster starts whenever a line's top edge lies more
    than ``average line height * gap_multiplier`` below the lowest bottom edge
    seen so far in the current cluster.

    Args:
        out_boxes: dict mapping box ID -> box value (passed through untouched
            for boxes that are not split).
        out_indices: dict mapping box ID -> list of indices into ``ocr``.
        out_quads: dict mapping box ID -> quads (passed through untouched for
            boxes that are not split).
        out_lines: dict mapping box ID -> list of text lines.
        ocr: indexable collection; ``ocr[idx][0]`` must be an iterable of points
            where ``p[0]`` is x and ``p[1]`` is y.
        gap_multiplier: factor applied to the box's average line height to get
            the vertical gap threshold.

    Returns:
        Tuple ``(new_lines, new_boxes, new_quads, new_indices)`` of dicts keyed
        by box ID. Note the order differs from the parameter order. Boxes that
        are not split keep their ID and original values. A split box is removed
        and each of its clusters is stored under a fresh ID (starting at
        ``max(out_boxes) + 1``) with an ``(x_min, y_min, x_max, y_max)`` tuple
        computed from the cluster's quads; the quads of split boxes come from
        ``ocr``, not from ``out_quads``.
    """
    new_boxes = {}
    new_indices = {}
    new_quads = {}
    new_lines = {}
    # Fresh IDs start above every existing ID so split pieces never collide
    # with boxes that are carried over under their original ID.
    next_bid = max(out_boxes.keys()) + 1 if out_boxes else 1

    def keep_unchanged(bid, box, indices, lines):
        # Carry a box over exactly as it came in.
        new_boxes[bid] = box
        new_indices[bid] = indices
        new_quads[bid] = out_quads[bid]
        new_lines[bid] = lines

    for bid, box in out_boxes.items():
        indices = out_indices[bid]
        lines = out_lines[bid]

        if len(indices) < 2:
            # Nothing to split
            keep_unchanged(bid, box, indices, lines)
            continue

        # Vertical extent of each individual text line (quad).
        # NOTE(review): zip() pairs indices and lines by position and stops at
        # the shorter of the two; if out_lines[bid] does not hold exactly one
        # text per OCR index, the unpaired entries are dropped whenever a split
        # happens -- confirm against the caller.
        line_data = []
        for idx, text in zip(indices, lines):
            quad = ocr[idx][0]
            ys = [p[1] for p in quad]
            line_data.append({
                "idx": idx, "text": text, "quad": quad,
                "min_y": min(ys), "max_y": max(ys),
            })

        if len(line_data) < 2:
            # Fewer than two paired lines (e.g. out_lines[bid] is empty):
            # there is nothing to split, and the average below would divide
            # by zero for an empty pairing.
            keep_unchanged(bid, box, indices, lines)
            continue

        # Sort lines vertically (top to bottom)
        line_data.sort(key=lambda ld: ld["min_y"])

        # Average line height is used as a scale-independent gap threshold
        avg_height = sum(ld["max_y"] - ld["min_y"] for ld in line_data) / len(line_data)
        gap_threshold = avg_height * gap_multiplier

        # Find split points. The gap is measured from the lowest bottom edge of
        # the whole cluster, not just the previous line: after sorting by top
        # edge, a tall line may be followed by short ones, and a later line
        # that still overlaps the tall line must not be split off.
        clusters = []
        current_cluster = [line_data[0]]
        current_max_y = line_data[0]["max_y"]
        for curr_line in line_data[1:]:
            vertical_gap = curr_line["min_y"] - current_max_y
            if vertical_gap > gap_threshold:
                # Gap is too large: close this cluster and start a new one.
                clusters.append(current_cluster)
                current_cluster = [curr_line]
                current_max_y = curr_line["max_y"]
            else:
                current_cluster.append(curr_line)
                current_max_y = max(current_max_y, curr_line["max_y"])
        clusters.append(current_cluster)

        if len(clusters) == 1:
            # No split happened
            keep_unchanged(bid, box, indices, lines)
            continue

        # Box was split: create one new entry per cluster
        for cluster in clusters:
            c_quads = [ld["quad"] for ld in cluster]
            all_xs = [p[0] for q in c_quads for p in q]
            all_ys = [p[1] for q in c_quads for p in q]
            new_boxes[next_bid] = (min(all_xs), min(all_ys), max(all_xs), max(all_ys))
            new_indices[next_bid] = [ld["idx"] for ld in cluster]
            new_quads[next_bid] = c_quads
            new_lines[next_bid] = [ld["text"] for ld in cluster]
            next_bid += 1

    return new_lines, new_boxes, new_quads, new_indices
def normalize_text(text: str) -> str:
t = (text or "").strip().upper()
t = t.replace("\u201c", "\"").replace("\u201d", "\"")
@@ -2506,6 +2686,18 @@ def process_manga_page(image_path: str,
print(f" Boxes after dedup: {len(out_boxes)}")
# ── Step 9.5: Split boxes with large vertical gaps ────────
out_lines, out_boxes, out_quads, out_indices = \
split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5)
print(f" Boxes after vertical gap split: {len(out_boxes)}")
# ── Step 9.6: Split boxes with large horizontal gaps ──────
out_lines, out_boxes, out_quads, out_indices = \
split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5)
print(f" Boxes after horizontal gap split: {len(out_boxes)}")
# ── Step 10: Enforce max box size ─────────────────────────
out_lines, out_boxes, out_quads, out_indices = \
enforce_max_box_size(out_boxes, out_indices, out_quads, out_lines,