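Add helpers that split OCR boxes on large horizontal/vertical gaps and call them from process_manga_page (steps 9.5 and 9.6)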
This commit is contained in:
@@ -519,6 +519,186 @@ def build_region_flags(raw_text, corrected_text, region_type, conf):
|
||||
# ============================================================
|
||||
# HELPERS
|
||||
# ============================================================
|
||||
def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
    """
    Break a box apart into side-by-side columns when its text lines are
    separated by a wide horizontal gap (e.g., side-by-side speech bubbles).

    For every box, the quad of each member line is read from ``ocr[idx][0]``
    (a sequence of points indexed as ``p[0]`` = x, ``p[1]`` = y). Lines are
    ordered left to right and merged into a column as long as the gap between
    a line's left edge and the column's running right edge does not exceed
    ``average line height * gap_multiplier``.

    Args:
        out_boxes: dict mapping box id -> box value (passed through untouched
            for boxes that are not split).
        out_indices: dict mapping box id -> list of indices into ``ocr``.
        out_quads: dict mapping box id -> quads of that box (passed through
            for boxes that are not split).
        out_lines: dict mapping box id -> list of text lines, parallel to
            ``out_indices[box id]``.
        ocr: indexable collection whose items expose the quad at position 0.
        gap_multiplier: factor applied to the average line height to obtain
            the horizontal gap threshold.

    Returns:
        Tuple ``(new_lines, new_boxes, new_quads, new_indices)`` of dicts.
        Boxes that are not split keep their id and original values. Each
        column of a split box is stored under a fresh id (starting at
        ``max(existing ids) + 1``) with a recomputed
        ``(min_x, min_y, max_x, max_y)`` box and its lines ordered top to
        bottom; the id of the split box itself is not reused.
    """
    split_lines, split_boxes, split_quads, split_indices = {}, {}, {}, {}

    # Fresh ids start after the largest existing id so they never collide
    # with boxes that are carried over unchanged.
    fresh_id = max(out_boxes.keys()) + 1 if out_boxes else 1

    def carry_over(box_id, rect):
        # Copy a box that is not being split into the result, under its own id.
        split_boxes[box_id] = rect
        split_indices[box_id] = out_indices[box_id]
        split_quads[box_id] = out_quads[box_id]
        split_lines[box_id] = out_lines[box_id]

    for box_id, rect in out_boxes.items():
        member_indices = out_indices[box_id]
        member_texts = out_lines[box_id]

        # A box with fewer than two lines cannot be split.
        if len(member_indices) < 2:
            carry_over(box_id, rect)
            continue

        # Collect the extent of every text line in the box.
        records = []
        for ocr_idx, text in zip(member_indices, member_texts):
            quad = ocr[ocr_idx][0]
            xs = [pt[0] for pt in quad]
            ys = [pt[1] for pt in quad]
            records.append({
                "idx": ocr_idx, "text": text, "quad": quad,
                "left": min(xs), "right": max(xs),
                "top": min(ys), "bottom": max(ys),
            })

        # The threshold scales with the box's own average line height.
        mean_height = sum(rec["bottom"] - rec["top"] for rec in records) / len(records)
        max_gap = mean_height * gap_multiplier

        # Sweep left to right (sorted() is stable, like list.sort()).
        records = sorted(records, key=lambda rec: rec["left"])

        columns = [[records[0]]]
        right_edge = records[0]["right"]
        for rec in records[1:]:
            if rec["left"] - right_edge > max_gap:
                # Gap is too wide: this line starts a new column.
                columns.append([rec])
                right_edge = rec["right"]
            else:
                # Still part of the current column; extend its right edge.
                columns[-1].append(rec)
                right_edge = max(right_edge, rec["right"])

        if len(columns) == 1:
            # No split happened.
            carry_over(box_id, rect)
            continue

        for column in columns:
            # Inside a column the lines read top to bottom.
            column.sort(key=lambda rec: rec["top"])

            column_quads = [rec["quad"] for rec in column]
            xs = [pt[0] for quad in column_quads for pt in quad]
            ys = [pt[1] for quad in column_quads for pt in quad]

            split_boxes[fresh_id] = (min(xs), min(ys), max(xs), max(ys))
            split_indices[fresh_id] = [rec["idx"] for rec in column]
            split_quads[fresh_id] = column_quads
            split_lines[fresh_id] = [rec["text"] for rec in column]
            fresh_id += 1

    return split_lines, split_boxes, split_quads, split_indices
|
||||
|
||||
def split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5):
    """
    Split a single bounding box into multiple boxes if there is a large
    vertical gap between its text lines.

    For every box, the quad of each member line is read from ``ocr[idx][0]``
    (a sequence of points indexed as ``p[0]`` = x, ``p[1]`` = y). Lines are
    ordered top to bottom and merged into a cluster as long as the gap between
    a line's top edge and the cluster's running bottom edge does not exceed
    ``average line height * gap_multiplier``.

    The gap is measured against the lowest bottom edge seen so far in the
    cluster, not just the previously appended line. Otherwise a short line
    sorted after a tall one would shrink the reference edge, and a later line
    that still overlaps the tall one could be split off by mistake. This
    mirrors how split_boxes_by_horizontal_gap tracks its running right edge.

    Args:
        out_boxes: dict mapping box id -> box value (passed through untouched
            for boxes that are not split).
        out_indices: dict mapping box id -> list of indices into ``ocr``.
        out_quads: dict mapping box id -> quads of that box (passed through
            for boxes that are not split).
        out_lines: dict mapping box id -> list of text lines, parallel to
            ``out_indices[box id]``.
        ocr: indexable collection whose items expose the quad at position 0.
        gap_multiplier: factor applied to the average line height to obtain
            the vertical gap threshold.

    Returns:
        Tuple ``(new_lines, new_boxes, new_quads, new_indices)`` of dicts.
        Boxes that are not split keep their id and original values. Each
        cluster of a split box is stored under a fresh id (starting at
        ``max(existing ids) + 1``) with a recomputed
        ``(min_x, min_y, max_x, max_y)`` box and its lines ordered top to
        bottom; the id of the split box itself is not reused.
    """
    new_boxes = {}
    new_indices = {}
    new_quads = {}
    new_lines = {}

    # Fresh ids start after the largest existing id so they never collide
    # with boxes that are carried over unchanged.
    next_bid = max(out_boxes) + 1 if out_boxes else 1

    for bid, box in out_boxes.items():
        indices = out_indices[bid]
        lines = out_lines[bid]

        if len(indices) < 2:
            # Nothing to split
            new_boxes[bid] = box
            new_indices[bid] = indices
            new_quads[bid] = out_quads[bid]
            new_lines[bid] = lines
            continue

        # Get the vertical extent of each individual text line (quad)
        line_data = []
        for idx, text in zip(indices, lines):
            quad = ocr[idx][0]
            ys = [p[1] for p in quad]
            line_data.append({
                "idx": idx, "text": text, "quad": quad,
                "min_y": min(ys), "max_y": max(ys),
            })

        # Sort lines vertically (top to bottom)
        line_data.sort(key=lambda ld: ld["min_y"])

        # The threshold scales with the box's own average line height
        avg_height = sum(ld["max_y"] - ld["min_y"] for ld in line_data) / len(line_data)
        gap_threshold = avg_height * gap_multiplier

        # Group lines into clusters based on vertical proximity
        clusters = []
        current_cluster = [line_data[0]]
        current_max_y = line_data[0]["max_y"]

        for curr_line in line_data[1:]:
            vertical_gap = curr_line["min_y"] - current_max_y

            if vertical_gap > gap_threshold:
                # Gap is too large! Split here.
                clusters.append(current_cluster)
                current_cluster = [curr_line]
                current_max_y = curr_line["max_y"]
            else:
                # Same cluster; keep the lowest bottom edge seen so far
                current_cluster.append(curr_line)
                current_max_y = max(current_max_y, curr_line["max_y"])

        clusters.append(current_cluster)

        if len(clusters) == 1:
            # No split happened
            new_boxes[bid] = box
            new_indices[bid] = indices
            new_quads[bid] = out_quads[bid]
            new_lines[bid] = lines
            continue

        # Box was split! Create new entries
        for cluster in clusters:
            c_quads = [ld["quad"] for ld in cluster]

            # Calculate new bounding box for this cluster
            all_xs = [p[0] for q in c_quads for p in q]
            all_ys = [p[1] for q in c_quads for p in q]

            new_boxes[next_bid] = (min(all_xs), min(all_ys), max(all_xs), max(all_ys))
            new_indices[next_bid] = [ld["idx"] for ld in cluster]
            new_quads[next_bid] = c_quads
            new_lines[next_bid] = [ld["text"] for ld in cluster]
            next_bid += 1

    return new_lines, new_boxes, new_quads, new_indices
|
||||
|
||||
def normalize_text(text: str) -> str:
|
||||
t = (text or "").strip().upper()
|
||||
t = t.replace("\u201c", "\"").replace("\u201d", "\"")
|
||||
@@ -2506,6 +2686,18 @@ def process_manga_page(image_path: str,
|
||||
|
||||
print(f" Boxes after dedup: {len(out_boxes)}")
|
||||
|
||||
# ── Step 9.5: Split boxes with large vertical gaps ────────
|
||||
out_lines, out_boxes, out_quads, out_indices = \
|
||||
split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5)
|
||||
|
||||
print(f" Boxes after vertical gap split: {len(out_boxes)}")
|
||||
|
||||
# ── Step 9.6: Split boxes with large horizontal gaps ──────
|
||||
out_lines, out_boxes, out_quads, out_indices = \
|
||||
split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5)
|
||||
|
||||
print(f" Boxes after horizontal gap split: {len(out_boxes)}")
|
||||
|
||||
# ── Step 10: Enforce max box size ─────────────────────────
|
||||
out_lines, out_boxes, out_quads, out_indices = \
|
||||
enforce_max_box_size(out_boxes, out_indices, out_quads, out_lines,
|
||||
|
||||
Reference in New Issue
Block a user