Added some stuff
This commit is contained in:
@@ -519,6 +519,186 @@ def build_region_flags(raw_text, corrected_text, region_type, conf):
|
|||||||
# ============================================================
|
# ============================================================
|
||||||
# HELPERS
|
# HELPERS
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
    """Break boxes apart wherever their text forms horizontally separated columns.

    For every box, the quad of each OCR entry (``ocr[idx][0]``, a sequence of
    ``(x, y)`` points) is reduced to its extents.  Entries are swept from left
    to right; whenever the distance between an entry's left edge and the
    right-most edge seen so far in the current column exceeds
    ``gap_multiplier`` times the box's average line height, a new column is
    started (e.g. side-by-side speech bubbles merged into one box).

    Args:
        out_boxes: Mapping ``bid -> box``.
        out_indices: Mapping ``bid -> list of indices into ocr``.
        out_quads: Mapping ``bid -> list of quads``.
        out_lines: Mapping ``bid -> list of text lines``, paired positionally
            with ``out_indices[bid]``.
        ocr: Indexable OCR results; element ``[0]`` of each entry is its quad.
        gap_multiplier: Factor applied to the average line height to obtain
            the split threshold.

    Returns:
        Tuple ``(new_lines, new_boxes, new_quads, new_indices)``.  Boxes that
        are not split keep their id and their original objects.  A split box
        loses its id; each column gets a fresh id (counting up from
        ``max(out_boxes) + 1``) and a ``(min_x, min_y, max_x, max_y)`` box
        computed from its quads, with its lines ordered top to bottom.
    """
    kept_boxes, kept_indices, kept_quads, kept_lines = {}, {}, {}, {}

    fresh_bid = max(out_boxes.keys()) + 1 if out_boxes else 1

    for bid, box in out_boxes.items():
        indices = out_indices[bid]
        lines = out_lines[bid]

        columns = []
        if len(indices) >= 2:
            # One record per line: (left, right, top, bottom, idx, text, quad).
            # NOTE(review): zip() stops at the shorter of indices/lines, so any
            # surplus entries are silently dropped -- confirm callers keep the
            # two lists the same length.
            records = []
            for idx, text in zip(indices, lines):
                quad = ocr[idx][0]
                xs = [pt[0] for pt in quad]
                ys = [pt[1] for pt in quad]
                records.append((min(xs), max(xs), min(ys), max(ys), idx, text, quad))

            # The threshold scales with the text size found in this box.
            mean_height = sum(rec[3] - rec[2] for rec in records) / len(records)
            max_gap = mean_height * gap_multiplier

            # Sweep left to right, tracking the right edge of the open column.
            records.sort(key=lambda rec: rec[0])
            first = records[0]
            columns.append([first])
            right_edge = first[1]
            for rec in records[1:]:
                if rec[0] - right_edge > max_gap:
                    columns.append([rec])
                    right_edge = rec[1]
                else:
                    columns[-1].append(rec)
                    right_edge = max(right_edge, rec[1])

        if len(columns) <= 1:
            # Too few lines to split, or everything landed in one column:
            # carry the box over untouched under its original id.
            kept_boxes[bid] = box
            kept_indices[bid] = indices
            kept_quads[bid] = out_quads[bid]
            kept_lines[bid] = lines
            continue

        for column in columns:
            # Restore reading order (top to bottom) inside the new box.
            column.sort(key=lambda rec: rec[2])

            col_quads = [rec[6] for rec in column]
            col_xs = [pt[0] for quad in col_quads for pt in quad]
            col_ys = [pt[1] for quad in col_quads for pt in quad]

            kept_boxes[fresh_bid] = (min(col_xs), min(col_ys), max(col_xs), max(col_ys))
            kept_indices[fresh_bid] = [rec[4] for rec in column]
            kept_quads[fresh_bid] = col_quads
            kept_lines[fresh_bid] = [rec[5] for rec in column]
            fresh_bid += 1

    return kept_lines, kept_boxes, kept_quads, kept_indices
|
||||||
|
|
||||||
|
def split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5):
    """Split boxes whose text lines are separated by a large vertical gap.

    For every box, the quad of each OCR entry (``ocr[idx][0]``, a sequence of
    ``[x, y]`` points) is reduced to its vertical extent.  Lines are swept from
    top to bottom; a new cluster is started whenever the distance between a
    line's top and the lowest bottom edge seen so far in the current cluster
    exceeds ``gap_multiplier`` times the box's average line height.

    The gap is measured against the cluster's running bottom rather than the
    previously appended line: after sorting by top edge, a short line can sit
    entirely inside an earlier, taller one, and measuring from the short line
    would report a gap where the text actually overlaps.  This matches how the
    horizontal splitter tracks the running right edge of a column.

    Args:
        out_boxes: Mapping ``bid -> box``.
        out_indices: Mapping ``bid -> list of indices into ocr``.
        out_quads: Mapping ``bid -> list of quads``.
        out_lines: Mapping ``bid -> list of text lines``, paired positionally
            with ``out_indices[bid]``.
        ocr: Indexable OCR results; element ``[0]`` of each entry is its quad.
        gap_multiplier: Factor applied to the average line height to obtain
            the split threshold.

    Returns:
        Tuple ``(new_lines, new_boxes, new_quads, new_indices)``.  Boxes that
        are not split keep their id and their original objects.  A split box
        loses its id; each cluster gets a fresh id (counting up from
        ``max(out_boxes) + 1``) and a ``(min_x, min_y, max_x, max_y)`` box
        computed from its quads.
    """
    new_boxes = {}
    new_indices = {}
    new_quads = {}
    new_lines = {}

    next_bid = max(out_boxes) + 1 if out_boxes else 1

    for bid, box in out_boxes.items():
        indices = out_indices[bid]
        lines = out_lines[bid]

        if len(indices) < 2:
            # Nothing to split.
            new_boxes[bid] = box
            new_indices[bid] = indices
            new_quads[bid] = out_quads[bid]
            new_lines[bid] = lines
            continue

        # Vertical extent of each individual text line (quad).
        # NOTE(review): zip() stops at the shorter of indices/lines, so any
        # surplus entries are silently dropped -- confirm callers keep the two
        # lists the same length.
        line_data = []
        for idx, text in zip(indices, lines):
            quad = ocr[idx][0]
            ys = [p[1] for p in quad]
            line_data.append({
                "idx": idx, "text": text, "quad": quad,
                "min_y": min(ys), "max_y": max(ys),
            })

        # Sweep order: top to bottom.
        line_data.sort(key=lambda ld: ld["min_y"])

        # The threshold scales with the text size found in this box.
        avg_height = sum(ld["max_y"] - ld["min_y"] for ld in line_data) / len(line_data)
        gap_threshold = avg_height * gap_multiplier

        clusters = []
        current_cluster = [line_data[0]]
        current_max_y = line_data[0]["max_y"]

        for curr_line in line_data[1:]:
            vertical_gap = curr_line["min_y"] - current_max_y

            if vertical_gap > gap_threshold:
                # Gap is too large: close the cluster and start a new one.
                clusters.append(current_cluster)
                current_cluster = [curr_line]
                current_max_y = curr_line["max_y"]
            else:
                current_cluster.append(curr_line)
                # A nested short line must not pull the cluster bottom up.
                current_max_y = max(current_max_y, curr_line["max_y"])

        clusters.append(current_cluster)

        if len(clusters) == 1:
            # No split happened: keep the box under its original id.
            new_boxes[bid] = box
            new_indices[bid] = indices
            new_quads[bid] = out_quads[bid]
            new_lines[bid] = lines
            continue

        # The box was split: the original id is dropped and every cluster
        # becomes its own box with a tight bounding rectangle.
        for cluster in clusters:
            c_quads = [ld["quad"] for ld in cluster]
            all_xs = [p[0] for q in c_quads for p in q]
            all_ys = [p[1] for q in c_quads for p in q]

            new_boxes[next_bid] = (min(all_xs), min(all_ys), max(all_xs), max(all_ys))
            new_indices[next_bid] = [ld["idx"] for ld in cluster]
            new_quads[next_bid] = c_quads
            new_lines[next_bid] = [ld["text"] for ld in cluster]
            next_bid += 1

    return new_lines, new_boxes, new_quads, new_indices
|
||||||
|
|
||||||
def normalize_text(text: str) -> str:
|
def normalize_text(text: str) -> str:
|
||||||
t = (text or "").strip().upper()
|
t = (text or "").strip().upper()
|
||||||
t = t.replace("\u201c", "\"").replace("\u201d", "\"")
|
t = t.replace("\u201c", "\"").replace("\u201d", "\"")
|
||||||
@@ -2506,6 +2686,18 @@ def process_manga_page(image_path: str,
|
|||||||
|
|
||||||
print(f" Boxes after dedup: {len(out_boxes)}")
|
print(f" Boxes after dedup: {len(out_boxes)}")
|
||||||
|
|
||||||
|
# ── Step 9.5: Split boxes with large vertical gaps ────────
|
||||||
|
out_lines, out_boxes, out_quads, out_indices = \
|
||||||
|
split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5)
|
||||||
|
|
||||||
|
print(f" Boxes after vertical gap split: {len(out_boxes)}")
|
||||||
|
|
||||||
|
# ── Step 9.6: Split boxes with large horizontal gaps ──────
|
||||||
|
out_lines, out_boxes, out_quads, out_indices = \
|
||||||
|
split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5)
|
||||||
|
|
||||||
|
print(f" Boxes after horizontal gap split: {len(out_boxes)}")
|
||||||
|
|
||||||
# ── Step 10: Enforce max box size ─────────────────────────
|
# ── Step 10: Enforce max box size ─────────────────────────
|
||||||
out_lines, out_boxes, out_quads, out_indices = \
|
out_lines, out_boxes, out_quads, out_indices = \
|
||||||
enforce_max_box_size(out_boxes, out_indices, out_quads, out_lines,
|
enforce_max_box_size(out_boxes, out_indices, out_quads, out_lines,
|
||||||
|
|||||||
Reference in New Issue
Block a user