Added helper for bubbles
This commit is contained in:
94
draw_debug_json.py
Normal file
94
draw_debug_json.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
import cv2
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
def draw_boxes_from_json(image_path: str, json_path: str, output_path: str):
    """Overlay numbered, type-colored bounding boxes from a bubbles JSON onto an image.

    Args:
        image_path: Path to the original manga page image.
        json_path: Path to the bubbles JSON — a dict of items, each with
            "order", "region_type", "box" ({x, y, w, h}) and "corrected_ocr".
        output_path: Where the annotated debug image is written.
    """
    # 1. Load the image
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        print(f"❌ Error: Cannot load image at {image_path}")
        return

    ih, iw = image_bgr.shape[:2]

    # 2. Load the JSON data
    if not os.path.exists(json_path):
        print(f"❌ Error: JSON file not found at {json_path}")
        return

    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Color map for different region types (BGR format)
    COLOR_MAP = {
        "dialogue": (0, 200, 0),      # Green
        "narration": (0, 165, 255),   # Orange
        "reaction": (255, 200, 0),    # Cyan/Blue
        "sfx": (0, 0, 220),           # Red
        "unknown": (120, 120, 120),   # Gray
    }

    # 3. Iterate through the JSON and draw boxes
    # Sort by order to keep numbering consistent
    sorted_items = sorted(data.values(), key=lambda x: x.get("order", 0))

    for item in sorted_items:
        bid = item.get("order", "?")
        rtype = item.get("region_type", "unknown")
        box = item.get("box", {})
        # FIX: a JSON value of null for "corrected_ocr" bypasses the .get()
        # default and would crash len(text) below — coerce None to "".
        text = item.get("corrected_ocr") or ""

        if not box:
            continue

        # Extract xywh and convert to xyxy
        x1, y1 = int(box.get("x", 0)), int(box.get("y", 0))
        w, h = int(box.get("w", 0)), int(box.get("h", 0))
        x2, y2 = x1 + w, y1 + h

        color = COLOR_MAP.get(rtype, (120, 120, 120))

        # Draw the main bounding box
        cv2.rectangle(image_bgr, (x1, y1), (x2, y2), color, 2)

        # Prepare labels: ID + type above the box, a short text preview below it
        label = f"BOX#{bid} [{rtype}]"
        preview = (text[:40] + "...") if len(text) > 40 else text

        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.38
        thickness = 1

        # Draw label background (filled rect, clamped to the top image edge)
        (lw, lh), _ = cv2.getTextSize(label, font, font_scale, thickness)
        cv2.rectangle(image_bgr,
                      (x1, max(0, y1 - lh - 6)),
                      (x1 + lw + 4, y1),
                      color, -1)

        # Draw label text (Box ID + Type) in white on the colored background
        cv2.putText(image_bgr, label,
                    (x1 + 2, max(lh, y1 - 3)),
                    font, font_scale, (255, 255, 255), thickness,
                    cv2.LINE_AA)

        # Draw preview text below the box (clamped to the bottom image edge)
        cv2.putText(image_bgr, preview,
                    (x1 + 2, min(ih - 5, y2 + 12)),
                    font, font_scale * 0.85, color, thickness,
                    cv2.LINE_AA)

    # 4. Save the final image
    # FIX: cv2.imwrite fails silently (returns False) on unwritable paths or
    # unknown extensions — check the result instead of always claiming success.
    if cv2.imwrite(output_path, image_bgr):
        print(f"✅ Debug image successfully saved to: {output_path}")
    else:
        print(f"❌ Error: Failed to write output image to {output_path}")
|
||||||
|
if __name__ == "__main__":
    # CLI entry point: overlay boxes from a bubbles.json onto a manga page.
    cli = argparse.ArgumentParser(
        description="Draw bounding boxes from bubbles.json onto an image.")
    cli.add_argument("image", help="Path to the original manga page image")
    cli.add_argument("json", help="Path to the bubbles.json file")
    cli.add_argument("--output", "-o",
                     default="debug_clusters_from_json.png",
                     help="Output image path")

    ns = cli.parse_args()

    draw_boxes_from_json(ns.image, ns.json, ns.output)
|
||||||
@@ -387,42 +387,71 @@ def fix_common_dialogue_ocr(text):
|
|||||||
return t
|
return t
|
||||||
|
|
||||||
replacements = {
|
replacements = {
|
||||||
"1'M": "I'M",
|
"1'M": "I'M",
|
||||||
"1 DIDN'T": "I DIDN'T",
|
"1 DIDN'T": "I DIDN'T",
|
||||||
"1 HATE": "I HATE",
|
"1 HATE": "I HATE",
|
||||||
"1 WAS": "I WAS",
|
"1 WAS": "I WAS",
|
||||||
"1'M ": "I'M ",
|
"1'M ": "I'M ",
|
||||||
"YO U": "YOU",
|
"YO U": "YOU",
|
||||||
"YOU RE": "YOU'RE",
|
"YOU RE": "YOU'RE",
|
||||||
"YOURE": "YOU'RE",
|
"YOURE": "YOU'RE",
|
||||||
"I LL": "I'LL",
|
"I LL": "I'LL",
|
||||||
"ILL ": "I'LL ",
|
"ILL ": "I'LL ",
|
||||||
"DONT": "DON'T",
|
"DONT": "DON'T",
|
||||||
"DIDNT": "DIDN'T",
|
"DIDNT": "DIDN'T",
|
||||||
"CANT": "CAN'T",
|
"CANT": "CAN'T",
|
||||||
"WONT": "WON'T",
|
"WONT": "WON'T",
|
||||||
"THATS": "THAT'S",
|
"THATS": "THAT'S",
|
||||||
"MOMS": "MOM'S",
|
"MOMS": "MOM'S",
|
||||||
"DADS": "DAD'S",
|
"DADS": "DAD'S",
|
||||||
"LEARN- ING": "LEARNING",
|
"LEARN- ING": "LEARNING",
|
||||||
"COV- ERED": "COVERED",
|
"COV- ERED": "COVERED",
|
||||||
"SY ON": "SY-ON",
|
"SY ON": "SY-ON",
|
||||||
"P PROPERLY": "P-PROPERLY",
|
"P PROPERLY": "P-PROPERLY",
|
||||||
"SH SHUT": "SH- SHUT",
|
"SH SHUT": "SH- SHUT",
|
||||||
}
|
}
|
||||||
|
|
||||||
for a, b in replacements.items():
|
for a, b in replacements.items():
|
||||||
t = t.replace(a, b)
|
t = t.replace(a, b)
|
||||||
|
|
||||||
|
# Contraction reconstruction
|
||||||
t = re.sub(r"\b([A-Z]+) NT\b", r"\1N'T", t)
|
t = re.sub(r"\b([A-Z]+) NT\b", r"\1N'T", t)
|
||||||
t = re.sub(r"\b([A-Z]+) RE\b", r"\1'RE", t)
|
t = re.sub(r"\b([A-Z]+) RE\b", r"\1'RE", t)
|
||||||
t = re.sub(r"\b([A-Z]+) VE\b", r"\1'VE", t)
|
t = re.sub(r"\b([A-Z]+) VE\b", r"\1'VE", t)
|
||||||
t = re.sub(r"\b([A-Z]+) LL\b", r"\1'LL", t)
|
t = re.sub(r"\b([A-Z]+) LL\b", r"\1'LL", t)
|
||||||
t = re.sub(r"\b([A-Z]+) S\b", r"\1'S", t)
|
t = re.sub(r"\b([A-Z]+) S\b", r"\1'S", t)
|
||||||
|
|
||||||
|
# Spacing before punctuation
|
||||||
t = re.sub(r"\s+([,.;:!?])", r"\1", t)
|
t = re.sub(r"\s+([,.;:!?])", r"\1", t)
|
||||||
|
|
||||||
|
# ── D→P misread (bold manga fonts) ──────────────────────────
|
||||||
|
t = re.sub(r'\bPON\b', "DON'T", t)
|
||||||
|
t = re.sub(r"\bPON'T\b", "DON'T", t)
|
||||||
|
t = re.sub(r'\bPOWN\b', 'DOWN', t)
|
||||||
|
t = re.sub(r'\bTAKP\b', 'TAKE', t)
|
||||||
|
t = re.sub(r'\bTHP\b', 'THE', t)
|
||||||
|
t = re.sub(r'\bANP\b', 'AND', t)
|
||||||
|
t = re.sub(r'\bHANP\b', 'HAND', t)
|
||||||
|
t = re.sub(r'\bPEATH\b', 'DEATH', t)
|
||||||
|
t = re.sub(r'\bCRUSHEP\b', 'CRUSHED', t)
|
||||||
|
|
||||||
|
# ── Missing space / run-together words ───────────────────────
|
||||||
|
t = re.sub(r'\bICAN\b', 'I CAN', t)
|
||||||
|
t = re.sub(r"\bITS\b", "IT'S", t)
|
||||||
|
|
||||||
|
# ── O→U misread (THROUOH → THROUGH) ─────────────────────────
|
||||||
|
t = re.sub(r'\bTHROUOH\b', 'THROUGH', t)
|
||||||
|
|
||||||
|
# Fix line-break artifacts first so whole words can be matched below
|
||||||
t = dehyphenate_linebreak_artifacts(t)
|
t = dehyphenate_linebreak_artifacts(t)
|
||||||
|
|
||||||
|
# ── Missing last word recovery ───────────────────────────────
|
||||||
|
# e.g. "DON'T PAY ANY ATTENTION TO" → "DON'T PAY ANY ATTENTION TO THEM!"
|
||||||
|
t = re.sub(r"\bATTENTION TO$", "ATTENTION TO THEM!", t)
|
||||||
|
|
||||||
t = dedupe_repeated_phrase(t)
|
t = dedupe_repeated_phrase(t)
|
||||||
|
|
||||||
|
# Remove consecutive duplicate words (e.g. "SEE SEE" → "SEE")
|
||||||
words = t.split()
|
words = t.split()
|
||||||
cleaned = []
|
cleaned = []
|
||||||
for w in words:
|
for w in words:
|
||||||
@@ -430,6 +459,7 @@ def fix_common_dialogue_ocr(text):
|
|||||||
continue
|
continue
|
||||||
cleaned.append(w)
|
cleaned.append(w)
|
||||||
t = " ".join(cleaned)
|
t = " ".join(cleaned)
|
||||||
|
|
||||||
t = re.sub(r"\s{2,}", " ", t).strip()
|
t = re.sub(r"\s{2,}", " ", t).strip()
|
||||||
return t
|
return t
|
||||||
|
|
||||||
@@ -502,6 +532,36 @@ def normalize_text(text: str) -> str:
|
|||||||
t = re.sub(r"\.{4,}", "...", t)
|
t = re.sub(r"\.{4,}", "...", t)
|
||||||
return t.strip()
|
return t.strip()
|
||||||
|
|
||||||
|
def adjust_box_for_added_text(box_xyxy, raw_text, corrected_text):
    """Grow a bounding box downward when OCR correction added words.

    If the corrected text has more words than the raw OCR text (e.g. a
    recovered sentence ending), the box height is scaled by the word-count
    ratio, capped at 2.0x, by moving the bottom edge (y2) down.

    Returns the (possibly adjusted) xyxy tuple; the input is returned
    unchanged when nothing was added or any argument is missing.
    """
    if box_xyxy is None or not raw_text or not corrected_text:
        return box_xyxy

    n_raw = len(raw_text.split())
    n_fixed = len(corrected_text.split())

    # Only adjust when words were actually added.
    if n_fixed <= n_raw:
        return box_xyxy

    x1, y1, x2, y2 = box_xyxy
    height = max(1, y2 - y1)

    # Proportional height increase, capped to avoid box blowouts (max 2.0x).
    scale = min(2.0, n_fixed / max(1, n_raw))

    return (x1, y1, x2, y1 + int(height * scale))
|
||||||
def postprocess_translation_general(text: str) -> str:
|
def postprocess_translation_general(text: str) -> str:
|
||||||
t = normalize_text(text)
|
t = normalize_text(text)
|
||||||
t = re.sub(r"\s{2,}", " ", t).strip()
|
t = re.sub(r"\s{2,}", " ", t).strip()
|
||||||
@@ -514,6 +574,8 @@ def fix_common_ocr_errors(text: str) -> str:
|
|||||||
FIX Issue 1: fix_digit_letters is now defined BEFORE the return
|
FIX Issue 1: fix_digit_letters is now defined BEFORE the return
|
||||||
statement so it is actually executed.
|
statement so it is actually executed.
|
||||||
"""
|
"""
|
||||||
|
text = re.sub(r'([A-Z]{2,})I(\s+[A-Z])', r'\1! \2', text)
|
||||||
|
text = re.sub(r'([A-Z]{2,})I$', r'\1!', text)
|
||||||
result = text
|
result = text
|
||||||
|
|
||||||
# Word-level bold font fixes
|
# Word-level bold font fixes
|
||||||
@@ -2003,34 +2065,83 @@ def split_group_by_spatial_gap(indices: list, ocr: list,
|
|||||||
|
|
||||||
return [indices]
|
return [indices]
|
||||||
|
|
||||||
|
def split_at_sentence_boundaries(
    indices: List[int],
    lines: List[str],
    ocr: List[Tuple],
    min_gap_px: int = 8
) -> List[List[int]]:
    """Split a flat list of quad indices at sentence-ending punctuation
    boundaries IF there is a measurable vertical gap between the last
    quad of sentence N and the first quad of sentence N+1.

    Returns a list of groups (each group is a List[int] of indices).
    Always returns at least one group (the original) if no split fires.
    """
    # Nothing to split for empty or single-quad groups.
    if not indices or len(indices) < 2:
        return [indices]

    # Reading order: sort quads top-to-bottom by their bbox y coordinate.
    by_top = sorted(indices, key=lambda i: quad_bbox(ocr[i][0])[1])

    # Rebuild the full text in that reading order.
    joined = " ".join(ocr[i][1] for i in by_top)

    # Fix common OCR mangling: trailing I after an ALL-CAPS word -> "!"
    # e.g. "LIKE THISI IF" -> "LIKE THIS! IF"
    joined = re.sub(r'([A-Z]{2,})I(\s+[A-Z])', r'\1! \2', joined)
    joined = re.sub(r'([A-Z]{2,})I$', r'\1!', joined)

    # Find ALL sentence boundaries, not just the first one.
    boundaries = [m.start() for m in re.finditer(r'[.!?]\s+[A-Z]', joined)]
    if not boundaries:
        return [indices]

    # Map each boundary character offset -> position of the quad holding it.
    candidates = []
    for b_pos in boundaries:
        cursor = 0
        for pos, i in enumerate(by_top):
            cursor += len(ocr[i][1]) + 1  # +1 for the joining space
            if cursor >= b_pos + 2:
                # A split after the very last quad would be meaningless.
                if pos < len(by_top) - 1:
                    candidates.append(pos)
                break

    if not candidates:
        return [indices]

    # Deduplicate and sort the candidate cut points.
    candidates = sorted(set(candidates))

    # Keep only cuts backed by a real vertical gap between the two quads.
    cuts = []
    for pos in candidates:
        bb_a = quad_bbox(ocr[by_top[pos]][0])
        bb_b = quad_bbox(ocr[by_top[pos + 1]][0])
        # gap = top of the quad below minus bottom (y + h) of the quad above
        gap = bb_b[1] - (bb_a[1] + bb_a[3])
        if gap >= min_gap_px:
            cuts.append(pos)

    if not cuts:
        return [indices]

    # Slice the sorted order into groups at each confirmed cut point.
    groups = []
    start = 0
    for cut in cuts:
        groups.append(by_top[start : cut + 1])
        start = cut + 1
    groups.append(by_top[start:])  # remainder

    # Drop any empty groups (safety).
    return [g for g in groups if g]
||||||
def apply_contour_split_to_all_boxes(bubble_boxes, bubble_indices, bubble_quads,
|
def apply_contour_split_to_all_boxes(bubble_boxes, bubble_indices, bubble_quads,
|
||||||
bubbles, ocr, image_bgr):
|
bubbles, ocr, image_bgr):
|
||||||
@@ -2040,7 +2151,7 @@ def apply_contour_split_to_all_boxes(bubble_boxes, bubble_indices, bubble_quads,
|
|||||||
1. Contour membership — different speech-bubble contours
|
1. Contour membership — different speech-bubble contours
|
||||||
2. Mixed region type — sfx quads merged with dialogue quads
|
2. Mixed region type — sfx quads merged with dialogue quads
|
||||||
3. Spatial gap — two dialogue bubbles side-by-side
|
3. Spatial gap — two dialogue bubbles side-by-side
|
||||||
4. Sentence boundary — FIX Issue 2: now actually called here
|
4. Sentence boundary — tall box containing two stacked bubbles
|
||||||
"""
|
"""
|
||||||
bubble_contours = detect_speech_bubbles(image_bgr)
|
bubble_contours = detect_speech_bubbles(image_bgr)
|
||||||
quad_to_bubble = (build_quad_to_bubble_map(ocr, bubble_contours)
|
quad_to_bubble = (build_quad_to_bubble_map(ocr, bubble_contours)
|
||||||
@@ -2053,32 +2164,38 @@ def apply_contour_split_to_all_boxes(bubble_boxes, bubble_indices, bubble_quads,
|
|||||||
for bid in sorted(bubble_boxes.keys()):
|
for bid in sorted(bubble_boxes.keys()):
|
||||||
indices = bubble_indices[bid]
|
indices = bubble_indices[bid]
|
||||||
|
|
||||||
# Strategy 1: contour membership
|
# ── Strategy 1: contour membership ──────────────────────────────
|
||||||
groups = split_group_by_contour_membership(indices, ocr, quad_to_bubble)
|
groups = split_group_by_contour_membership(indices, ocr, quad_to_bubble)
|
||||||
|
|
||||||
# Strategy 2: mixed region type
|
# ── Strategy 2: mixed region type ───────────────────────────────
|
||||||
refined = []
|
refined = []
|
||||||
for grp in groups:
|
for grp in groups:
|
||||||
sub = split_group_by_region_type(grp, ocr)
|
sub = split_group_by_region_type(grp, ocr)
|
||||||
refined.extend(sub)
|
refined.extend(sub)
|
||||||
groups = refined
|
groups = refined
|
||||||
|
|
||||||
# Strategy 3: spatial gap
|
# ── Strategy 3: spatial gap ──────────────────────────────────────
|
||||||
final = []
|
gapped = []
|
||||||
for grp in groups:
|
for grp in groups:
|
||||||
sub = split_group_by_spatial_gap(grp, ocr, gap_factor=1.8)
|
sub = split_group_by_spatial_gap(grp, ocr, gap_factor=1.8)
|
||||||
final.extend(sub)
|
gapped.extend(sub)
|
||||||
groups = final
|
groups = gapped
|
||||||
|
|
||||||
# Strategy 4: sentence boundary split ← FIX Issue 2
|
# ── Strategy 4: sentence boundary ───────────────────────────────
|
||||||
sentence_final = []
|
# Signature: (indices, lines, ocr, min_gap_px) → List[List[int]]
|
||||||
|
sentenced = []
|
||||||
for grp in groups:
|
for grp in groups:
|
||||||
grp_lines = [normalize_text(ocr[i][1]) for i in grp]
|
grp_lines = [normalize_text(ocr[i][1]) for i in grp]
|
||||||
sub = split_at_sentence_boundaries(grp, grp_lines)
|
sub = split_at_sentence_boundaries(
|
||||||
sentence_final.extend(sub)
|
grp,
|
||||||
groups = sentence_final
|
grp_lines,
|
||||||
|
ocr,
|
||||||
|
min_gap_px=8
|
||||||
|
)
|
||||||
|
sentenced.extend(sub)
|
||||||
|
groups = sentenced
|
||||||
|
|
||||||
# Commit results
|
# ── Commit results ───────────────────────────────────────────────
|
||||||
if len(groups) <= 1:
|
if len(groups) <= 1:
|
||||||
new_bubbles[next_bid] = bubbles[bid]
|
new_bubbles[next_bid] = bubbles[bid]
|
||||||
new_boxes[next_bid] = bubble_boxes[bid]
|
new_boxes[next_bid] = bubble_boxes[bid]
|
||||||
@@ -2106,7 +2223,6 @@ def apply_contour_split_to_all_boxes(bubble_boxes, bubble_indices, bubble_quads,
|
|||||||
|
|
||||||
return new_bubbles, new_boxes, new_quads, new_indices
|
return new_bubbles, new_boxes, new_quads, new_indices
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# SPLIT HELPERS FOR enforce_max_box_size
|
# SPLIT HELPERS FOR enforce_max_box_size
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -2427,9 +2543,12 @@ def process_manga_page(image_path: str,
|
|||||||
# Apply bold-font fixes on top of dialogue correction
|
# Apply bold-font fixes on top of dialogue correction
|
||||||
corrected_text = fix_common_ocr_errors(corrected_text)
|
corrected_text = fix_common_ocr_errors(corrected_text)
|
||||||
|
|
||||||
# Confidence
|
# 👉 INJECTED FIX: Adjust the box if words were added
|
||||||
|
adjusted_box_xyxy = adjust_box_for_added_text(box, raw_text, corrected_text)
|
||||||
|
|
||||||
|
# Confidence (using the adjusted box)
|
||||||
conf = compute_region_confidence(
|
conf = compute_region_confidence(
|
||||||
raw_text, corrected_text, box, region_type, image_bgr)
|
raw_text, corrected_text, adjusted_box_xyxy, region_type, image_bgr)
|
||||||
conf = maybe_conf_floor_for_protected(corrected_text, conf)
|
conf = maybe_conf_floor_for_protected(corrected_text, conf)
|
||||||
|
|
||||||
# Flags
|
# Flags
|
||||||
@@ -2476,7 +2595,7 @@ def process_manga_page(image_path: str,
|
|||||||
"translated": translated,
|
"translated": translated,
|
||||||
"flags": flags,
|
"flags": flags,
|
||||||
"bubble_groups": bubble_groups,
|
"bubble_groups": bubble_groups,
|
||||||
"box": xyxy_to_xywh(box),
|
"box": xyxy_to_xywh(adjusted_box_xyxy), # <--- Uses the adjusted box
|
||||||
"lines": bubble_groups,
|
"lines": bubble_groups,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2490,8 +2609,6 @@ def process_manga_page(image_path: str,
|
|||||||
_write_txt_output(results, output_txt)
|
_write_txt_output(results, output_txt)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# OUTPUT WRITERS
|
# OUTPUT WRITERS
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user