Added new styles
This commit is contained in:
@@ -104,11 +104,9 @@ def looks_like_box_tag(t: str) -> bool:
|
|||||||
def is_noise_text(text: str) -> bool:
|
def is_noise_text(text: str) -> bool:
|
||||||
t = (text or "").strip()
|
t = (text or "").strip()
|
||||||
|
|
||||||
# ALLOW pure punctuation clusters like "...", "!!", "?!"
|
|
||||||
if re.fullmatch(r"[\?\!\.]+", t):
|
if re.fullmatch(r"[\?\!\.]+", t):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# ALLOW single alphabetical characters (crucial for vertical text)
|
|
||||||
if len(t) == 1 and t.isalpha():
|
if len(t) == 1 and t.isalpha():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -117,7 +115,6 @@ def is_noise_text(text: str) -> bool:
|
|||||||
if looks_like_box_tag(t):
|
if looks_like_box_tag(t):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Relaxed the length check to allow 1-2 letter words and punctuation
|
|
||||||
if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
|
if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -199,12 +196,102 @@ def ocr_candidate_score(text: str) -> float:
|
|||||||
# ============================================================
|
# ============================================================
|
||||||
# SPLITTERS
|
# SPLITTERS
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
def _widest_gap(gap_mask, start, stop):
    """Return ``(first_index, length)`` of the longest truthy run in ``gap_mask[start:stop]``.

    Returns ``(-1, 0)`` when the window holds no truthy entry. When several
    runs share the maximum length, the left-most one is reported.
    """
    best_start, best_len = -1, 0
    run_start, run_len = -1, 0
    for idx in range(start, stop):
        if gap_mask[idx]:
            if run_len == 0:
                run_start = idx
            run_len += 1
        else:
            if run_len > best_len:
                best_start, best_len = run_start, run_len
            run_len = 0
    # A run that touches the end of the window is never closed inside the loop.
    if run_len > best_len:
        best_start, best_len = run_start, run_len
    return best_start, best_len


def _split_item_at_gutter(image_bgr, item):
    """Split one OCR item at a wide vertical gutter.

    Returns ``(left_item, right_item)`` when a split is warranted, otherwise
    ``None`` (the caller then keeps the original item untouched).
    """
    quad, text, conf = item

    # Cheap text gate first: a line can only be two merged columns if it is
    # long enough and contains a space to break on.
    if len(text) <= 5 or " " not in text:
        return None

    # NOTE(review): assumes quad_bbox returns integer pixel bounds, since they
    # are used directly as slice indices below -- confirm against its definition.
    x1, y1, x2, y2 = quad_bbox(quad)
    w = x2 - x1
    h = max(1, y2 - y1)

    # Only abnormally wide boxes are candidates.
    if w <= h * 2.5:
        return None

    pad = 2
    roi_y1 = max(0, y1 - pad)
    roi_y2 = min(image_bgr.shape[0], y2 + pad)
    roi_x1 = max(0, x1)
    roi_x2 = min(image_bgr.shape[1], x2)

    roi = image_bgr[roi_y1:roi_y2, roi_x1:roi_x2]
    if roi.size == 0:
        return None

    # Inverted Otsu threshold: dark pixels become 255, so the per-column sum
    # measures how much "ink" each column contains.
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    proj = np.sum(thresh, axis=0)

    # Search only the central 60% of the box. The window is defined on the
    # bbox but `proj` is indexed relative to the (possibly clipped) ROI, so
    # translate it into ROI coordinates and clamp it to the projection length.
    # Without this, a bbox extending past the image edge indexes out of range.
    start_x = max(0, x1 + int(w * 0.20) - roi_x1)
    end_x = min(len(proj), x1 + int(w * 0.80) - roi_x1)
    if start_x >= end_x:
        return None

    # Expected width of one character, estimated from the whole line.
    char_w = w / max(1, len(text))
    # A real column gap should be at least 2.5 chars wide or 75% of line height.
    min_gap_width = max(int(char_w * 2.5), int(h * 0.75))

    # A column counts as empty when its ink sum is below 15% of a column of
    # `h` fully-set pixels.
    gap_threshold = h * 255 * 0.15
    gap_mask = proj < gap_threshold

    gap_start, gap_len = _widest_gap(gap_mask, start_x, end_x)

    # ONLY split if the gap is wide enough to be a gutter between bubbles.
    if gap_len < min_gap_width:
        return None

    # Middle of the gutter, converted back to image coordinates.
    split_x = roi_x1 + gap_start + (gap_len // 2)

    # Map the pixel position to an approximate character index, then snap to
    # the nearest space if one lies within 35% of the text length.
    split_idx = int((split_x - x1) / char_w)
    spaces = [i for i, ch in enumerate(text) if ch == " "]
    if spaces:
        nearest_space = min(spaces, key=lambda i: abs(i - split_idx))
        if abs(nearest_space - split_idx) < len(text) * 0.35:
            split_idx = nearest_space

    text_left = text[:split_idx].strip()
    text_right = text[split_idx:].strip()
    if not (text_left and text_right):
        return None

    quad_left = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
    quad_right = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
    return (quad_left, text_left, conf), (quad_right, text_right, conf)


def split_wide_ocr_items(image_bgr, filtered_ocr):
    """
    Detects if Apple Vision incorrectly merged two columns into a single wide line.

    For every wide, multi-word item the text region is binarised and projected
    onto the x axis; the widest empty run in the central part of the box is
    measured and the item is split there only if that gap is significantly
    wider than a normal space between words.

    Args:
        image_bgr: Image array indexed as ``[row, col, channel]``; it is passed
            to ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
        filtered_ocr: Iterable of ``(quad, text, conf)`` items.

    Returns:
        ``(new_filtered, splits_made)``: the item list with every split item
        replaced by its left and right halves (both keep the original
        confidence), and the number of items that were split.
    """
    new_filtered = []
    splits_made = 0

    for item in filtered_ocr:
        halves = _split_item_at_gutter(image_bgr, item)
        if halves is None:
            # If no split was made, keep the original item.
            new_filtered.append(item)
            continue
        new_filtered.extend(halves)
        splits_made += 1

    return new_filtered, splits_made
|
||||||
|
|
||||||
def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
||||||
x1, y1, x2, y2 = bbox_xyxy
|
x1, y1, x2, y2 = bbox_xyxy
|
||||||
w = x2 - x1
|
w = x2 - x1
|
||||||
h = y2 - y1
|
h = y2 - y1
|
||||||
|
|
||||||
if bubble_quads is not None and len(bubble_quads) < 10:
|
if bubble_quads is not None and len(bubble_quads) < 4:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if w < 50 or h < 50:
|
if w < 50 or h < 50:
|
||||||
@@ -345,10 +432,8 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
|
|||||||
# ============================================================
|
# ============================================================
|
||||||
class MacVisionDetector:
|
class MacVisionDetector:
|
||||||
def __init__(self, source_lang="en"):
|
def __init__(self, source_lang="en"):
|
||||||
# 1. Normalize the input language string
|
|
||||||
lang_key = source_lang.lower().strip()
|
lang_key = source_lang.lower().strip()
|
||||||
|
|
||||||
# 2. Comprehensive mapping to Apple Vision BCP-47 language codes
|
|
||||||
lang_map = {
|
lang_map = {
|
||||||
"en": "en-US", "english": "en-US",
|
"en": "en-US", "english": "en-US",
|
||||||
"es": "es-ES", "spanish": "es-ES",
|
"es": "es-ES", "spanish": "es-ES",
|
||||||
@@ -358,10 +443,9 @@ class MacVisionDetector:
|
|||||||
"it": "it-IT", "italian": "it-IT",
|
"it": "it-IT", "italian": "it-IT",
|
||||||
"de": "de-DE", "german": "de-DE",
|
"de": "de-DE", "german": "de-DE",
|
||||||
"ko": "ko-KR", "korean": "ko-KR",
|
"ko": "ko-KR", "korean": "ko-KR",
|
||||||
"zh": "ko-KR", "chinese": "zh-Hans" # Simplified Chinese
|
"zh": "zh-Hans", "chinese": "zh-Hans"
|
||||||
}
|
}
|
||||||
|
|
||||||
# 3. Resolve the language code
|
|
||||||
apple_lang = lang_map.get(lang_key, "en-US")
|
apple_lang = lang_map.get(lang_key, "en-US")
|
||||||
self.langs = [apple_lang]
|
self.langs = [apple_lang]
|
||||||
print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
|
print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
|
||||||
@@ -929,6 +1013,11 @@ def translate_manga_text(
|
|||||||
print("⚠️ No text after filtering.")
|
print("⚠️ No text after filtering.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# --- NEW: Split wide OCR items across column gaps ---
|
||||||
|
filtered, splits_made = split_wide_ocr_items(image, filtered)
|
||||||
|
if splits_made > 0:
|
||||||
|
print(f"✂️ Split {splits_made} wide OCR lines across column gaps.")
|
||||||
|
|
||||||
bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
|
bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
|
||||||
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
||||||
)
|
)
|
||||||
@@ -957,7 +1046,7 @@ def translate_manga_text(
|
|||||||
if left_idxs and right_idxs:
|
if left_idxs and right_idxs:
|
||||||
bubble_split = (left_idxs, right_idxs)
|
bubble_split = (left_idxs, right_idxs)
|
||||||
splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
|
splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
|
||||||
elif len(bubble_quads[bid]) >= 10:
|
elif len(bubble_quads[bid]) >= 4:
|
||||||
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
|
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
|
||||||
if col_split:
|
if col_split:
|
||||||
left_idxs, right_idxs = col_split
|
left_idxs, right_idxs = col_split
|
||||||
@@ -1115,8 +1204,8 @@ def translate_manga_text(
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
translate_manga_text(
|
translate_manga_text(
|
||||||
image_path="003.jpg",
|
image_path="09.jpg",
|
||||||
source_lang="es",
|
source_lang="english",
|
||||||
target_lang="ca",
|
target_lang="ca",
|
||||||
confidence_threshold=0.05,
|
confidence_threshold=0.05,
|
||||||
min_text_length=1,
|
min_text_length=1,
|
||||||
@@ -1125,6 +1214,6 @@ if __name__ == "__main__":
|
|||||||
quality_threshold=0.62,
|
quality_threshold=0.62,
|
||||||
export_to_file="output.txt",
|
export_to_file="output.txt",
|
||||||
export_bubbles_to="bubbles.json",
|
export_bubbles_to="bubbles.json",
|
||||||
reading_mode="rtl", # Changed to RTL for Japanese Manga
|
reading_mode="rtl",
|
||||||
debug=True
|
debug=True
|
||||||
)
|
)
|
||||||
Reference in New Issue
Block a user