#!/usr/bin/env python3
"""Debug helper: print how OCR detections on one page are grouped into bubbles.

The script loads the project's ``manga-translator.py`` module, runs its
``MacVisionDetector`` on a single image, drops detections using the same
predicates the original author described as "what the pipeline does", feeds
the survivors to ``group_tokens`` and prints every resulting bubble together
with the detections assigned to it.

Usage:
    debug script [IMAGE_PATH]    # IMAGE_PATH defaults to '004.png'
"""
import importlib.util
import json
import sys

import cv2
import numpy as np

# Location of the manga-translator checkout this script debugs.
REPO_DIR = '/Users/guillemhernandezsola/code/manga-translator'
MODULE_PATH = REPO_DIR + '/manga-translator.py'

# Image inspected when no path is given on the command line.
DEFAULT_IMAGE = '004.png'

# Detections with a confidence below this value are discarded.
MIN_CONFIDENCE = 0.12

# Arguments forwarded to mt.group_tokens().
GROUP_GAP_PX = 18
GROUP_BBOX_PADDING = 3

# Region of "box 5" that this debugging session is about.
# NOTE(review): not used by any code yet; the coordinate order is presumably
# (x1, y1, x2, y2) -- confirm before adding an overlap check against it.
box5_region = (378, 570, 536, 753)


def _load_translator():
    """Load and return the ``manga-translator.py`` module.

    The file name contains a hyphen, so it cannot be pulled in with a plain
    ``import`` statement; it is loaded from its path instead.
    """
    # Make the checkout importable for anything the module itself imports.
    sys.path.insert(0, REPO_DIR)
    spec = importlib.util.spec_from_file_location("manga_translator", MODULE_PATH)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


def _filter_detections(mt, raw):
    """Return the ``(bbox, normalized_text, conf)`` tuples worth grouping.

    A detection is dropped when its confidence is below ``MIN_CONFIDENCE``,
    when its normalized text is empty, or when the project's noise,
    sound-effect or title predicates match the normalized text.
    """
    kept = []
    for bbox, text, conf in raw:
        normalized = mt.normalize_text(text)
        if conf < MIN_CONFIDENCE or not normalized:
            continue
        # Short-circuit keeps the predicates in their original order.
        if (
            mt.is_noise_text(normalized)
            or mt.is_sound_effect(normalized)
            or mt.is_title_text(normalized)
        ):
            continue
        kept.append((bbox, normalized, conf))
    return kept


def main(image_path=DEFAULT_IMAGE):
    """Run detection, filtering and grouping on *image_path* and print the result.

    Exits with an error message (status 1) when the image cannot be read.
    """
    mt = _load_translator()

    # cv2.imread signals failure by returning None rather than raising, so
    # check up front instead of crashing later on ``image.shape``.
    image = cv2.imread(image_path)
    if image is None:
        sys.exit(f"Could not read image: {image_path}")

    detector = mt.MacVisionDetector(source_lang='en')
    raw = detector.read(image_path)

    filtered = _filter_detections(mt, raw)
    print(f"Filtered {len(filtered)} detections")

    # group_tokens returns four values; only the boxes and the per-bubble
    # detection indices are needed for this report.
    _, bubble_boxes, _, bubble_indices = mt.group_tokens(
        filtered, image.shape, gap_px=GROUP_GAP_PX, bbox_padding=GROUP_BBOX_PADDING
    )

    print("\n=== BUBBLES ===")
    for bid, box in bubble_boxes.items():
        print(f"Bubble {bid}: {box}")
        print(f" Indices: {bubble_indices[bid]}")
        print(f" Detections:")
        for idx in bubble_indices[bid]:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_IMAGE)