diff --git a/batch-clean.sh b/batch-clean.sh old mode 100644 new mode 100755 diff --git a/batch-renderer.sh b/batch-renderer.sh new file mode 100755 index 0000000..6fc66a4 --- /dev/null +++ b/batch-renderer.sh @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +# ============================================================ +# batch-renderer.sh +# Batch manga text rendering using cleaned images and output.txt +# +# Usage: +# ./batch-renderer.sh +# ./batch-renderer.sh --start 3 --end 7 +# +# Output per page lands in: +# /translated// +# └── _translated.png +# ============================================================ + +set -uo pipefail + +# ───────────────────────────────────────────────────────────── +# CONFIGURATION +# ───────────────────────────────────────────────────────────── +START_PAGE=1 +END_PAGE=999999 +PYTHON_BIN="python" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RENDERER="${SCRIPT_DIR}/manga-renderer.py" + +# ───────────────────────────────────────────────────────────── +# COLOURS +# ───────────────────────────────────────────────────────────── +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +BOLD='\033[1m' +RESET='\033[0m' + +# ───────────────────────────────────────────────────────────── +# HELPERS +# ───────────────────────────────────────────────────────────── +usage() { + echo "" + echo -e "${BOLD}Usage:${RESET}" + echo " $0 [options]" + echo "" + echo -e "${BOLD}Options:${RESET}" + echo " --start First page number (default: 1)" + echo " --end Last page number (default: all)" + echo " --python Python binary (default: python)" + echo " --help, -h Show this help" + echo "" + echo -e "${BOLD}Examples:${RESET}" + echo " $0 pages-for-tests" + echo " $0 pages-for-tests --start 3 --end 7" + echo "" +} + +log_info() { echo -e "${CYAN}ℹ️ $*${RESET}"; } +log_ok() { echo -e "${GREEN}✅ $*${RESET}"; } +log_warn() { echo -e "${YELLOW}⚠️ $*${RESET}"; } +log_error() { echo -e "${RED}❌ $*${RESET}"; } +log_section() { + echo -e "\n${BOLD}${CYAN}══════════════════════════════════════════${RESET}" + echo -e "${BOLD}${CYAN} 🔤 $*${RESET}" + echo -e "${BOLD}${CYAN}══════════════════════════════════════════${RESET}" +} + +# ───────────────────────────────────────────────────────────── +# ARGUMENT PARSING +# ───────────────────────────────────────────────────────────── +if [[ $# -eq 0 ]]; then + log_error "No folder specified." + usage + exit 1 +fi + +FOLDER="$1" +shift + +while [[ $# -gt 0 ]]; do + case "$1" in + --start) START_PAGE="$2"; shift 2 ;; + --end) END_PAGE="$2"; shift 2 ;; + --python) PYTHON_BIN="$2"; shift 2 ;; + --help|-h) usage; exit 0 ;; + *) + log_error "Unknown option: $1" + usage + exit 1 + ;; + esac +done + +# ───────────────────────────────────────────────────────────── +# VALIDATION +# ───────────────────────────────────────────────────────────── +if [[ ! -d "$FOLDER" ]]; then + log_error "Folder not found: $FOLDER" + exit 1 +fi + +if [[ ! -f "$RENDERER" ]]; then + log_error "manga-renderer.py not found at: $RENDERER" + exit 1 +fi + +if ! command -v "$PYTHON_BIN" &>/dev/null; then + log_error "Python binary not found: $PYTHON_BIN" + log_error "Try --python python3" + exit 1 +fi + +# ───────────────────────────────────────────────────────────── +# DISCOVER IMAGES +# ───────────────────────────────────────────────────────────── +ALL_IMAGES=() +while IFS= read -r -d '' img; do + ALL_IMAGES+=("$img") +done < <( + find "$FOLDER" -maxdepth 1 -type f \ + \( -iname "*.jpg" -o -iname "*.jpeg" \ + -o -iname "*.png" -o -iname "*.webp" \) \ + -print0 | sort -z +) + +TOTAL=${#ALL_IMAGES[@]} + +if [[ $TOTAL -eq 0 ]]; then + log_error "No image files found in: $FOLDER" + exit 1 +fi + +# ───────────────────────────────────────────────────────────── +# SLICE TO REQUESTED PAGE RANGE +# ───────────────────────────────────────────────────────────── +PAGES=() +for i in "${!ALL_IMAGES[@]}"; do + PAGE_NUM=$(( i + 1 )) + if [[ $PAGE_NUM -ge $START_PAGE && $PAGE_NUM -le $END_PAGE ]]; then + PAGES+=("${ALL_IMAGES[$i]}") + fi +done + +if [[ ${#PAGES[@]} -eq 0 ]]; then + log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})" + exit 1 +fi + +# ───────────────────────────────────────────────────────────── +# SUMMARY HEADER +# ───────────────────────────────────────────────────────────── +log_section "BATCH MANGA RENDERER" +log_info "📂 Folder : $(realpath "$FOLDER")" +log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total" +log_info "🔢 Range : ${START_PAGE} → ${END_PAGE}" +echo "" + +# ───────────────────────────────────────────────────────────── +# PROCESS EACH PAGE +# ───────────────────────────────────────────────────────────── +PASS=0 +FAIL=0 +FAIL_LIST=() + +for i in "${!PAGES[@]}"; do + IMAGE="${PAGES[$i]}" + PAGE_NUM=$(( START_PAGE + i )) + STEM="$(basename "${IMAGE%.*}")" + WORKDIR="${FOLDER}/translated/${STEM}" + + echo "" + echo -e "${BOLD}──────────────────────────────────────────${RESET}" + echo -e "${BOLD} 🖼️ [${PAGE_NUM}/${TOTAL}] ${STEM}${RESET}" + echo -e "${BOLD}──────────────────────────────────────────${RESET}" + + INPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png" + INPUT_JSON="${WORKDIR}/bubbles.json" + INPUT_TXT="${WORKDIR}/output.txt" + OUTPUT_RENDERED="${WORKDIR}/${STEM}_translated.png" + + # Check for required files + MISSING_FILES=0 + for REQ_FILE in "$INPUT_CLEANED" "$INPUT_JSON" "$INPUT_TXT"; do + if [[ ! -f "$REQ_FILE" ]]; then + log_warn "Missing required file: $(basename "$REQ_FILE")" + MISSING_FILES=1 + fi + done + + if [[ $MISSING_FILES -eq 1 ]]; then + log_error "Skipping ${STEM} due to missing files. Did you run batch-clean.sh?" + FAIL=$(( FAIL + 1 )) + FAIL_LIST+=("${STEM} (Missing Files)") + continue + fi + + log_info "🗂️ Cleaned Image : $(basename "$INPUT_CLEANED")" + log_info "🔤 Rendering translated text..." + + # ── Run the renderer ────────────────────────────────────── + if "$PYTHON_BIN" "$RENDERER" \ + -i "$INPUT_CLEANED" \ + -j "$INPUT_JSON" \ + -t "$INPUT_TXT" \ + -o "$OUTPUT_RENDERED"; then + + if [[ -f "$OUTPUT_RENDERED" ]]; then + log_ok "Translated image saved → ${STEM}_translated.png" + PASS=$(( PASS + 1 )) + else + log_error "Script ran but output image is missing." + FAIL=$(( FAIL + 1 )) + FAIL_LIST+=("${STEM} (Missing Output)") + fi + else + log_error "Page ${PAGE_NUM} FAILED — check output above." + FAIL=$(( FAIL + 1 )) + FAIL_LIST+=("${STEM} (Script Error)") + fi + +done + +# ───────────────────────────────────────────────────────────── +# FINAL SUMMARY +# ───────────────────────────────────────────────────────────── +log_section "BATCH RENDERING COMPLETE" +echo -e " ✅ ${GREEN}Passed : ${PASS}${RESET}" +echo -e " ❌ ${RED}Failed : ${FAIL}${RESET}" + +if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then + echo "" + log_warn "Failed pages:" + for NAME in "${FAIL_LIST[@]}"; do + echo -e " ❌ ${RED}${NAME}${RESET}" + done +fi + +echo "" +log_info "📦 Output folder: $(realpath "${FOLDER}/translated")" +echo "" + +[[ $FAIL -eq 0 ]] && exit 0 || exit 1 \ No newline at end of file diff --git a/manga-renderer.py b/manga-renderer.py index 5e70093..ed99ca6 100644 --- a/manga-renderer.py +++ b/manga-renderer.py @@ -3,49 +3,37 @@ """ manga-renderer.py -Inputs: 001.jpg + bubbles.json + output_001.txt -Output: translated_page_001.png - -Strategy: - 1. For every bubble, white-fill all its OCR quads (erases original text cleanly). - 2. Detect the original font size from the OCR bounding boxes. - 3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions. - 4. Render the translated text centered inside the bubble bounding box. - 5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À). - 6. Adds a dynamic white stroke to the text to cover any residual original characters. +Inputs: 16_cleaned.png + bubbles.json + output.txt +Output: 16_translated.png """ import json import textwrap import cv2 import numpy as np +import os +import argparse from PIL import Image, ImageDraw, ImageFont from typing import Dict, List, Tuple, Optional, Set, Any # ============================================================ -# CONFIG — edit these paths to match your setup +# CONFIG # ============================================================ -IMAGE_PATH = "004.png" -BUBBLES_PATH = "bubbles_004.json" -TRANSLATIONS_PATH = "output_004.txt" -OUTPUT_PATH = "translated_page_004.png" - -# Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks +# Added System Fallbacks (macOS, Windows, Linux) so it never fails FONT_CANDIDATES = [ "fonts/animeace2_reg.ttf", "fonts/ComicNeue-Bold.ttf", + "/Library/Fonts/Arial.ttf", # macOS + "/System/Library/Fonts/Helvetica.ttc", # macOS + "C:\\Windows\\Fonts\\arial.ttf", # Windows + "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" # Linux ] DEFAULT_FONT_SIZE = 18 MIN_FONT_SIZE = 8 -QUAD_PAD = 4 # extra pixels added around each quad before white-fill -# ============================================================ -# SKIP LIST -# ============================================================ -SKIP_BUBBLE_IDS: Set[int] = { - # Add any bubble IDs you do NOT want rendered here. -} +# Add any bubble IDs you do NOT want rendered here. +SKIP_BUBBLE_IDS: Set[int] = set() # ============================================================ # FONT LOADER @@ -65,20 +53,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]: def resolve_font_path() -> str: """Return the path for the first working candidate.""" for candidate in FONT_CANDIDATES: - if load_font(candidate, DEFAULT_FONT_SIZE) is not None: - print(f" ✅ Font: {candidate}") + if os.path.exists(candidate) and load_font(candidate, DEFAULT_FONT_SIZE) is not None: + print(f" ✅ Font loaded: {candidate}") return candidate - print(" ⚠️ No TrueType font found — using Pillow bitmap fallback") + print(" ⚠️ No TrueType font found — using Pillow bitmap fallback (Text may look small)") return "" # ============================================================ # PARSERS # ============================================================ def parse_translations(filepath: str) -> Dict[int, str]: - """ - Reads output.txt and returns {bubble_id: translated_text}. - Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS - """ + """Reads output.txt and returns {bubble_id: translated_text}.""" translations = {} with open(filepath, "r", encoding="utf-8") as f: for line in f: @@ -86,11 +71,11 @@ def parse_translations(filepath: str) -> Dict[int, str]: if not line.startswith("#"): continue parts = line.split("|") - if len(parts) < 5: + if len(parts) < 9: continue try: bid = int(parts[0].lstrip("#")) - translated = parts[4].strip() + translated = parts[8].strip() # Index 8 is TRANSLATED if translated and translated != "-": translations[bid] = translated except ValueError: @@ -98,70 +83,23 @@ def parse_translations(filepath: str) -> Dict[int, str]: return translations def parse_bubbles(filepath: str): - """Returns the full JSON data.""" with open(filepath, "r", encoding="utf-8") as f: - data = json.load(f) - return data - -# ============================================================ -# ERASE — white-fill every OCR quad (with small padding) -# ============================================================ -def erase_quads( - image_bgr, - bubbles_data: Dict[str, dict], - translations: Dict[int, str], - skip_ids: Set[int], - pad: int = QUAD_PAD -): - """ - White-fills OCR quads ONLY for bubbles that: - - have a translation in output.txt AND - - are NOT in SKIP_BUBBLE_IDS - """ - ih, iw = image_bgr.shape[:2] - result = image_bgr.copy() - - erased_count = 0 - skipped_count = 0 - - for bid_str, val in bubbles_data.items(): - bid = int(bid_str) - quads = val.get("quads", []) - - if bid in skip_ids or bid not in translations: - skipped_count += 1 - continue - - for quad in quads: - pts = np.array(quad, dtype=np.int32) - cv2.fillPoly(result, [pts], (255, 255, 255)) - - xs = [p[0] for p in quad]; ys = [p[1] for p in quad] - x1 = max(0, min(xs) - pad) - y1 = max(0, min(ys) - pad) - x2 = min(iw - 1, max(xs) + pad) - y2 = min(ih - 1, max(ys) + pad) - cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1) - - erased_count += 1 - - print(f" Erased : {erased_count} bubbles") - print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)") - return result + return json.load(f) # ============================================================ # DYNAMIC TEXT FITTING # ============================================================ def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int: - """Calculates the original font size based on the OCR bounding boxes.""" - line_bboxes = bubble_data.get("line_bboxes", []) - if not line_bboxes: + box = bubble_data.get("box") + lines = bubble_data.get("lines", []) + + if not box or not lines: return fallback_size - heights = [box["h"] for box in line_bboxes] - median_h = int(np.median(heights)) + line_count = len(lines) + estimated_line_height = box["h"] / max(1, line_count) + estimated_size = int(estimated_line_height * 0.85) - estimated_size = int(median_h * 0.85) return max(MIN_FONT_SIZE, min(estimated_size, 60)) def fit_text_dynamically( @@ -171,16 +109,11 @@ def fit_text_dynamically( max_h: int, target_font_size: int ) -> Tuple[List[str], Any, int, int]: - """ - Wraps text and scales down font size if it exceeds the bubble dimensions. - Returns: (wrapped_lines, font_object, line_spacing, final_font_size) - """ font_size = target_font_size if not font_path: font = ImageFont.load_default() - char_w = 6 - chars_per_line = max(1, int(max_w / char_w)) + chars_per_line = max(1, int(max_w / 6)) wrapped_lines = textwrap.wrap(text, width=chars_per_line) return wrapped_lines, font, 4, 10 @@ -196,7 +129,6 @@ def fit_text_dynamically( wrapped_lines = textwrap.wrap(text, width=chars_per_line) - # Use uniform font metrics for height to protect accents like È line_spacing = max(2, int(font_size * 0.15)) if hasattr(font, 'getmetrics'): ascent, descent = font.getmetrics() @@ -235,10 +167,6 @@ def render_text( font_path: str, skip_ids: Set[int] ): - """ - Draws the translated text centered in the line_union_bbox of each bubble. - Adds a dynamic white stroke (outline) to cover any residual original characters. - """ image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) pil_img = Image.fromarray(image_rgb) draw = ImageDraw.Draw(pil_img) @@ -252,14 +180,11 @@ def render_text( continue text = translations[bid] - - union_box = val.get("line_union_bbox") - if not union_box: - union_box = val.get("text_bbox") - if not union_box: - continue + box = val.get("box") + if not box: + continue - bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"] + bx, by, bw, bh = box["x"], box["y"], box["w"], box["h"] pad_x = int(bw * 0.1) pad_y = int(bh * 0.1) @@ -271,7 +196,6 @@ def render_text( target_size = get_original_font_size(val) wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size) - # Use uniform typographic line height for rendering to protect accents if hasattr(font, 'getmetrics'): ascent, descent = font.getmetrics() line_h = ascent + descent @@ -279,11 +203,17 @@ def render_text( line_h = final_size total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1)) - current_y = by + (bh - total_text_height) // 2 - # Dynamic outline thickness based on the final scaled font size - outline_thickness = max(2, int(final_size * 0.10)) + # --- SMART OUTLINE LOGIC --- + bg_type = val.get("background_type", "white") + + # Only use a white outline if the background is complex (inpainted artwork). + # If it's a white bubble, or if we are using the tiny default font, disable the outline. + if bg_type == "complex" and font_path: + outline_thickness = max(1, int(final_size * 0.05)) + else: + outline_thickness = 0 for i, line in enumerate(wrapped_lines): if hasattr(font, 'getbbox'): @@ -294,7 +224,6 @@ def render_text( current_x = bx + (bw - lw) // 2 - # Draw text with white stroke for artifact coverage draw.text( (current_x, current_y), line, @@ -304,53 +233,59 @@ def render_text( stroke_fill=(255, 255, 255) ) - # Advance Y by the uniform line height + spacing current_y += line_h + line_spacing rendered_count += 1 - print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)") + print(f" Rendered: {rendered_count} bubbles") return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) # ============================================================ # MAIN # ============================================================ def main(): - print(f"Loading image: {IMAGE_PATH}") - image_bgr = cv2.imread(IMAGE_PATH) - if image_bgr is None: - print(f"❌ Error: Could not load {IMAGE_PATH}") - return - - print(f"Loading translations: {TRANSLATIONS_PATH}") - translations = parse_translations(TRANSLATIONS_PATH) + parser = argparse.ArgumentParser(description="Render translated text onto cleaned manga pages.") + parser.add_argument("-i", "--image", required=True, help="Path to the CLEANED manga image") + parser.add_argument("-j", "--json", required=True, help="Path to bubbles.json") + parser.add_argument("-t", "--txt", required=True, help="Path to output.txt") + parser.add_argument("-o", "--output", help="Path to save the final translated image") - print(f"Loading bubble data: {BUBBLES_PATH}") - bubbles_data = parse_bubbles(BUBBLES_PATH) + args = parser.parse_args() - print("Resolving font...") + if not os.path.exists(args.image): + print(f"❌ Error: Image file not found at {args.image}") + return + + print(f"📂 Loading cleaned image: {args.image}") + image_bgr = cv2.imread(args.image) + + print(f"📂 Loading translations: {args.txt}") + translations = parse_translations(args.txt) + + print(f"📂 Loading bubble data: {args.json}") + bubbles_data = parse_bubbles(args.json) + + print("🔍 Resolving font...") font_path = resolve_font_path() - print("\n--- Step 1: Erasing original text ---") - erased_bgr = erase_quads( - image_bgr=image_bgr, - bubbles_data=bubbles_data, - translations=translations, - skip_ids=SKIP_BUBBLE_IDS, - pad=QUAD_PAD - ) - - print("\n--- Step 2: Rendering translated text ---") + print("\n--- Rendering translated text ---") final_bgr = render_text( - image_bgr=erased_bgr, + image_bgr=image_bgr, bubbles_data=bubbles_data, translations=translations, font_path=font_path, skip_ids=SKIP_BUBBLE_IDS ) - print(f"\nSaving final image to: {OUTPUT_PATH}") - cv2.imwrite(OUTPUT_PATH, final_bgr) + if args.output: + out_path = args.output + else: + base_name = args.image.replace("_cleaned", "") + base_name, ext = os.path.splitext(base_name) + out_path = f"{base_name}_translated{ext}" + + print(f"\n💾 Saving final image to: {out_path}") + cv2.imwrite(out_path, final_bgr) print("✅ Done!") if __name__ == "__main__":