Added all

This commit is contained in:
Guillem Hernandez Sola
2026-04-23 19:28:39 +02:00
parent fd0339d8ca
commit 832d699917
3 changed files with 312 additions and 137 deletions

0
batch-clean.sh Normal file → Executable file
View File

240
batch-renderer.sh Executable file
View File

@@ -0,0 +1,240 @@
#!/usr/bin/env bash
# ============================================================
# batch-renderer.sh
# Batch manga text rendering using cleaned images and output.txt
#
# Usage:
# ./batch-renderer.sh <folder>
# ./batch-renderer.sh <folder> --start 3 --end 7
#
# Output per page lands in:
# <folder>/translated/<page_stem>/
# └── <page_stem>_translated.png
# ============================================================
# -u aborts on use of an unset variable; pipefail makes a pipeline fail when
# any stage fails. -e is not enabled, so a failing command does not by itself
# terminate the script.
set -uo pipefail
# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────
# 1-based page range to process; overridden by --start / --end.
START_PAGE=1
# Default upper bound; the usage text describes this default as "all".
END_PAGE=999999
# Interpreter used to launch the renderer; overridden by --python.
PYTHON_BIN="python"
# Absolute directory containing this script, resolved via BASH_SOURCE so it
# does not depend on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The Python renderer is looked up next to this script.
RENDERER="${SCRIPT_DIR}/manga-renderer.py"
# ─────────────────────────────────────────────────────────────
# COLOURS
# ─────────────────────────────────────────────────────────────
# ANSI escape sequences kept as literal backslash text (single quotes); they
# only become terminal colour codes when printed through `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
RESET='\033[0m'
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
# Print the command-line help (synopsis, options, examples) to stdout.
# Takes no arguments. Reads the global BOLD and RESET colour codes for the
# section headings and $0 for the program name.
usage() {
  # Headings go through %b so the backslash escapes stored in BOLD/RESET are
  # expanded; plain lines go through %s and are printed verbatim.
  printf '\n'
  printf '%b\n' "${BOLD}Usage:${RESET}"
  printf '%s\n' \
    " $0 <folder> [options]" \
    ""
  printf '%b\n' "${BOLD}Options:${RESET}"
  printf '%s\n' \
    " --start First page number (default: 1)" \
    " --end Last page number (default: all)" \
    " --python Python binary (default: python)" \
    " --help, -h Show this help" \
    ""
  printf '%b\n' "${BOLD}Examples:${RESET}"
  printf '%s\n' \
    " $0 pages-for-tests" \
    " $0 pages-for-tests --start 3 --end 7" \
    ""
}
# Colour-coded one-line loggers. Each joins its arguments into a single
# message, prefixes a colour code, appends RESET and prints to stdout.
# Backslash escapes in the colour codes and in the message are expanded.

# Shared printer: $1 is the colour code, $2 the already-joined message text.
_log_paint() {
  local colour=$1
  local text=$2
  printf '%b\n' "${colour}${text}${RESET}"
}
log_info() { _log_paint "${CYAN}" " $*"; }
log_ok() { _log_paint "${GREEN}" "$*"; }
log_warn() { _log_paint "${YELLOW}" "⚠️ $*"; }
log_error() { _log_paint "${RED}" "$*"; }
# Print a section banner to stdout: a blank line, a horizontal rule, the
# title (all arguments joined) and a second identical rule, each in bold cyan.
log_section() {
  local rule="${BOLD}${CYAN}══════════════════════════════════════════${RESET}"
  printf '\n%b\n' "${rule}"
  printf '%b\n' "${BOLD}${CYAN} 🔤 $*${RESET}"
  printf '%b\n' "${rule}"
}
# ─────────────────────────────────────────────────────────────
# ARGUMENT PARSING
# ─────────────────────────────────────────────────────────────
# Parse the command line: the first positional argument is the pages folder,
# everything after it is an option. Sets FOLDER and may override START_PAGE,
# END_PAGE and PYTHON_BIN. Exits 1 (after printing usage) on invalid input
# and 0 after printing help.
if [[ $# -eq 0 ]]; then
  log_error "No folder specified."
  usage
  exit 1
fi

# Honour --help / -h even when no folder precedes it; otherwise the flag
# would be consumed as the folder name and rejected as "Folder not found".
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
  usage
  exit 0
fi

FOLDER="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --start | --end)
      # `set -u` is active: reading "$2" when it is absent would abort the
      # script with a raw "unbound variable" error, so check the count first.
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a page number."
        usage
        exit 1
      fi
      # The bounds are compared arithmetically later on, so accept plain
      # decimal digits only.
      if [[ ! "$2" =~ ^[0-9]+$ ]]; then
        log_error "Option $1 expects a whole number, got: $2"
        usage
        exit 1
      fi
      # 10# forces base 10 so zero-padded input such as 08 is not parsed as
      # (invalid) octal.
      if [[ "$1" == "--start" ]]; then
        START_PAGE=$(( 10#$2 ))
      else
        END_PAGE=$(( 10#$2 ))
      fi
      shift 2
      ;;
    --python)
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a Python binary name or path."
        usage
        exit 1
      fi
      PYTHON_BIN="$2"
      shift 2
      ;;
    --help | -h)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# Pages are numbered from 1; a lower start would select the same pages but
# shift the page labels printed while processing.
if [[ $START_PAGE -lt 1 ]]; then
  log_warn "--start ${START_PAGE} is below 1; starting from page 1."
  START_PAGE=1
fi
# ─────────────────────────────────────────────────────────────
# VALIDATION
# ─────────────────────────────────────────────────────────────
# Pre-flight checks. Each guard reports the problem and exits with status 1:
# the pages folder must be a directory, the renderer script must exist, and
# the chosen Python binary must be resolvable by the shell.
[[ -d "$FOLDER" ]] || {
  log_error "Folder not found: $FOLDER"
  exit 1
}

[[ -f "$RENDERER" ]] || {
  log_error "manga-renderer.py not found at: $RENDERER"
  exit 1
}

command -v "$PYTHON_BIN" &>/dev/null || {
  log_error "Python binary not found: $PYTHON_BIN"
  log_error "Try --python python3"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# DISCOVER IMAGES
# ─────────────────────────────────────────────────────────────
# Collect the page images that sit directly inside FOLDER (no recursion):
# regular files ending in jpg/jpeg/png/webp, matched case-insensitively.
# Paths travel NUL-delimited through sort, so names containing spaces or
# newlines survive intact. The sorted order defines the page numbering.
ALL_IMAGES=()
while IFS= read -r -d '' found_image; do
  ALL_IMAGES[${#ALL_IMAGES[@]}]="$found_image"
done < <(
  find "$FOLDER" -maxdepth 1 -type f \
    '(' -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' ')' \
    -print0 \
    | sort -z
)

TOTAL=${#ALL_IMAGES[@]}
if (( TOTAL == 0 )); then
  log_error "No image files found in: $FOLDER"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SLICE TO REQUESTED PAGE RANGE
# ─────────────────────────────────────────────────────────────
# Keep only the images whose 1-based position in ALL_IMAGES falls inside
# [START_PAGE, END_PAGE]; abort with status 1 when the range selects nothing.
PAGES=()
for (( i = 0; i < ${#ALL_IMAGES[@]}; i++ )); do
  PAGE_NUM=$(( i + 1 ))
  if [[ $PAGE_NUM -ge $START_PAGE ]] && [[ $PAGE_NUM -le $END_PAGE ]]; then
    PAGES+=("${ALL_IMAGES[i]}")
  fi
done

if (( ${#PAGES[@]} == 0 )); then
  log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SUMMARY HEADER
# ─────────────────────────────────────────────────────────────
# Announce what is about to be processed: the resolved folder path, how many
# of the discovered pages were selected, and the requested page range.
log_section "BATCH MANGA RENDERER"
log_info "📂 Folder : $(realpath "$FOLDER")"
log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total"
# The two bounds need a separator; printed back to back, "--start 3 --end 7"
# would read as "37".
log_info "🔢 Range : ${START_PAGE} → ${END_PAGE}"
echo ""
# ─────────────────────────────────────────────────────────────
# PROCESS EACH PAGE
# ─────────────────────────────────────────────────────────────
# Render every selected page. For a page image <stem>.<ext> the inputs are
# read from <folder>/translated/<stem>/ (<stem>_cleaned.png, bubbles.json,
# output.txt) and the result is written there as <stem>_translated.png.
# A failing page is recorded in FAIL_LIST and the loop moves on to the next.
PASS=0
FAIL=0
FAIL_LIST=()

# The range slice never yields a page below 1, so a --start of 0 or less must
# not shift the page labels shown below.
FIRST_PAGE=$(( START_PAGE < 1 ? 1 : START_PAGE ))

for i in "${!PAGES[@]}"; do
  IMAGE="${PAGES[$i]}"
  PAGE_NUM=$(( FIRST_PAGE + i ))
  STEM="$(basename "${IMAGE%.*}")"
  WORKDIR="${FOLDER}/translated/${STEM}"

  echo ""
  echo -e "${BOLD}──────────────────────────────────────────${RESET}"
  echo -e "${BOLD} 🖼️ [${PAGE_NUM}/${TOTAL}] ${STEM}${RESET}"
  echo -e "${BOLD}──────────────────────────────────────────${RESET}"

  INPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png"
  INPUT_JSON="${WORKDIR}/bubbles.json"
  INPUT_TXT="${WORKDIR}/output.txt"
  OUTPUT_RENDERED="${WORKDIR}/${STEM}_translated.png"

  # Check for required files; report every missing one before skipping.
  MISSING_FILES=0
  for REQ_FILE in "$INPUT_CLEANED" "$INPUT_JSON" "$INPUT_TXT"; do
    if [[ ! -f "$REQ_FILE" ]]; then
      log_warn "Missing required file: $(basename "$REQ_FILE")"
      MISSING_FILES=1
    fi
  done

  if [[ $MISSING_FILES -eq 1 ]]; then
    log_error "Skipping ${STEM} due to missing files. Did you run batch-clean.sh?"
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM} (Missing Files)")
    continue
  fi

  log_info "🗂️ Cleaned Image : $(basename "$INPUT_CLEANED")"
  log_info "🔤 Rendering translated text..."

  # Drop any render left over from an earlier run, so the existence check
  # below can only be satisfied by a file written during this run. Without
  # this, a renderer that exits 0 without writing would be counted as a pass.
  rm -f -- "$OUTPUT_RENDERED"

  # ── Run the renderer ──────────────────────────────────────
  if "$PYTHON_BIN" "$RENDERER" \
    -i "$INPUT_CLEANED" \
    -j "$INPUT_JSON" \
    -t "$INPUT_TXT" \
    -o "$OUTPUT_RENDERED"; then
    # A zero exit status alone is not trusted: the output must also exist.
    if [[ -f "$OUTPUT_RENDERED" ]]; then
      log_ok "Translated image saved → ${STEM}_translated.png"
      PASS=$(( PASS + 1 ))
    else
      log_error "Script ran but output image is missing."
      FAIL=$(( FAIL + 1 ))
      FAIL_LIST+=("${STEM} (Missing Output)")
    fi
  else
    log_error "Page ${PAGE_NUM} FAILED — check output above."
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM} (Script Error)")
  fi
done
# ─────────────────────────────────────────────────────────────
# FINAL SUMMARY
# ─────────────────────────────────────────────────────────────
# Report the pass/fail totals, list every failed page with its reason, show
# where the output lives, and exit 0 only when no page failed (1 otherwise).
log_section "BATCH RENDERING COMPLETE"
printf '%b\n' \
  "${GREEN}Passed : ${PASS}${RESET}" \
  "${RED}Failed : ${FAIL}${RESET}"

if (( ${#FAIL_LIST[@]} > 0 )); then
  echo ""
  log_warn "Failed pages:"
  for failed_page in "${FAIL_LIST[@]}"; do
    printf '%b\n' "${RED}${failed_page}${RESET}"
  done
fi

echo ""
log_info "📦 Output folder: $(realpath "${FOLDER}/translated")"
echo ""

if (( FAIL == 0 )); then
  exit 0
fi
exit 1

View File

@@ -3,49 +3,37 @@
""" """
manga-renderer.py manga-renderer.py
Inputs: 001.jpg + bubbles.json + output_001.txt Inputs: 16_cleaned.png + bubbles.json + output.txt
Output: translated_page_001.png Output: 16_translated.png
Strategy:
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
2. Detect the original font size from the OCR bounding boxes.
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
4. Render the translated text centered inside the bubble bounding box.
5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
6. Adds a dynamic white stroke to the text to cover any residual original characters.
""" """
import json import json
import textwrap import textwrap
import cv2 import cv2
import numpy as np import numpy as np
import os
import argparse
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set, Any from typing import Dict, List, Tuple, Optional, Set, Any
# ============================================================ # ============================================================
# CONFIG — edit these paths to match your setup # CONFIG
# ============================================================ # ============================================================
IMAGE_PATH = "004.png" # Added System Fallbacks (macOS, Windows, Linux) so it never fails
BUBBLES_PATH = "bubbles_004.json"
TRANSLATIONS_PATH = "output_004.txt"
OUTPUT_PATH = "translated_page_004.png"
# Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks
FONT_CANDIDATES = [ FONT_CANDIDATES = [
"fonts/animeace2_reg.ttf", "fonts/animeace2_reg.ttf",
"fonts/ComicNeue-Bold.ttf", "fonts/ComicNeue-Bold.ttf",
"/Library/Fonts/Arial.ttf", # macOS
"/System/Library/Fonts/Helvetica.ttc", # macOS
"C:\\Windows\\Fonts\\arial.ttf", # Windows
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" # Linux
] ]
DEFAULT_FONT_SIZE = 18 DEFAULT_FONT_SIZE = 18
MIN_FONT_SIZE = 8 MIN_FONT_SIZE = 8
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
# ============================================================ # Add any bubble IDs you do NOT want rendered here.
# SKIP LIST SKIP_BUBBLE_IDS: Set[int] = set()
# ============================================================
SKIP_BUBBLE_IDS: Set[int] = {
# Add any bubble IDs you do NOT want rendered here.
}
# ============================================================ # ============================================================
# FONT LOADER # FONT LOADER
@@ -65,20 +53,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
def resolve_font_path() -> str: def resolve_font_path() -> str:
"""Return the path for the first working candidate.""" """Return the path for the first working candidate."""
for candidate in FONT_CANDIDATES: for candidate in FONT_CANDIDATES:
if load_font(candidate, DEFAULT_FONT_SIZE) is not None: if os.path.exists(candidate) and load_font(candidate, DEFAULT_FONT_SIZE) is not None:
print(f" ✅ Font: {candidate}") print(f" ✅ Font loaded: {candidate}")
return candidate return candidate
print(" ⚠️ No TrueType font found — using Pillow bitmap fallback") print(" ⚠️ No TrueType font found — using Pillow bitmap fallback (Text may look small)")
return "" return ""
# ============================================================ # ============================================================
# PARSERS # PARSERS
# ============================================================ # ============================================================
def parse_translations(filepath: str) -> Dict[int, str]: def parse_translations(filepath: str) -> Dict[int, str]:
""" """Reads output.txt and returns {bubble_id: translated_text}."""
Reads output.txt and returns {bubble_id: translated_text}.
Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
"""
translations = {} translations = {}
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, "r", encoding="utf-8") as f:
for line in f: for line in f:
@@ -86,11 +71,11 @@ def parse_translations(filepath: str) -> Dict[int, str]:
if not line.startswith("#"): if not line.startswith("#"):
continue continue
parts = line.split("|") parts = line.split("|")
if len(parts) < 5: if len(parts) < 9:
continue continue
try: try:
bid = int(parts[0].lstrip("#")) bid = int(parts[0].lstrip("#"))
translated = parts[4].strip() translated = parts[8].strip() # Index 8 is TRANSLATED
if translated and translated != "-": if translated and translated != "-":
translations[bid] = translated translations[bid] = translated
except ValueError: except ValueError:
@@ -98,70 +83,23 @@ def parse_translations(filepath: str) -> Dict[int, str]:
return translations return translations
def parse_bubbles(filepath: str): def parse_bubbles(filepath: str):
"""Returns the full JSON data."""
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, "r", encoding="utf-8") as f:
data = json.load(f) return json.load(f)
return data
# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
image_bgr,
bubbles_data: Dict[str, dict],
translations: Dict[int, str],
skip_ids: Set[int],
pad: int = QUAD_PAD
):
"""
White-fills OCR quads ONLY for bubbles that:
- have a translation in output.txt AND
- are NOT in SKIP_BUBBLE_IDS
"""
ih, iw = image_bgr.shape[:2]
result = image_bgr.copy()
erased_count = 0
skipped_count = 0
for bid_str, val in bubbles_data.items():
bid = int(bid_str)
quads = val.get("quads", [])
if bid in skip_ids or bid not in translations:
skipped_count += 1
continue
for quad in quads:
pts = np.array(quad, dtype=np.int32)
cv2.fillPoly(result, [pts], (255, 255, 255))
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
x1 = max(0, min(xs) - pad)
y1 = max(0, min(ys) - pad)
x2 = min(iw - 1, max(xs) + pad)
y2 = min(ih - 1, max(ys) + pad)
cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
erased_count += 1
print(f" Erased : {erased_count} bubbles")
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
return result
# ============================================================ # ============================================================
# DYNAMIC TEXT FITTING # DYNAMIC TEXT FITTING
# ============================================================ # ============================================================
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int: def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
"""Calculates the original font size based on the OCR bounding boxes.""" box = bubble_data.get("box")
line_bboxes = bubble_data.get("line_bboxes", []) lines = bubble_data.get("lines", [])
if not line_bboxes:
if not box or not lines:
return fallback_size return fallback_size
heights = [box["h"] for box in line_bboxes] line_count = len(lines)
median_h = int(np.median(heights)) estimated_line_height = box["h"] / max(1, line_count)
estimated_size = int(estimated_line_height * 0.85)
estimated_size = int(median_h * 0.85)
return max(MIN_FONT_SIZE, min(estimated_size, 60)) return max(MIN_FONT_SIZE, min(estimated_size, 60))
def fit_text_dynamically( def fit_text_dynamically(
@@ -171,16 +109,11 @@ def fit_text_dynamically(
max_h: int, max_h: int,
target_font_size: int target_font_size: int
) -> Tuple[List[str], Any, int, int]: ) -> Tuple[List[str], Any, int, int]:
"""
Wraps text and scales down font size if it exceeds the bubble dimensions.
Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
"""
font_size = target_font_size font_size = target_font_size
if not font_path: if not font_path:
font = ImageFont.load_default() font = ImageFont.load_default()
char_w = 6 chars_per_line = max(1, int(max_w / 6))
chars_per_line = max(1, int(max_w / char_w))
wrapped_lines = textwrap.wrap(text, width=chars_per_line) wrapped_lines = textwrap.wrap(text, width=chars_per_line)
return wrapped_lines, font, 4, 10 return wrapped_lines, font, 4, 10
@@ -196,7 +129,6 @@ def fit_text_dynamically(
wrapped_lines = textwrap.wrap(text, width=chars_per_line) wrapped_lines = textwrap.wrap(text, width=chars_per_line)
# Use uniform font metrics for height to protect accents like È
line_spacing = max(2, int(font_size * 0.15)) line_spacing = max(2, int(font_size * 0.15))
if hasattr(font, 'getmetrics'): if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics() ascent, descent = font.getmetrics()
@@ -235,10 +167,6 @@ def render_text(
font_path: str, font_path: str,
skip_ids: Set[int] skip_ids: Set[int]
): ):
"""
Draws the translated text centered in the line_union_bbox of each bubble.
Adds a dynamic white stroke (outline) to cover any residual original characters.
"""
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(image_rgb) pil_img = Image.fromarray(image_rgb)
draw = ImageDraw.Draw(pil_img) draw = ImageDraw.Draw(pil_img)
@@ -252,14 +180,11 @@ def render_text(
continue continue
text = translations[bid] text = translations[bid]
box = val.get("box")
union_box = val.get("line_union_bbox") if not box:
if not union_box:
union_box = val.get("text_bbox")
if not union_box:
continue continue
bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"] bx, by, bw, bh = box["x"], box["y"], box["w"], box["h"]
pad_x = int(bw * 0.1) pad_x = int(bw * 0.1)
pad_y = int(bh * 0.1) pad_y = int(bh * 0.1)
@@ -271,7 +196,6 @@ def render_text(
target_size = get_original_font_size(val) target_size = get_original_font_size(val)
wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size) wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
# Use uniform typographic line height for rendering to protect accents
if hasattr(font, 'getmetrics'): if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics() ascent, descent = font.getmetrics()
line_h = ascent + descent line_h = ascent + descent
@@ -279,11 +203,17 @@ def render_text(
line_h = final_size line_h = final_size
total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1)) total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
current_y = by + (bh - total_text_height) // 2 current_y = by + (bh - total_text_height) // 2
# Dynamic outline thickness based on the final scaled font size # --- SMART OUTLINE LOGIC ---
outline_thickness = max(2, int(final_size * 0.10)) bg_type = val.get("background_type", "white")
# Only use a white outline if the background is complex (inpainted artwork).
# If it's a white bubble, or if we are using the tiny default font, disable the outline.
if bg_type == "complex" and font_path:
outline_thickness = max(1, int(final_size * 0.05))
else:
outline_thickness = 0
for i, line in enumerate(wrapped_lines): for i, line in enumerate(wrapped_lines):
if hasattr(font, 'getbbox'): if hasattr(font, 'getbbox'):
@@ -294,7 +224,6 @@ def render_text(
current_x = bx + (bw - lw) // 2 current_x = bx + (bw - lw) // 2
# Draw text with white stroke for artifact coverage
draw.text( draw.text(
(current_x, current_y), (current_x, current_y),
line, line,
@@ -304,53 +233,59 @@ def render_text(
stroke_fill=(255, 255, 255) stroke_fill=(255, 255, 255)
) )
# Advance Y by the uniform line height + spacing
current_y += line_h + line_spacing current_y += line_h + line_spacing
rendered_count += 1 rendered_count += 1
print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)") print(f" Rendered: {rendered_count} bubbles")
return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
# ============================================================ # ============================================================
# MAIN # MAIN
# ============================================================ # ============================================================
def main(): def main():
print(f"Loading image: {IMAGE_PATH}") parser = argparse.ArgumentParser(description="Render translated text onto cleaned manga pages.")
image_bgr = cv2.imread(IMAGE_PATH) parser.add_argument("-i", "--image", required=True, help="Path to the CLEANED manga image")
if image_bgr is None: parser.add_argument("-j", "--json", required=True, help="Path to bubbles.json")
print(f"❌ Error: Could not load {IMAGE_PATH}") parser.add_argument("-t", "--txt", required=True, help="Path to output.txt")
parser.add_argument("-o", "--output", help="Path to save the final translated image")
args = parser.parse_args()
if not os.path.exists(args.image):
print(f"❌ Error: Image file not found at {args.image}")
return return
print(f"Loading translations: {TRANSLATIONS_PATH}") print(f"📂 Loading cleaned image: {args.image}")
translations = parse_translations(TRANSLATIONS_PATH) image_bgr = cv2.imread(args.image)
print(f"Loading bubble data: {BUBBLES_PATH}") print(f"📂 Loading translations: {args.txt}")
bubbles_data = parse_bubbles(BUBBLES_PATH) translations = parse_translations(args.txt)
print("Resolving font...") print(f"📂 Loading bubble data: {args.json}")
bubbles_data = parse_bubbles(args.json)
print("🔍 Resolving font...")
font_path = resolve_font_path() font_path = resolve_font_path()
print("\n--- Step 1: Erasing original text ---") print("\n--- Rendering translated text ---")
erased_bgr = erase_quads(
image_bgr=image_bgr,
bubbles_data=bubbles_data,
translations=translations,
skip_ids=SKIP_BUBBLE_IDS,
pad=QUAD_PAD
)
print("\n--- Step 2: Rendering translated text ---")
final_bgr = render_text( final_bgr = render_text(
image_bgr=erased_bgr, image_bgr=image_bgr,
bubbles_data=bubbles_data, bubbles_data=bubbles_data,
translations=translations, translations=translations,
font_path=font_path, font_path=font_path,
skip_ids=SKIP_BUBBLE_IDS skip_ids=SKIP_BUBBLE_IDS
) )
print(f"\nSaving final image to: {OUTPUT_PATH}") if args.output:
cv2.imwrite(OUTPUT_PATH, final_bgr) out_path = args.output
else:
base_name = args.image.replace("_cleaned", "")
base_name, ext = os.path.splitext(base_name)
out_path = f"{base_name}_translated{ext}"
print(f"\n💾 Saving final image to: {out_path}")
cv2.imwrite(out_path, final_bgr)
print("✅ Done!") print("✅ Done!")
if __name__ == "__main__": if __name__ == "__main__":