Added all

This commit is contained in:
Guillem Hernandez Sola
2026-04-23 19:28:39 +02:00
parent fd0339d8ca
commit 832d699917
3 changed files with 312 additions and 137 deletions

0
batch-clean.sh Normal file → Executable file
View File

240
batch-renderer.sh Executable file
View File

@@ -0,0 +1,240 @@
#!/usr/bin/env bash
# ============================================================
# batch-renderer.sh
# Batch manga text rendering using cleaned images and output.txt
#
# Usage:
# ./batch-renderer.sh <folder>
# ./batch-renderer.sh <folder> --start 3 --end 7
#
# Output per page lands in:
# <folder>/translated/<page_stem>/
# └── <page_stem>_translated.png
# ============================================================
# Shell options: -u aborts on use of an unset variable, pipefail makes a
# pipeline fail when any stage fails. errexit (-e) is not enabled; the page
# loop further down checks the renderer's exit status itself and moves on to
# the next page instead of stopping the whole batch.
set -uo pipefail
# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────
# First page to render (1-based); overridden by --start.
START_PAGE=1
# Last page to render; the large default effectively means "all pages" and is
# overridden by --end.
END_PAGE=999999
# Interpreter used to launch the renderer; overridden by --python.
PYTHON_BIN="python"
# Absolute directory that contains this script (resolved with cd + pwd).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The Python renderer is looked up next to this script.
RENDERER="${SCRIPT_DIR}/manga-renderer.py"
# ─────────────────────────────────────────────────────────────
# COLOURS
# ─────────────────────────────────────────────────────────────
# ANSI colour sequences, stored as literal backslash text (single quotes).
# They only become real escape characters when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
RESET='\033[0m'
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
# Print the command-line help (synopsis, options, examples) to stdout.
# Section headings are wrapped in the BOLD/RESET colour variables; the
# remaining lines are printed verbatim with the script name ($0) filled in.
usage() {
  printf '\n'
  printf '%b\n' "${BOLD}Usage:${RESET}"
  printf '%s\n' " $0 <folder> [options]"
  printf '\n'
  printf '%b\n' "${BOLD}Options:${RESET}"
  printf '%s\n' \
    " --start First page number (default: 1)" \
    " --end Last page number (default: all)" \
    " --python Python binary (default: python)" \
    " --help, -h Show this help"
  printf '\n'
  printf '%b\n' "${BOLD}Examples:${RESET}"
  printf '%s\n' \
    " $0 pages-for-tests" \
    " $0 pages-for-tests --start 3 --end 7"
  printf '\n'
}
# Coloured one-line loggers. Each joins its arguments with spaces, wraps the
# text in a colour sequence followed by RESET, and prints it to stdout with
# backslash escapes expanded.

# Informational message in cyan (text is preceded by one space).
log_info() {
  printf '%b\n' "${CYAN} $*${RESET}"
}

# Success message in green.
log_ok() {
  printf '%b\n' "${GREEN}$*${RESET}"
}

# Warning message in yellow, prefixed with a warning sign.
log_warn() {
  printf '%b\n' "${YELLOW}⚠️ $*${RESET}"
}

# Error message in red.
log_error() {
  printf '%b\n' "${RED}$*${RESET}"
}
# Print a section banner to stdout: a blank line, a horizontal rule, the
# title (all arguments joined with spaces), and a closing rule. Every banner
# line is rendered in bold cyan and terminated with RESET.
log_section() {
  local rule="══════════════════════════════════════════"
  printf '\n'
  printf '%b\n' "${BOLD}${CYAN}${rule}${RESET}"
  printf '%b\n' "${BOLD}${CYAN} 🔤 $*${RESET}"
  printf '%b\n' "${BOLD}${CYAN}${rule}${RESET}"
}
# ─────────────────────────────────────────────────────────────
# ARGUMENT PARSING
# ─────────────────────────────────────────────────────────────
# Parse the command line: <folder> followed by optional flags.
#   --start N / --end N   page range (whole numbers only)
#   --python BIN          interpreter used to run the renderer
#   --help, -h            print usage and exit 0 (also accepted in place of
#                         the folder argument)
# Malformed input prints an error plus the usage text and exits 1.
if [[ $# -eq 0 ]]; then
  log_error "No folder specified."
  usage
  exit 1
fi

# Without this check "--help" as the only argument would be taken as the
# folder name and end in a "Folder not found" error.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
  usage
  exit 0
fi

FOLDER="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --start|--end)
      # With `set -u` active, reading a missing "$2" would abort the script
      # with a bare "unbound variable" error, so check the count first.
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a value."
        usage
        exit 1
      fi
      # The bounds are later compared arithmetically; accept plain digits
      # only so arbitrary text never reaches arithmetic evaluation.
      if [[ ! "$2" =~ ^[0-9]+$ ]]; then
        log_error "Option $1 expects a whole number, got: $2"
        usage
        exit 1
      fi
      # 10# forces base 10 so zero-padded input (e.g. 008) is not read as
      # an invalid octal literal.
      if [[ "$1" == "--start" ]]; then
        START_PAGE=$(( 10#$2 ))
      else
        END_PAGE=$(( 10#$2 ))
      fi
      shift 2
      ;;
    --python)
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a value."
        usage
        exit 1
      fi
      PYTHON_BIN="$2"
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done
# ─────────────────────────────────────────────────────────────
# VALIDATION
# ─────────────────────────────────────────────────────────────
# Pre-flight checks. Stop with exit status 1 unless the page folder exists,
# the renderer script is present, and the Python interpreter can be resolved
# by the shell.
[[ -d "$FOLDER" ]] || {
  log_error "Folder not found: $FOLDER"
  exit 1
}

[[ -f "$RENDERER" ]] || {
  log_error "manga-renderer.py not found at: $RENDERER"
  exit 1
}

command -v "$PYTHON_BIN" &>/dev/null || {
  log_error "Python binary not found: $PYTHON_BIN"
  log_error "Try --python python3"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# DISCOVER IMAGES
# ─────────────────────────────────────────────────────────────
# Collect the page images that sit directly inside FOLDER (no recursion),
# matching jpg/jpeg/png/webp case-insensitively. Paths travel NUL-delimited
# through sort so unusual file names survive intact; the sorted position of
# a file is what the page numbers refer to.
IMAGE_PATTERNS=(
  -iname "*.jpg" -o -iname "*.jpeg"
  -o -iname "*.png" -o -iname "*.webp"
)
ALL_IMAGES=()
while IFS= read -r -d '' IMAGE_PATH; do
  ALL_IMAGES+=("$IMAGE_PATH")
done < <(
  find "$FOLDER" -maxdepth 1 -type f \( "${IMAGE_PATTERNS[@]}" \) -print0 \
    | sort -z
)

TOTAL=${#ALL_IMAGES[@]}
if (( TOTAL == 0 )); then
  log_error "No image files found in: $FOLDER"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SLICE TO REQUESTED PAGE RANGE
# ─────────────────────────────────────────────────────────────
# Keep only the images whose 1-based position in ALL_IMAGES falls inside
# [START_PAGE, END_PAGE]; abort with exit status 1 when nothing is left.
PAGES=()
for (( IDX = 0; IDX < ${#ALL_IMAGES[@]}; IDX++ )); do
  POSITION=$(( IDX + 1 ))
  if (( POSITION >= START_PAGE && POSITION <= END_PAGE )); then
    PAGES+=("${ALL_IMAGES[IDX]}")
  fi
done

if (( ${#PAGES[@]} == 0 )); then
  log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SUMMARY HEADER
# ─────────────────────────────────────────────────────────────
# Banner shown before processing starts: resolved folder path, how many of
# the discovered pages were selected, and the requested page range.
log_section "BATCH MANGA RENDERER"
log_info "📂 Folder : $(realpath "$FOLDER")"
log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total"
# The two bounds need a separator; printed back to back, the defaults
# (1 and 999999) would show up as the single number "1999999".
log_info "🔢 Range : ${START_PAGE} → ${END_PAGE}"
echo ""
# ─────────────────────────────────────────────────────────────
# PROCESS EACH PAGE
# ─────────────────────────────────────────────────────────────
# Render every selected page. For each image the inputs are expected under
# <folder>/translated/<stem>/ (cleaned image, bubbles.json, output.txt). A
# page counts as passed only when the renderer exits successfully AND the
# translated image exists afterwards; every other outcome is recorded in
# FAIL_LIST with a short reason and the loop moves on to the next page.
PASS=0
FAIL=0
FAIL_LIST=()

for (( i = 0; i < ${#PAGES[@]}; i++ )); do
  IMAGE="${PAGES[i]}"
  PAGE_NUM=$(( START_PAGE + i ))
  STEM="$(basename "${IMAGE%.*}")"
  WORKDIR="${FOLDER}/translated/${STEM}"

  # Per-page banner.
  printf '\n'
  printf '%b\n' "${BOLD}──────────────────────────────────────────${RESET}"
  printf '%b\n' "${BOLD} 🖼️ [${PAGE_NUM}/${TOTAL}] ${STEM}${RESET}"
  printf '%b\n' "${BOLD}──────────────────────────────────────────${RESET}"

  INPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png"
  INPUT_JSON="${WORKDIR}/bubbles.json"
  INPUT_TXT="${WORKDIR}/output.txt"
  OUTPUT_RENDERED="${WORKDIR}/${STEM}_translated.png"

  # Report every missing input (not just the first) before skipping.
  MISSING_FILES=0
  for REQ_FILE in "$INPUT_CLEANED" "$INPUT_JSON" "$INPUT_TXT"; do
    [[ -f "$REQ_FILE" ]] && continue
    log_warn "Missing required file: $(basename "$REQ_FILE")"
    MISSING_FILES=1
  done

  if (( MISSING_FILES == 1 )); then
    log_error "Skipping ${STEM} due to missing files. Did you run batch-clean.sh?"
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM} (Missing Files)")
    continue
  fi

  log_info "🗂️ Cleaned Image : $(basename "$INPUT_CLEANED")"
  log_info "🔤 Rendering translated text..."

  # ── Run the renderer ──────────────────────────────────────
  RENDER_CMD=(
    "$PYTHON_BIN" "$RENDERER"
    -i "$INPUT_CLEANED"
    -j "$INPUT_JSON"
    -t "$INPUT_TXT"
    -o "$OUTPUT_RENDERED"
  )
  if ! "${RENDER_CMD[@]}"; then
    log_error "Page ${PAGE_NUM} FAILED — check output above."
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM} (Script Error)")
    continue
  fi

  # A zero exit status alone is not trusted; the output file must exist.
  if [[ ! -f "$OUTPUT_RENDERED" ]]; then
    log_error "Script ran but output image is missing."
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM} (Missing Output)")
    continue
  fi

  log_ok "Translated image saved → ${STEM}_translated.png"
  PASS=$(( PASS + 1 ))
done
# ─────────────────────────────────────────────────────────────
# FINAL SUMMARY
# ─────────────────────────────────────────────────────────────
# Closing report: pass/fail totals, the list of failed pages (if any) and the
# output location. The script's exit status is 0 only when no page failed.
log_section "BATCH RENDERING COMPLETE"
printf '%b\n' "${GREEN}Passed : ${PASS}${RESET}"
printf '%b\n' "${RED}Failed : ${FAIL}${RESET}"

if (( ${#FAIL_LIST[@]} > 0 )); then
  printf '\n'
  log_warn "Failed pages:"
  for FAILED_PAGE in "${FAIL_LIST[@]}"; do
    printf '%b\n' "${RED}${FAILED_PAGE}${RESET}"
  done
fi

printf '\n'
log_info "📦 Output folder: $(realpath "${FOLDER}/translated")"
printf '\n'

if (( FAIL == 0 )); then
  exit 0
fi
exit 1

View File

@@ -3,49 +3,37 @@
"""
manga-renderer.py
Inputs: 001.jpg + bubbles.json + output_001.txt
Output: translated_page_001.png
Strategy:
1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
2. Detect the original font size from the OCR bounding boxes.
3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
4. Render the translated text centered inside the bubble bounding box.
5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
6. Adds a dynamic white stroke to the text to cover any residual original characters.
Inputs: 16_cleaned.png + bubbles.json + output.txt
Output: 16_translated.png
"""
import json
import textwrap
import cv2
import numpy as np
import os
import argparse
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set, Any
# ============================================================
# CONFIG — edit these paths to match your setup
# CONFIG
# ============================================================
IMAGE_PATH = "004.png"
BUBBLES_PATH = "bubbles_004.json"
TRANSLATIONS_PATH = "output_004.txt"
OUTPUT_PATH = "translated_page_004.png"
# Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks
# Added System Fallbacks (macOS, Windows, Linux) so it never fails
FONT_CANDIDATES = [
"fonts/animeace2_reg.ttf",
"fonts/ComicNeue-Bold.ttf",
"/Library/Fonts/Arial.ttf", # macOS
"/System/Library/Fonts/Helvetica.ttc", # macOS
"C:\\Windows\\Fonts\\arial.ttf", # Windows
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" # Linux
]
DEFAULT_FONT_SIZE = 18
MIN_FONT_SIZE = 8
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
# ============================================================
# SKIP LIST
# ============================================================
SKIP_BUBBLE_IDS: Set[int] = {
# Add any bubble IDs you do NOT want rendered here.
}
# Add any bubble IDs you do NOT want rendered here.
SKIP_BUBBLE_IDS: Set[int] = set()
# ============================================================
# FONT LOADER
@@ -65,20 +53,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
def resolve_font_path() -> str:
"""Return the path for the first working candidate."""
for candidate in FONT_CANDIDATES:
if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
print(f" ✅ Font: {candidate}")
if os.path.exists(candidate) and load_font(candidate, DEFAULT_FONT_SIZE) is not None:
print(f" ✅ Font loaded: {candidate}")
return candidate
print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
print(" ⚠️ No TrueType font found — using Pillow bitmap fallback (Text may look small)")
return ""
# ============================================================
# PARSERS
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
"""
Reads output.txt and returns {bubble_id: translated_text}.
Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
"""
"""Reads output.txt and returns {bubble_id: translated_text}."""
translations = {}
with open(filepath, "r", encoding="utf-8") as f:
for line in f:
@@ -86,11 +71,11 @@ def parse_translations(filepath: str) -> Dict[int, str]:
if not line.startswith("#"):
continue
parts = line.split("|")
if len(parts) < 5:
if len(parts) < 9:
continue
try:
bid = int(parts[0].lstrip("#"))
translated = parts[4].strip()
translated = parts[8].strip() # Index 8 is TRANSLATED
if translated and translated != "-":
translations[bid] = translated
except ValueError:
@@ -98,70 +83,23 @@ def parse_translations(filepath: str) -> Dict[int, str]:
return translations
def parse_bubbles(filepath: str):
"""Returns the full JSON data."""
with open(filepath, "r", encoding="utf-8") as f:
data = json.load(f)
return data
# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
image_bgr,
bubbles_data: Dict[str, dict],
translations: Dict[int, str],
skip_ids: Set[int],
pad: int = QUAD_PAD
):
"""
White-fills OCR quads ONLY for bubbles that:
- have a translation in output.txt AND
- are NOT in SKIP_BUBBLE_IDS
"""
ih, iw = image_bgr.shape[:2]
result = image_bgr.copy()
erased_count = 0
skipped_count = 0
for bid_str, val in bubbles_data.items():
bid = int(bid_str)
quads = val.get("quads", [])
if bid in skip_ids or bid not in translations:
skipped_count += 1
continue
for quad in quads:
pts = np.array(quad, dtype=np.int32)
cv2.fillPoly(result, [pts], (255, 255, 255))
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
x1 = max(0, min(xs) - pad)
y1 = max(0, min(ys) - pad)
x2 = min(iw - 1, max(xs) + pad)
y2 = min(ih - 1, max(ys) + pad)
cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
erased_count += 1
print(f" Erased : {erased_count} bubbles")
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
return result
return json.load(f)
# ============================================================
# DYNAMIC TEXT FITTING
# ============================================================
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
"""Calculates the original font size based on the OCR bounding boxes."""
line_bboxes = bubble_data.get("line_bboxes", [])
if not line_bboxes:
box = bubble_data.get("box")
lines = bubble_data.get("lines", [])
if not box or not lines:
return fallback_size
heights = [box["h"] for box in line_bboxes]
median_h = int(np.median(heights))
line_count = len(lines)
estimated_line_height = box["h"] / max(1, line_count)
estimated_size = int(estimated_line_height * 0.85)
estimated_size = int(median_h * 0.85)
return max(MIN_FONT_SIZE, min(estimated_size, 60))
def fit_text_dynamically(
@@ -171,16 +109,11 @@ def fit_text_dynamically(
max_h: int,
target_font_size: int
) -> Tuple[List[str], Any, int, int]:
"""
Wraps text and scales down font size if it exceeds the bubble dimensions.
Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
"""
font_size = target_font_size
if not font_path:
font = ImageFont.load_default()
char_w = 6
chars_per_line = max(1, int(max_w / char_w))
chars_per_line = max(1, int(max_w / 6))
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
return wrapped_lines, font, 4, 10
@@ -196,7 +129,6 @@ def fit_text_dynamically(
wrapped_lines = textwrap.wrap(text, width=chars_per_line)
# Use uniform font metrics for height to protect accents like È
line_spacing = max(2, int(font_size * 0.15))
if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics()
@@ -235,10 +167,6 @@ def render_text(
font_path: str,
skip_ids: Set[int]
):
"""
Draws the translated text centered in the line_union_bbox of each bubble.
Adds a dynamic white stroke (outline) to cover any residual original characters.
"""
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(image_rgb)
draw = ImageDraw.Draw(pil_img)
@@ -252,14 +180,11 @@ def render_text(
continue
text = translations[bid]
union_box = val.get("line_union_bbox")
if not union_box:
union_box = val.get("text_bbox")
if not union_box:
box = val.get("box")
if not box:
continue
bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
bx, by, bw, bh = box["x"], box["y"], box["w"], box["h"]
pad_x = int(bw * 0.1)
pad_y = int(bh * 0.1)
@@ -271,7 +196,6 @@ def render_text(
target_size = get_original_font_size(val)
wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
# Use uniform typographic line height for rendering to protect accents
if hasattr(font, 'getmetrics'):
ascent, descent = font.getmetrics()
line_h = ascent + descent
@@ -279,11 +203,17 @@ def render_text(
line_h = final_size
total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
current_y = by + (bh - total_text_height) // 2
# Dynamic outline thickness based on the final scaled font size
outline_thickness = max(2, int(final_size * 0.10))
# --- SMART OUTLINE LOGIC ---
bg_type = val.get("background_type", "white")
# Only use a white outline if the background is complex (inpainted artwork).
# If it's a white bubble, or if we are using the tiny default font, disable the outline.
if bg_type == "complex" and font_path:
outline_thickness = max(1, int(final_size * 0.05))
else:
outline_thickness = 0
for i, line in enumerate(wrapped_lines):
if hasattr(font, 'getbbox'):
@@ -294,7 +224,6 @@ def render_text(
current_x = bx + (bw - lw) // 2
# Draw text with white stroke for artifact coverage
draw.text(
(current_x, current_y),
line,
@@ -304,53 +233,59 @@ def render_text(
stroke_fill=(255, 255, 255)
)
# Advance Y by the uniform line height + spacing
current_y += line_h + line_spacing
rendered_count += 1
print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
print(f" Rendered: {rendered_count} bubbles")
return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
# ============================================================
# MAIN
# ============================================================
def main():
print(f"Loading image: {IMAGE_PATH}")
image_bgr = cv2.imread(IMAGE_PATH)
if image_bgr is None:
print(f"❌ Error: Could not load {IMAGE_PATH}")
parser = argparse.ArgumentParser(description="Render translated text onto cleaned manga pages.")
parser.add_argument("-i", "--image", required=True, help="Path to the CLEANED manga image")
parser.add_argument("-j", "--json", required=True, help="Path to bubbles.json")
parser.add_argument("-t", "--txt", required=True, help="Path to output.txt")
parser.add_argument("-o", "--output", help="Path to save the final translated image")
args = parser.parse_args()
if not os.path.exists(args.image):
print(f"❌ Error: Image file not found at {args.image}")
return
print(f"Loading translations: {TRANSLATIONS_PATH}")
translations = parse_translations(TRANSLATIONS_PATH)
print(f"📂 Loading cleaned image: {args.image}")
image_bgr = cv2.imread(args.image)
print(f"Loading bubble data: {BUBBLES_PATH}")
bubbles_data = parse_bubbles(BUBBLES_PATH)
print(f"📂 Loading translations: {args.txt}")
translations = parse_translations(args.txt)
print("Resolving font...")
print(f"📂 Loading bubble data: {args.json}")
bubbles_data = parse_bubbles(args.json)
print("🔍 Resolving font...")
font_path = resolve_font_path()
print("\n--- Step 1: Erasing original text ---")
erased_bgr = erase_quads(
image_bgr=image_bgr,
bubbles_data=bubbles_data,
translations=translations,
skip_ids=SKIP_BUBBLE_IDS,
pad=QUAD_PAD
)
print("\n--- Step 2: Rendering translated text ---")
print("\n--- Rendering translated text ---")
final_bgr = render_text(
image_bgr=erased_bgr,
image_bgr=image_bgr,
bubbles_data=bubbles_data,
translations=translations,
font_path=font_path,
skip_ids=SKIP_BUBBLE_IDS
)
print(f"\nSaving final image to: {OUTPUT_PATH}")
cv2.imwrite(OUTPUT_PATH, final_bgr)
if args.output:
out_path = args.output
else:
base_name = args.image.replace("_cleaned", "")
base_name, ext = os.path.splitext(base_name)
out_path = f"{base_name}_translated{ext}"
print(f"\n💾 Saving final image to: {out_path}")
cv2.imwrite(out_path, final_bgr)
print("✅ Done!")
if __name__ == "__main__":