#!/usr/bin/env bash
# ============================================================
# batch-translate.sh
# Batch manga OCR + translation for all images in a folder.
#
# Usage:
#   ./batch-translate.sh <folder>
#   ./batch-translate.sh <folder> --source en --target es
#   ./batch-translate.sh <folder> --start 3 --end 7
#   ./batch-translate.sh <folder> -s en -t fr --start 2
#
# Output per page lands in:
#   <folder>/translated/<page-stem>/
#     ├── bubbles.json
#     ├── output.txt
#     └── debug_clusters.png
# ============================================================

# -e is intentionally not set: the per-page loop inspects the translator's
# exit status itself and counts failures instead of aborting the batch.
set -uo pipefail

# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# Defaults; the language, page-range and python values can be
# overridden from the command line.
# ─────────────────────────────────────────────────────────────
SOURCE_LANG="en"
TARGET_LANG="ca"
START_PAGE=1
END_PAGE=999999
PYTHON_BIN="python"
# Directory holding this script; the translator is expected next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TRANSLATOR="${SCRIPT_DIR}/manga-translator.py"

# ─────────────────────────────────────────────────────────────
# COLOURS
# Stored as backslash escapes; expanded by printf's %b.
# ─────────────────────────────────────────────────────────────
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
RESET='\033[0m'

# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────

#######################################
# Print the command-line help text.
# Globals:   BOLD, RESET (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  printf '\n%bUsage:%b\n' "$BOLD" "$RESET"
  printf '  %s <folder> [options]\n\n' "$0"
  printf '%bOptions:%b\n' "$BOLD" "$RESET"
  printf '%s\n' \
    "  --source, -s <lang>   Source language code (default: en)" \
    "  --target, -t <lang>   Target language code (default: ca)" \
    "  --start <n>           First page number    (default: 1)" \
    "  --end <n>             Last page number     (default: all)" \
    "  --python <bin>        Python binary        (default: python)" \
    "  --help, -h            Show this help" \
    ""
  printf '%bExamples:%b\n' "$BOLD" "$RESET"
  printf '  %s %s\n' \
    "$0" "pages-for-tests" \
    "$0" "pages-for-tests --source en --target es" \
    "$0" "pages-for-tests --start 3 --end 7" \
    "$0" "pages-for-tests -s en -t fr --start 2"
  printf '\n'
}

# Coloured one-line loggers. The message is printed with %s so that
# backslashes in file names or paths are shown literally; only the colour
# codes go through %b.
log_info() { printf '%bℹ️ %s%b\n' "$CYAN" "$*" "$RESET"; }
log_ok()   { printf '%b✅ %s%b\n' "$GREEN" "$*" "$RESET"; }
log_warn() { printf '%b⚠️ %s%b\n' "$YELLOW" "$*" "$RESET"; }
# Print an error line in red. The message is emitted with %s so backslashes
# in it (paths, file names) are shown literally.
log_error() { printf '%b❌ %s%b\n' "$RED" "$*" "$RESET"; }

#######################################
# Print a three-line section banner preceded by a blank line.
# Globals:   BOLD, CYAN, RESET (read)
# Arguments: banner title
# Outputs:   banner on stdout
#######################################
log_section() {
  local rule='══════════════════════════════════════════'
  printf '\n%b%b%s%b\n' "$BOLD" "$CYAN" "$rule" "$RESET"
  printf '%b%b 📖 %s%b\n' "$BOLD" "$CYAN" "$*" "$RESET"
  printf '%b%b%s%b\n' "$BOLD" "$CYAN" "$rule" "$RESET"
}

#######################################
# Abort with a usage error unless the option being parsed has a value.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
#######################################
_require_value() {
  if [[ $2 -lt 2 ]]; then
    log_error "Option $1 requires a value."
    usage
    exit 1
  fi
}

#######################################
# Abort unless the value is a whole number >= 1.
# Arguments: $1 - option name (for the message), $2 - value to check
#######################################
_require_page_number() {
  # Regex kept in a variable: a quoted right-hand side of =~ is matched
  # literally by bash >= 3.2.
  local digits_only='^[0-9]+$'
  # 10# forces base 10 so values such as "08" are not read as bad octal.
  if ! [[ $2 =~ $digits_only ]] || (( 10#$2 < 1 )); then
    log_error "$1 must be a positive integer (got: $2)"
    exit 1
  fi
}

# ─────────────────────────────────────────────────────────────
# ARGUMENT PARSING
# The first argument is the image folder; options follow it.
# ─────────────────────────────────────────────────────────────
if [[ $# -eq 0 ]]; then
  log_error "No folder specified."
  usage
  exit 1
fi

# Allow asking for help without naming a folder.
case "$1" in
  --help|-h)
    usage
    exit 0
    ;;
esac

FOLDER="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --source|-s)
      _require_value "$1" "$#"
      SOURCE_LANG="$2"
      shift 2
      ;;
    --target|-t)
      _require_value "$1" "$#"
      TARGET_LANG="$2"
      shift 2
      ;;
    --start)
      _require_value "$1" "$#"
      START_PAGE="$2"
      shift 2
      ;;
    --end)
      _require_value "$1" "$#"
      END_PAGE="$2"
      shift 2
      ;;
    --python)
      _require_value "$1" "$#"
      PYTHON_BIN="$2"
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# ─────────────────────────────────────────────────────────────
# VALIDATION
# ─────────────────────────────────────────────────────────────
if [[ ! -d "$FOLDER" ]]; then
  log_error "Folder not found: $FOLDER"
  exit 1
fi

if [[ ! -f "$TRANSLATOR" ]]; then
  log_error "manga-translator.py not found at: $TRANSLATOR"
  exit 1
fi

if ! command -v "$PYTHON_BIN" &>/dev/null; then
  log_error "Python binary not found: $PYTHON_BIN"
  log_error "Try --python python3"
  exit 1
fi

# Page bounds are compared numerically later on; reject anything that is
# not a positive integer and normalise away leading zeros.
_require_page_number "--start" "$START_PAGE"
_require_page_number "--end" "$END_PAGE"
START_PAGE=$(( 10#$START_PAGE ))
END_PAGE=$(( 10#$END_PAGE ))

# ─────────────────────────────────────────────────────────────
# PURGE BYTECODE CACHE
# ─────────────────────────────────────────────────────────────
log_info "🗑️ Purging Python bytecode caches..."
# Best effort: -prune stops find from descending into a directory that is
# about to be removed; any remaining errors are deliberately ignored.
find "${SCRIPT_DIR}" -type d -name __pycache__ -prune \
  -exec rm -rf {} + 2>/dev/null || true
log_ok "Cache cleared."
# ─────────────────────────────────────────────────────────────
# DISCOVER IMAGES
# NOTE: uses while-read loop instead of mapfile for Bash 3.2
# compatibility (macOS default shell)
# Files are taken from the top level of FOLDER only and ordered by
# `sort -z`, i.e. plain string order of the path (so "10" sorts
# before "2" unless names are zero-padded).
# ─────────────────────────────────────────────────────────────
ALL_IMAGES=()
while IFS= read -r -d '' img; do
  ALL_IMAGES+=("$img")
done < <(
  find "$FOLDER" -maxdepth 1 -type f \
    \( -iname "*.jpg" -o -iname "*.jpeg" \
       -o -iname "*.png" -o -iname "*.webp" \) \
    -print0 | sort -z
)

TOTAL=${#ALL_IMAGES[@]}

if [[ $TOTAL -eq 0 ]]; then
  log_error "No image files found in: $FOLDER"
  exit 1
fi

# ─────────────────────────────────────────────────────────────
# SLICE TO REQUESTED PAGE RANGE (1-based)
# PAGE_NUMS keeps the real 1-based position of every selected
# image so labels stay correct whatever START_PAGE is.
# ─────────────────────────────────────────────────────────────
PAGES=()
PAGE_NUMS=()
for i in "${!ALL_IMAGES[@]}"; do
  PAGE_NUM=$(( i + 1 ))
  if [[ $PAGE_NUM -ge $START_PAGE && $PAGE_NUM -le $END_PAGE ]]; then
    PAGES+=("${ALL_IMAGES[$i]}")
    PAGE_NUMS+=("$PAGE_NUM")
  fi
done

if [[ ${#PAGES[@]} -eq 0 ]]; then
  log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
  exit 1
fi

#######################################
# Print the physical absolute path of a directory.
# Used instead of `realpath`, which is not guaranteed to be installed on
# older macOS releases. Falls back to the path as given when the directory
# cannot be entered.
# Arguments: $1 - directory path
# Outputs:   absolute path on stdout
#######################################
_abs_dir() {
  # CDPATH is cleared so a relative path cannot resolve somewhere else.
  (CDPATH='' cd -- "$1" 2>/dev/null && pwd -P) || printf '%s\n' "$1"
}

# ─────────────────────────────────────────────────────────────
# SUMMARY HEADER
# ─────────────────────────────────────────────────────────────
log_section "BATCH MANGA TRANSLATOR"
log_info "📂 Folder : $(_abs_dir "$FOLDER")"
log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total"
log_info "🔢 Range : ${START_PAGE} → ${END_PAGE}"
log_info "🌐 Source : ${SOURCE_LANG}"
log_info "🎯 Target : ${TARGET_LANG}"
log_info "💾 Output : ${FOLDER}/translated/<page-stem>/"
echo ""

# ─────────────────────────────────────────────────────────────
# PROCESS EACH PAGE
# A page passes when the translator exits 0 AND both
# bubbles.json and output.txt exist and are non-empty.
# NOTE(review): files left by an earlier run also satisfy that
# check — confirm whether stale outputs should be cleared first.
# ─────────────────────────────────────────────────────────────
PASS=0
FAIL=0
FAIL_LIST=()
RULE='──────────────────────────────────────────'

for i in "${!PAGES[@]}"; do
  IMAGE="${PAGES[$i]}"
  PAGE_NUM="${PAGE_NUMS[$i]}"
  # Stem = file name without its last extension; it names the output dir.
  STEM="$(basename "${IMAGE%.*}")"
  WORKDIR="${FOLDER}/translated/${STEM}"

  # printf with %s keeps backslashes in the stem literal.
  printf '\n'
  printf '%b%s%b\n' "$BOLD" "$RULE" "$RESET"
  printf '%b 🖼️ [%s/%s] %s%b\n' "$BOLD" "$PAGE_NUM" "$TOTAL" "$STEM" "$RESET"
  printf '%b%s%b\n' "$BOLD" "$RULE" "$RESET"

  if ! mkdir -p "$WORKDIR"; then
    log_error "Could not create output folder: ${WORKDIR}"
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM}")
    continue
  fi

  OUTPUT_JSON="${WORKDIR}/bubbles.json"
  OUTPUT_TXT="${WORKDIR}/output.txt"
  OUTPUT_DEBUG="${WORKDIR}/debug_clusters.png"

  log_info "🗂️ Image : $(basename "$IMAGE")"
  log_info "📁 Out : ${WORKDIR}"

  # ── Run the translator ────────────────────────────────────
  if "$PYTHON_BIN" "$TRANSLATOR" \
      "$IMAGE" \
      --source "$SOURCE_LANG" \
      --target "$TARGET_LANG" \
      --json "$OUTPUT_JSON" \
      --txt "$OUTPUT_TXT" \
      --debug "$OUTPUT_DEBUG"; then

    # Verify outputs exist and are non-empty
    MISSING=0
    for FNAME in "bubbles.json" "output.txt"; do
      FPATH="${WORKDIR}/${FNAME}"
      if [[ ! -f "$FPATH" || ! -s "$FPATH" ]]; then
        log_warn "${FNAME} is missing or empty."
        MISSING=$(( MISSING + 1 ))
      else
        # tr strips the padding some wc implementations add.
        SIZE=$(wc -c < "$FPATH" | tr -d ' ')
        log_ok "${FNAME} → ${SIZE} bytes"
      fi
    done

    # The debug image is optional: reported when present, never required.
    if [[ -f "$OUTPUT_DEBUG" ]]; then
      log_ok "debug_clusters.png written."
    fi

    if [[ $MISSING -eq 0 ]]; then
      log_ok "Page ${PAGE_NUM} complete."
      PASS=$(( PASS + 1 ))
    else
      log_warn "Page ${PAGE_NUM} finished with warnings."
      FAIL=$(( FAIL + 1 ))
      FAIL_LIST+=("${STEM}")
    fi
  else
    log_error "Page ${PAGE_NUM} FAILED — check output above."
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM}")
  fi
done

# ─────────────────────────────────────────────────────────────
# FINAL SUMMARY
# Exit status: 0 when every page passed, 1 otherwise.
# ─────────────────────────────────────────────────────────────
log_section "BATCH COMPLETE"
printf ' ✅ %bPassed : %s%b\n' "$GREEN" "$PASS" "$RESET"
printf ' ❌ %bFailed : %s%b\n' "$RED" "$FAIL" "$RESET"

if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then
  echo ""
  log_warn "Failed pages:"
  for NAME in "${FAIL_LIST[@]}"; do
    printf ' ❌ %b%s%b\n' "$RED" "$NAME" "$RESET"
  done
fi

echo ""
log_info "📦 Output folder: $(_abs_dir "${FOLDER}/translated")"
echo ""

if [[ $FAIL -ne 0 ]]; then
  exit 1
fi
exit 0