Files
manga-translator/batch-translate.sh
Guillem Hernandez Sola 243e5bad47 Added all
2026-04-23 16:20:37 +02:00

269 lines
11 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# ============================================================
# batch-translate.sh
# Batch manga OCR + translation for all images in a folder.
#
# Usage:
# ./batch-translate.sh <folder>
# ./batch-translate.sh <folder> --source en --target es
# ./batch-translate.sh <folder> --start 3 --end 7
# ./batch-translate.sh <folder> -s en -t fr --start 2
#
# Output per page lands in:
# <folder>/translated/<page_stem>/
# ├── bubbles.json
# ├── output.txt
# └── debug_clusters.png
# ============================================================
# Shell options: -u makes expanding an unset variable an error and pipefail
# makes a pipeline fail when any stage fails. errexit (-e) is not enabled.
set -uo pipefail
# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────
# Defaults for the values the command-line options can override.
SOURCE_LANG="en"        # --source / -s
TARGET_LANG="ca"        # --target / -t
START_PAGE=1            # --start (1-based position in the sorted image list)
END_PAGE=999999         # --end   (large sentinel: "up to the last page")
PYTHON_BIN="python"     # --python
# Absolute directory containing this script. CDPATH is cleared for the cd so
# that a relative directory name cannot be resolved through CDPATH, which
# would make cd print the directory and leak a second line into SCRIPT_DIR.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# The Python translator is expected to sit next to this script.
TRANSLATOR="${SCRIPT_DIR}/manga-translator.py"
# ─────────────────────────────────────────────────────────────
# COLOURS
# ─────────────────────────────────────────────────────────────
# ANSI colour sequences used by the logging helpers. They are stored with a
# literal backslash escape and only become control characters when printed
# through an escape-interpreting command such as `echo -e`.
#
# Colours are enabled only when stdout is a terminal and the NO_COLOR
# environment variable is empty or unset; otherwise every variable is set to
# the empty string so redirected output contains no raw escape codes.
if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  CYAN='\033[0;36m'
  BOLD='\033[1m'
  RESET='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  CYAN=''
  BOLD=''
  RESET=''
fi
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
#######################################
# Print the help text (synopsis, options, examples) to stdout.
# Globals:   BOLD, RESET (read) - colour sequences around the headings
# Arguments: none; "$0" is shown as the program name
# Outputs:   the help text on stdout
#######################################
usage() {
  # Headings go through %b so the colour escapes are expanded; all other
  # lines are printed literally with %s.
  printf '\n%b\n' "${BOLD}Usage:${RESET}"
  printf '%s\n' \
    " $0 <folder> [options]" \
    ""
  printf '%b\n' "${BOLD}Options:${RESET}"
  printf '%s\n' \
    " --source, -s Source language code (default: en)" \
    " --target, -t Target language code (default: ca)" \
    " --start First page number (default: 1)" \
    " --end Last page number (default: all)" \
    " --python Python binary (default: python)" \
    " --help, -h Show this help" \
    ""
  printf '%b\n' "${BOLD}Examples:${RESET}"
  printf '%s\n' \
    " $0 pages-for-tests" \
    " $0 pages-for-tests --source en --target es" \
    " $0 pages-for-tests --start 3 --end 7" \
    " $0 pages-for-tests -s en -t fr --start 2" \
    ""
}
# Print an informational message in cyan on stdout.
# Arguments: the message words, joined with single spaces.
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or file names survive.
log_info() { printf '%b %s%b\n' "${CYAN}" "$*" "${RESET}"; }
# Print a success message in green on stdout.
# Arguments: the message words, joined with single spaces.
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or file names survive.
log_ok() { printf '%b%s%b\n' "${GREEN}" "$*" "${RESET}"; }
# Print a warning in yellow, prefixed with a warning sign, on stdout.
# Arguments: the message words, joined with single spaces.
# Only the colour variables are escape-expanded (%b); the prefix and message
# are printed verbatim (%s) so backslashes in paths or file names survive.
log_warn() { printf '%b%s%s%b\n' "${YELLOW}" "⚠️ " "$*" "${RESET}"; }
# Print an error message in red on stdout.
# Arguments: the message words, joined with single spaces.
# Only the colour variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or file names survive.
log_error() { printf '%b%s%b\n' "${RED}" "$*" "${RESET}"; }
#######################################
# Print a section banner: a blank line, a horizontal rule, the title line
# and a second rule, all in bold cyan.
# Globals:   BOLD, CYAN, RESET (read)
# Arguments: the title words, joined with single spaces
# Outputs:   four lines on stdout
#######################################
log_section() {
  local rule="══════════════════════════════════════════"
  local tint="${BOLD}${CYAN}"
  # Colour variables go through %b; the rule and the title are printed
  # verbatim with %s so a backslash in the title is not treated as an escape.
  printf '\n%b%s%b\n' "${tint}" "${rule}" "${RESET}"
  printf '%b%s%b\n' "${tint}" " 📖 $*" "${RESET}"
  printf '%b%s%b\n' "${tint}" "${rule}" "${RESET}"
}
# ─────────────────────────────────────────────────────────────
# ARGUMENT PARSING
# ─────────────────────────────────────────────────────────────
# Parse the command line. The first argument is the image folder (or a help
# flag); every remaining argument must be one of the known options.
if [[ $# -eq 0 ]]; then
  log_error "No folder specified."
  usage
  exit 1
fi

# Honour --help / -h in first position instead of treating it as the folder.
case "$1" in
  --help|-h)
    usage
    exit 0
    ;;
esac

FOLDER="$1"
shift

while [[ $# -gt 0 ]]; do
  OPT_NAME="$1"

  # Step 1: classify the option and fetch its value.
  case "$OPT_NAME" in
    --help|-h)
      usage
      exit 0
      ;;
    --source|-s|--target|-t|--start|--end|--python)
      # With `set -u` in effect, expanding "$2" when it is missing would abort
      # the script with a bare "unbound variable" message; report it properly.
      if [[ $# -lt 2 ]]; then
        log_error "Option ${OPT_NAME} requires a value."
        usage
        exit 1
      fi
      OPT_VALUE="$2"
      shift 2
      ;;
    *)
      log_error "Unknown option: ${OPT_NAME}"
      usage
      exit 1
      ;;
  esac

  # Step 2: store the value, validating the numeric options.
  case "$OPT_NAME" in
    --source|-s) SOURCE_LANG="$OPT_VALUE" ;;
    --target|-t) TARGET_LANG="$OPT_VALUE" ;;
    --python)    PYTHON_BIN="$OPT_VALUE" ;;
    --start|--end)
      if [[ ! "$OPT_VALUE" =~ ^[0-9]+$ ]]; then
        log_error "Option ${OPT_NAME} expects a non-negative integer, got: ${OPT_VALUE}"
        usage
        exit 1
      fi
      # Force base 10 so a zero-padded value such as 08 is not rejected as an
      # invalid octal number by later arithmetic comparisons.
      OPT_VALUE=$(( 10#$OPT_VALUE ))
      if [[ "$OPT_NAME" == "--start" ]]; then
        # Pages are numbered from 1, so a start of 0 selects the same pages
        # as a start of 1.
        if [[ $OPT_VALUE -lt 1 ]]; then
          OPT_VALUE=1
        fi
        START_PAGE="$OPT_VALUE"
      else
        END_PAGE="$OPT_VALUE"
      fi
      ;;
  esac
done
# ─────────────────────────────────────────────────────────────
# VALIDATION
# ─────────────────────────────────────────────────────────────
# Fail fast, with exit status 1, when a prerequisite is missing:
# the input folder, the translator script, or the Python interpreter.
[[ -d "$FOLDER" ]] || {
  log_error "Folder not found: $FOLDER"
  exit 1
}

[[ -f "$TRANSLATOR" ]] || {
  log_error "manga-translator.py not found at: $TRANSLATOR"
  exit 1
}

# `command -v` succeeds when PYTHON_BIN resolves to something runnable
# (a name found on PATH or an explicit path); its output is discarded.
command -v "$PYTHON_BIN" >/dev/null 2>&1 || {
  log_error "Python binary not found: $PYTHON_BIN"
  log_error "Try --python python3"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# PURGE BYTECODE CACHE
# ─────────────────────────────────────────────────────────────
# Remove every __pycache__ directory below the script directory before the
# translator is run.
log_info "🗑️ Purging Python bytecode caches..."
# -prune stops find from descending into a directory it is about to delete.
# The purge is best-effort: errors are silenced and never abort the batch.
find "${SCRIPT_DIR}" -type d -name __pycache__ -prune -exec rm -rf -- {} + 2>/dev/null || true
log_ok "Cache cleared."
# ─────────────────────────────────────────────────────────────
# DISCOVER IMAGES
# NOTE: uses while-read loop instead of mapfile for Bash 3.2
# compatibility (macOS default shell)
# ─────────────────────────────────────────────────────────────
# Collect the image files that sit directly inside FOLDER (no recursion),
# matched case-insensitively by extension, into ALL_IMAGES in `sort` order.
# Paths travel NUL-delimited so any file name is handled safely.
# NOTE(review): `sort -z` is a plain string sort, so "page10" is ordered
# before "page2" unless the names are zero-padded.
IMAGE_FILTER=(
  -iname "*.jpg"
  -o -iname "*.jpeg"
  -o -iname "*.png"
  -o -iname "*.webp"
)

ALL_IMAGES=()
while IFS= read -r -d '' found; do
  ALL_IMAGES[${#ALL_IMAGES[@]}]="$found"
done < <(
  find "$FOLDER" -maxdepth 1 -type f \( "${IMAGE_FILTER[@]}" \) -print0 | sort -z
)

TOTAL=${#ALL_IMAGES[@]}
if (( TOTAL == 0 )); then
  log_error "No image files found in: $FOLDER"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SLICE TO REQUESTED PAGE RANGE (1-based)
# ─────────────────────────────────────────────────────────────
# Keep only the images whose 1-based position in ALL_IMAGES lies inside the
# inclusive range [START_PAGE, END_PAGE]; abort when nothing is selected.
PAGES=()
for (( i = 0; i < TOTAL; i++ )); do
  PAGE_NUM=$(( i + 1 ))
  if [[ $PAGE_NUM -ge $START_PAGE ]] && [[ $PAGE_NUM -le $END_PAGE ]]; then
    PAGES[${#PAGES[@]}]="${ALL_IMAGES[$i]}"
  fi
done

(( ${#PAGES[@]} > 0 )) || {
  log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# SUMMARY HEADER
# ─────────────────────────────────────────────────────────────
# Print an overview of what is about to be processed.
log_section "BATCH MANGA TRANSLATOR"
# Resolve the folder to an absolute physical path with cd + pwd -P rather
# than the external `realpath` command, which is not guaranteed to be
# installed on every system. Fall back to the path as given if the directory
# cannot be entered.
FOLDER_ABS="$(CDPATH= cd -- "$FOLDER" 2>/dev/null && pwd -P)" || FOLDER_ABS="$FOLDER"
log_info "📂 Folder : ${FOLDER_ABS}"
log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total"
# Keep a separator between the two bounds so they do not run together.
log_info "🔢 Range : ${START_PAGE} - ${END_PAGE}"
log_info "🌐 Source : ${SOURCE_LANG}"
log_info "🎯 Target : ${TARGET_LANG}"
log_info "💾 Output : ${FOLDER}/translated/<page>/"
echo ""
# ─────────────────────────────────────────────────────────────
# PROCESS EACH PAGE
# ─────────────────────────────────────────────────────────────
# Run the translator once per selected page and tally the results.
#   PASS       pages whose run succeeded and produced both required files
#   FAIL       pages whose run failed or left a required file missing/empty
#   FAIL_LIST  stems of the pages counted in FAIL
PASS=0
FAIL=0
FAIL_LIST=()

# Page number shown for the first selected image. Positions are 1-based, so
# a START_PAGE below 1 still begins the selection at page 1.
FIRST_PAGE=$START_PAGE
if [[ $FIRST_PAGE -lt 1 ]]; then
  FIRST_PAGE=1
fi

for i in "${!PAGES[@]}"; do
  IMAGE="${PAGES[$i]}"
  PAGE_NUM=$(( FIRST_PAGE + i ))
  # File name without directory and without its last extension.
  # NOTE(review): two images that differ only by extension (a.jpg / a.png)
  # share a stem and therefore the same output directory.
  STEM="$(basename "${IMAGE%.*}")"
  WORKDIR="${FOLDER}/translated/${STEM}"

  echo ""
  echo -e "${BOLD}──────────────────────────────────────────${RESET}"
  # The stem is printed with %s so a backslash in a file name is not
  # interpreted as an escape sequence.
  printf '%b%s%b\n' "${BOLD}" " 🖼️ [${PAGE_NUM}/${TOTAL}] ${STEM}" "${RESET}"
  echo -e "${BOLD}──────────────────────────────────────────${RESET}"

  mkdir -p "$WORKDIR"
  OUTPUT_JSON="${WORKDIR}/bubbles.json"
  OUTPUT_TXT="${WORKDIR}/output.txt"
  OUTPUT_DEBUG="${WORKDIR}/debug_clusters.png"

  log_info "🗂️ Image : $(basename "$IMAGE")"
  log_info "📁 Out : ${WORKDIR}"

  # ── Run the translator ────────────────────────────────────
  if "$PYTHON_BIN" "$TRANSLATOR" \
    "$IMAGE" \
    --source "$SOURCE_LANG" \
    --target "$TARGET_LANG" \
    --json "$OUTPUT_JSON" \
    --txt "$OUTPUT_TXT" \
    --debug "$OUTPUT_DEBUG"; then

    # A zero exit status is not trusted on its own: both required outputs
    # must exist and be non-empty.
    MISSING=0
    for FNAME in "bubbles.json" "output.txt"; do
      FPATH="${WORKDIR}/${FNAME}"
      if [[ ! -f "$FPATH" || ! -s "$FPATH" ]]; then
        log_warn "${FNAME} is missing or empty."
        MISSING=$(( MISSING + 1 ))
      else
        # tr strips the padding some wc implementations put around the count.
        SIZE=$(wc -c < "$FPATH" | tr -d ' ')
        log_ok "${FNAME}: ${SIZE} bytes"
      fi
    done

    # The debug image is optional; its absence is not counted as a failure.
    if [[ -f "$OUTPUT_DEBUG" ]]; then
      log_ok "debug_clusters.png written."
    fi

    if [[ $MISSING -eq 0 ]]; then
      log_ok "Page ${PAGE_NUM} complete."
      PASS=$(( PASS + 1 ))
    else
      log_warn "Page ${PAGE_NUM} finished with warnings."
      FAIL=$(( FAIL + 1 ))
      FAIL_LIST+=("${STEM}")
    fi
  else
    log_error "Page ${PAGE_NUM} FAILED — check output above."
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM}")
  fi
done
# ─────────────────────────────────────────────────────────────
# FINAL SUMMARY
# ─────────────────────────────────────────────────────────────
# Report the totals, list the pages that failed, and exit with status 0 only
# when every page passed.
log_section "BATCH COMPLETE"
echo -e "${GREEN}Passed : ${PASS}${RESET}"
echo -e "${RED}Failed : ${FAIL}${RESET}"

if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then
  echo ""
  log_warn "Failed pages:"
  for NAME in "${FAIL_LIST[@]}"; do
    # %s keeps a backslash in a page name from being read as an escape.
    printf '%b%s%b\n' "${RED}" "${NAME}" "${RESET}"
  done
fi

echo ""
# Resolve the output folder with cd + pwd -P rather than the external
# `realpath` command, which is not guaranteed to be installed on every
# system. Fall back to the unresolved path if the directory cannot be entered.
OUTPUT_ROOT="$(CDPATH= cd -- "${FOLDER}/translated" 2>/dev/null && pwd -P)" \
  || OUTPUT_ROOT="${FOLDER}/translated"
log_info "📦 Output folder: ${OUTPUT_ROOT}"
echo ""

if [[ $FAIL -eq 0 ]]; then
  exit 0
fi
exit 1