Added all

This commit is contained in:
Guillem Hernandez Sola
2026-04-23 16:20:37 +02:00
parent 3ca01dae8c
commit 243e5bad47
5 changed files with 500 additions and 579 deletions

269
batch-translate.sh Executable file
View File

@@ -0,0 +1,269 @@
#!/usr/bin/env bash
# ============================================================
# batch-translate.sh
# Batch manga OCR + translation for all images in a folder.
#
# Usage:
# ./batch-translate.sh <folder>
# ./batch-translate.sh <folder> --source en --target es
# ./batch-translate.sh <folder> --start 3 --end 7
# ./batch-translate.sh <folder> -s en -t fr --start 2
#
# Output per page lands in:
# <folder>/translated/<page_stem>/
# ├── bubbles.json
# ├── output.txt
# └── debug_clusters.png
# ============================================================
# -u aborts on use of an unset variable; pipefail makes a pipeline fail when
# any stage fails. errexit (-e) is not enabled, so a failing command does not
# abort the script on its own.
set -uo pipefail
# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────
# Defaults; each can be overridden by the matching command-line option.
SOURCE_LANG="en"      # --source / -s
TARGET_LANG="ca"      # --target / -t
START_PAGE=1          # --start : first page to process (1-based)
END_PAGE=999999       # --end   : last page; the large default stands for "all"
PYTHON_BIN="python"   # --python: interpreter used to run the translator
# Absolute directory holding this script. CDPATH is cleared for the cd because
# a non-empty CDPATH can make cd print the directory it picked (polluting the
# captured value) or resolve a relative script path somewhere else entirely.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# The Python translator is expected to sit next to this script.
TRANSLATOR="${SCRIPT_DIR}/manga-translator.py"
# ─────────────────────────────────────────────────────────────
# COLOURS
# ─────────────────────────────────────────────────────────────
# ANSI colour/attribute sequences. They are kept as literal backslash text
# (not raw ESC bytes) and only become real escapes when printed with
# `echo -e`. All of them share the same CSI introducer, factored out below.
_sgr_prefix='\033['
RED="${_sgr_prefix}0;31m"
GREEN="${_sgr_prefix}0;32m"
YELLOW="${_sgr_prefix}1;33m"
CYAN="${_sgr_prefix}0;36m"
BOLD="${_sgr_prefix}1m"
RESET="${_sgr_prefix}0m"
unset _sgr_prefix
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
#######################################
# Print the command-line help text.
# Globals:   BOLD, RESET (read) - escape sequences wrapped around headings
# Arguments: none ($0 is shown as the program name)
# Outputs:   help text on stdout
#######################################
usage() {
  # Headings go through %b so the colour escapes are interpreted; every
  # other line is printed verbatim with %s.
  printf '\n'
  printf '%b\n' "${BOLD}Usage:${RESET}"
  printf '%s\n' \
    " $0 <folder> [options]" \
    ""
  printf '%b\n' "${BOLD}Options:${RESET}"
  printf '%s\n' \
    " --source, -s Source language code (default: en)" \
    " --target, -t Target language code (default: ca)" \
    " --start First page number (default: 1)" \
    " --end Last page number (default: all)" \
    " --python Python binary (default: python)" \
    " --help, -h Show this help" \
    ""
  printf '%b\n' "${BOLD}Examples:${RESET}"
  printf '%s\n' \
    " $0 pages-for-tests" \
    " $0 pages-for-tests --source en --target es" \
    " $0 pages-for-tests --start 3 --end 7" \
    " $0 pages-for-tests -s en -t fr --start 2" \
    ""
}
# One-line coloured loggers. Each joins its arguments with spaces, wraps them
# in a colour taken from the globals CYAN/GREEN/YELLOW/RED and RESET, and
# writes the result to stdout. Backslash escapes in the message are
# interpreted (%b), matching `echo -e`.

# Informational message (cyan, one leading space).
log_info() {
  printf '%b\n' "${CYAN} $*${RESET}"
}

# Success message (green).
log_ok() {
  printf '%b\n' "${GREEN}$*${RESET}"
}

# Warning message (yellow, prefixed with a warning sign).
log_warn() {
  printf '%b\n' "${YELLOW}⚠️ $*${RESET}"
}

# Error message (red).
log_error() {
  printf '%b\n' "${RED}$*${RESET}"
}
#######################################
# Print a section banner: a blank line, a horizontal rule, the title, and a
# second rule, all in bold cyan.
# Globals:   BOLD, CYAN, RESET (read)
# Arguments: $* - banner title
# Outputs:   four lines on stdout
#######################################
log_section() {
  local rule="══════════════════════════════════════════"
  local on="${BOLD}${CYAN}"
  printf '%b\n' \
    "\n${on}${rule}${RESET}" \
    "${on} 📖 $*${RESET}" \
    "${on}${rule}${RESET}"
}
# ─────────────────────────────────────────────────────────────
# ARGUMENT PARSING
# ─────────────────────────────────────────────────────────────
# The first positional argument is the image folder; everything after it is
# an option. Results are stored in FOLDER, SOURCE_LANG, TARGET_LANG,
# START_PAGE, END_PAGE and PYTHON_BIN.
if [[ $# -eq 0 ]]; then
  log_error "No folder specified."
  usage
  exit 1
fi

# Honour --help/-h when it is given in place of the folder; otherwise it
# would be taken as the folder name and rejected as "not found".
case "$1" in
  --help|-h)
    usage
    exit 0
    ;;
esac

FOLDER="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --help|-h)
      usage
      exit 0
      ;;
    --source|-s|--target|-t|--start|--end|--python)
      # Every one of these takes a value. Check it is present before
      # touching $2: under `set -u` a missing value would otherwise abort
      # with a bare "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a value."
        usage
        exit 1
      fi
      case "$1" in
        --source|-s) SOURCE_LANG="$2" ;;
        --target|-t) TARGET_LANG="$2" ;;
        --start)     START_PAGE="$2" ;;
        --end)       END_PAGE="$2" ;;
        --python)    PYTHON_BIN="$2" ;;
      esac
      shift 2
      ;;
    *)
      log_error "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# Page bounds feed integer comparisons later on, so reject anything that is
# not a plain run of digits.
case "$START_PAGE" in
  ''|*[!0-9]*)
    log_error "--start must be a positive integer, got: ${START_PAGE}"
    exit 1
    ;;
esac
case "$END_PAGE" in
  ''|*[!0-9]*)
    log_error "--end must be a positive integer, got: ${END_PAGE}"
    exit 1
    ;;
esac
# Force base 10 so values such as "08" are not parsed as (invalid) octal.
START_PAGE=$(( 10#$START_PAGE ))
END_PAGE=$(( 10#$END_PAGE ))
if (( START_PAGE < 1 )); then
  log_error "--start must be at least 1 (pages are 1-based)."
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# VALIDATION
# ─────────────────────────────────────────────────────────────
# Fail fast, with exit status 1, unless all three prerequisites hold:
# the input folder exists, the translator script exists, and the chosen
# Python interpreter can be found by the shell.
[[ -d "$FOLDER" ]] || {
  log_error "Folder not found: $FOLDER"
  exit 1
}

[[ -f "$TRANSLATOR" ]] || {
  log_error "manga-translator.py not found at: $TRANSLATOR"
  exit 1
}

command -v "$PYTHON_BIN" >/dev/null 2>&1 || {
  log_error "Python binary not found: $PYTHON_BIN"
  log_error "Try --python python3"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# PURGE BYTECODE CACHE
# ─────────────────────────────────────────────────────────────
# Remove every __pycache__ directory below the script directory.
log_info "🗑️ Purging Python bytecode caches..."
# -prune stops find from descending into a directory that is about to be
# deleted. Errors are no longer discarded, and success is only reported when
# find and rm actually succeeded; a failure is a warning, not fatal.
if find "${SCRIPT_DIR}" -type d -name __pycache__ -prune -exec rm -rf -- {} +; then
  log_ok "Cache cleared."
else
  log_warn "Could not remove every __pycache__ directory; continuing."
fi
# ─────────────────────────────────────────────────────────────
# DISCOVER IMAGES
# NOTE: uses while-read loop instead of mapfile for Bash 3.2
# compatibility (macOS default shell)
# ─────────────────────────────────────────────────────────────
# Collect the image files that sit directly in FOLDER (no recursion),
# matched case-insensitively by extension and ordered by `sort`. Paths are
# passed NUL-delimited so names containing spaces or newlines stay intact.
ALL_IMAGES=()
while IFS= read -r -d '' img; do
  ALL_IMAGES[${#ALL_IMAGES[@]}]="$img"
done < <(
  find "$FOLDER" -maxdepth 1 -type f \
    \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' \) \
    -print0 \
    | sort -z
)

# TOTAL is the number of images found; nothing to do when there are none.
TOTAL=${#ALL_IMAGES[@]}
if (( TOTAL == 0 )); then
  log_error "No image files found in: $FOLDER"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SLICE TO REQUESTED PAGE RANGE (1-based)
# ─────────────────────────────────────────────────────────────
# Keep only the images whose 1-based position lies within
# [START_PAGE, END_PAGE]; the selection is stored in PAGES.
PAGES=()
for (( i = 0; i < ${#ALL_IMAGES[@]}; i++ )); do
  PAGE_NUM=$(( i + 1 ))
  if (( PAGE_NUM < START_PAGE || PAGE_NUM > END_PAGE )); then
    continue
  fi
  PAGES+=("${ALL_IMAGES[i]}")
done

# An empty selection means the requested range missed every page.
if (( ${#PAGES[@]} == 0 )); then
  log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SUMMARY HEADER
# ─────────────────────────────────────────────────────────────
# Print the run parameters before any page is processed.
log_section "BATCH MANGA TRANSLATOR"
log_info "📂 Folder : $(realpath "$FOLDER")"
log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total"
# Separate the two bounds; printed back to back they read as a single number
# (e.g. "1999999" for the defaults 1 and 999999).
log_info "🔢 Range : ${START_PAGE} → ${END_PAGE}"
log_info "🌐 Source : ${SOURCE_LANG}"
log_info "🎯 Target : ${TARGET_LANG}"
log_info "💾 Output : ${FOLDER}/translated/<page>/"
echo ""
# ─────────────────────────────────────────────────────────────
# PROCESS EACH PAGE
# ─────────────────────────────────────────────────────────────
# Run the translator once per selected page and keep a tally.
#   PASS      - pages whose translator run succeeded and produced both outputs
#   FAIL      - pages that failed or finished with missing/empty outputs
#   FAIL_LIST - stems of the pages counted in FAIL
PASS=0
FAIL=0
FAIL_LIST=()
for i in "${!PAGES[@]}"; do
  IMAGE="${PAGES[$i]}"
  # PAGES starts at START_PAGE, so index i corresponds to page START_PAGE + i.
  PAGE_NUM=$(( START_PAGE + i ))
  # Stem = file name without directory and without its last extension; it
  # names the per-page output directory.
  STEM="$(basename "${IMAGE%.*}")"
  WORKDIR="${FOLDER}/translated/${STEM}"

  echo ""
  echo -e "${BOLD}──────────────────────────────────────────${RESET}"
  echo -e "${BOLD} 🖼️ [${PAGE_NUM}/${TOTAL}] ${STEM}${RESET}"
  echo -e "${BOLD}──────────────────────────────────────────${RESET}"

  # Without the output directory the translator cannot write anything, so
  # record the page as failed and move on instead of running it.
  if ! mkdir -p "$WORKDIR"; then
    log_error "Page ${PAGE_NUM} FAILED — cannot create ${WORKDIR}"
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM}")
    continue
  fi

  OUTPUT_JSON="${WORKDIR}/bubbles.json"
  OUTPUT_TXT="${WORKDIR}/output.txt"
  OUTPUT_DEBUG="${WORKDIR}/debug_clusters.png"
  log_info "🗂️ Image : $(basename "$IMAGE")"
  log_info "📁 Out : ${WORKDIR}"

  # ── Run the translator ────────────────────────────────────
  if "$PYTHON_BIN" "$TRANSLATOR" \
    "$IMAGE" \
    --source "$SOURCE_LANG" \
    --target "$TARGET_LANG" \
    --json "$OUTPUT_JSON" \
    --txt "$OUTPUT_TXT" \
    --debug "$OUTPUT_DEBUG"; then
    # A zero exit status is not trusted on its own: both text outputs must
    # exist and be non-empty.
    MISSING=0
    for FNAME in "bubbles.json" "output.txt"; do
      FPATH="${WORKDIR}/${FNAME}"
      if [[ ! -f "$FPATH" || ! -s "$FPATH" ]]; then
        log_warn "${FNAME} is missing or empty."
        MISSING=$(( MISSING + 1 ))
      else
        # wc pads its count with spaces on some platforms; strip them.
        SIZE=$(wc -c < "$FPATH" | tr -d ' ')
        # Keep the file name and its size apart in the message.
        log_ok "${FNAME} → ${SIZE} bytes"
      fi
    done

    # The debug image is reported when present but is not required.
    if [[ -f "$OUTPUT_DEBUG" ]]; then
      log_ok "debug_clusters.png written."
    fi

    if [[ $MISSING -eq 0 ]]; then
      log_ok "Page ${PAGE_NUM} complete."
      PASS=$(( PASS + 1 ))
    else
      log_warn "Page ${PAGE_NUM} finished with warnings."
      FAIL=$(( FAIL + 1 ))
      FAIL_LIST+=("${STEM}")
    fi
  else
    log_error "Page ${PAGE_NUM} FAILED — check output above."
    FAIL=$(( FAIL + 1 ))
    FAIL_LIST+=("${STEM}")
  fi
done
# ─────────────────────────────────────────────────────────────
# FINAL SUMMARY
# ─────────────────────────────────────────────────────────────
# Report the totals, list the failed page stems (if any), show where the
# output lives, and exit 0 only when no page failed.
log_section "BATCH COMPLETE"
printf '%b\n' \
  "${GREEN}Passed : ${PASS}${RESET}" \
  "${RED}Failed : ${FAIL}${RESET}"

if (( ${#FAIL_LIST[@]} > 0 )); then
  printf '\n'
  log_warn "Failed pages:"
  for NAME in "${FAIL_LIST[@]}"; do
    printf '%b\n' "${RED}${NAME}${RESET}"
  done
fi

printf '\n'
log_info "📦 Output folder: $(realpath "${FOLDER}/translated")"
printf '\n'

# Exit status mirrors the outcome: 0 when every page passed, 1 otherwise.
if (( FAIL == 0 )); then
  exit 0
fi
exit 1