From 60e989477153d403cb110c5e5cb85d14b48a33c7 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Fri, 10 Apr 2026 17:27:04 +0200 Subject: [PATCH] first commit --- .gitignore | 228 ++++++++++++++++++++++++++++++++++++++++++++ README.md | 0 manga-translator.py | 139 +++++++++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 manga-translator.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5c7c08 --- /dev/null +++ b/.gitignore @@ -0,0 +1,228 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,macos +# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,macos + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,macos diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/manga-translator.py b/manga-translator.py new file mode 100644 index 0000000..727dc5a --- /dev/null +++ b/manga-translator.py @@ -0,0 +1,139 @@ +import easyocr +from deep_translator import GoogleTranslator + + +# ───────────────────────────────────────────── +# LANGUAGE CODE REFERENCE +# ───────────────────────────────────────────── 
# Display name -> language code accepted by translate_manga_text().
SUPPORTED_LANGUAGES = {
    "Vietnamese": "vi",
    "Japanese": "ja",
    "English": "en",
    "Spanish": "es",
    "Korean": "ko",
    "Chinese (Simplified)": "ch_sim",    # EasyOCR spelling; mapped to zh-CN for translation
    "Chinese (Traditional)": "ch_tra",   # EasyOCR spelling; mapped to zh-TW for translation
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Arabic": "ar",
    "Russian": "ru",
    "Thai": "th",
    "Catalan": "ca",                     # OCR falls back to en+es, see _OCR_LANGUAGE_OVERRIDES
}

# EasyOCR and Google Translate disagree on a handful of language codes.
# These two tables let callers pass either spelling and still reach both
# libraries with a code each one understands.
_OCR_LANGUAGE_OVERRIDES = {
    "ca": ("en", "es"),      # no Catalan OCR model; Latin-script models cover it
    "zh-CN": ("ch_sim",),
    "zh-TW": ("ch_tra",),
}
_TRANSLATOR_CODE_OVERRIDES = {
    "ch_sim": "zh-CN",
    "ch_tra": "zh-TW",
}


# ─────────────────────────────────────────────
# CORE FUNCTION
# ─────────────────────────────────────────────
def translate_manga_text(
    image_path,
    source_lang="vi",
    target_lang="en",
    confidence_threshold=0.3,
    export_to_file=None,
    paragraph_mode=False,
):
    """
    Read text from a manga/comic image with OCR, translate it, and print a table.

    Each kept text block is printed as one numbered row (original text next to
    its translation). The same rows are optionally written to a text file.
    Nothing is returned.

    Args:
        image_path           : Path to the image file, passed to EasyOCR's readtext()
        source_lang          : Language code of the original text (default: 'vi').
                               Chinese may be given as 'ch_sim'/'ch_tra' or
                               'zh-CN'/'zh-TW'; 'ca' is OCR'd with the en+es models.
        target_lang          : Language code to translate into (default: 'en')
        confidence_threshold : Minimum OCR confidence to keep a result (default: 0.3).
                               Not applied in paragraph mode, where EasyOCR
                               reports no confidence.
        export_to_file       : Optional path to save the printed table as UTF-8
                               text (default: None, nothing is written)
        paragraph_mode       : Ask EasyOCR to merge nearby text into paragraphs
                               (default: False)
    """

    # ── 1. Initialize OCR reader ──────────────────────────────────────────────
    print("Loading OCR model (first run downloads ~100MB, cached after)...")

    if source_lang == "ca":
        print(" ℹ️ Catalan detected: using Latin-script OCR (en+es) for best results.")
    # list(...) hands EasyOCR its own copy so the shared override table is never mutated.
    ocr_lang_list = list(_OCR_LANGUAGE_OVERRIDES.get(source_lang, (source_lang,)))

    reader = easyocr.Reader(ocr_lang_list)

    # ── 2. Initialize translator ──────────────────────────────────────────────
    # Google Translate does not know EasyOCR's 'ch_sim'/'ch_tra' spellings.
    translator = GoogleTranslator(
        source=_TRANSLATOR_CODE_OVERRIDES.get(source_lang, source_lang),
        target=_TRANSLATOR_CODE_OVERRIDES.get(target_lang, target_lang),
    )

    # ── 3. Run OCR ────────────────────────────────────────────────────────────
    print(f"Scanning image: {image_path}\n")
    results = reader.readtext(image_path, paragraph=paragraph_mode)

    # ── 4. Filter & translate ─────────────────────────────────────────────────
    header = f"{'#':<5} {'ORIGINAL TEXT':<45} {'TRANSLATED TEXT'}"
    divider = "─" * 90

    output_lines = [header, divider]

    print(header)
    print(divider)

    count = 0
    for detection in results:
        # Normal mode yields (bbox, text, confidence). Paragraph mode yields
        # only (bbox, text), so unpacking three values would raise ValueError.
        text = detection[1]
        confidence = detection[2] if len(detection) > 2 else None

        # Skip low-confidence detections (noise, borders, artifacts).
        if confidence is not None and confidence < confidence_threshold:
            continue

        count += 1

        # Best effort: one failed translation must not abort the whole page,
        # so the error is shown in place of the translated text.
        try:
            translated = translator.translate(text)
        except Exception as e:
            translated = f"[Translation error: {e}]"

        line = f"{count:<5} {text:<45} {translated}"
        print(line)
        output_lines.append(line)

    summary = f"✅ Done! {count} text block(s) detected and translated."
    output_lines.append(divider)
    output_lines.append(summary)

    print(divider)
    print(summary)

    # ── 5. Optional: export to file ───────────────────────────────────────────
    if export_to_file:
        with open(export_to_file, "w", encoding="utf-8") as f:
            f.write("\n".join(output_lines))
        print(f"📄 Output saved to: {export_to_file}")


# ─────────────────────────────────────────────
# HELPER: print all supported languages
# ─────────────────────────────────────────────
def list_languages():
    """Print every entry of SUPPORTED_LANGUAGES as a name/code table."""
    print(f"\n{'LANGUAGE':<30} {'CODE'}")
    print("─" * 40)
    for name, code in SUPPORTED_LANGUAGES.items():
        print(f"{name:<30} {code}")
    print("─" * 40)


# ─────────────────────────────────────────────
# ENTRY POINT — edit these values and run!
# ─────────────────────────────────────────────
if __name__ == "__main__":

    # 🔧 Settings for this run, gathered in one place and passed on as keywords.
    run_settings = {
        "image_path": "page.png",        # image to scan
        "source_lang": "vi",             # language of the text in the image
        "target_lang": "en",             # language to translate into
        "confidence_threshold": 0.3,     # raise to 0.5 for noisy images
        "export_to_file": "output.txt",  # None skips saving
        "paragraph_mode": False,         # True groups nearby lines
    }
    translate_manga_text(**run_settings)

    # Uncomment to see all supported languages:
    # list_languages()