Added all
This commit is contained in:
177
.gitignore
vendored
Normal file
177
.gitignore
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all,zsh
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all,zsh
|
||||
media/downloads
|
||||
media/xhs_videos
|
||||
### Intellij+all ###
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
.DS_Store
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
*.txt
|
||||
*.csv
|
||||
*.xlsx
|
||||
*.xls
|
||||
.env
|
||||
*.env
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
### Intellij+all Patch ###
|
||||
# Ignore everything but code style settings and run configurations
|
||||
# that are supposed to be shared within teams.
|
||||
|
||||
.idea/*
|
||||
|
||||
!.idea/codeStyles
|
||||
!.idea/runConfigurations
|
||||
|
||||
### Zsh ###
|
||||
# Zsh compiled script + zrecompile backup
|
||||
*.zwc
|
||||
*.zwc.old
|
||||
|
||||
# Zsh completion-optimization dumpfile
|
||||
*zcompdump*
|
||||
|
||||
# Zsh history
|
||||
.zsh_history
|
||||
|
||||
# Zsh sessions
|
||||
.zsh_sessions
|
||||
|
||||
# Zsh zcalc history
|
||||
.zcalc_history
|
||||
|
||||
# A popular plugin manager's files
|
||||
._zinit
|
||||
.zinit_lstupd
|
||||
|
||||
# zdharma/zshelldoc tool's files
|
||||
zsdoc/data
|
||||
|
||||
# robbyrussell/oh-my-zsh/plugins/per-directory-history plugin's files
|
||||
# (when set-up to store the history in the local directory)
|
||||
.directory_history
|
||||
|
||||
# MichaelAquilina/zsh-autoswitch-virtualenv plugin's files
|
||||
# (for Zsh plugins using Python)
|
||||
.venv
|
||||
|
||||
# Zunit tests' output
|
||||
/tests/_output/*
|
||||
!/tests/_output/.gitkeep
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/intellij+all,zsh
|
||||
|
||||
downloads/
|
||||
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/macos
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=macos
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
### macOS Patch ###
|
||||
# iCloud generated files
|
||||
*.icloud
|
||||
*.csv
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/macos
|
||||
9
file-management/buscar_grandes.sh
Normal file
9
file-management/buscar_grandes.sh
Normal file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Report every file larger than 1 GB under a directory, biggest first.

# Base directory to search (first positional argument, defaults to ".").
DIRECTORIO=${1:-.}

# Find files over 1 GiB, print human-readable sizes, sort descending.
echo "Archivos mayores a 1GB en el directorio: $DIRECTORIO"
find "$DIRECTORIO" -type f -size +1G -exec du -h {} + | sort -hr
echo "Búsqueda completada."
|
||||
57
file-management/filtrar_excel_per_csv.py
Normal file
57
file-management/filtrar_excel_per_csv.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Filter the unified contacts workbook down to the e-mail addresses that
# are still active according to the newsletter CSV export.
import pandas as pd
import os
import sys

print("🔍 --- Filtrant Excel segons usuaris actius del CSV ---")

# Input / output file names (expected in the working directory).
fitxer_csv = "Newsletter_Combinat_Final_Actius.csv"
fitxer_excel = "Contactos_Unificats.xlsx"
fitxer_resultat = "Contactos_Unificats_Filtrats.xlsx"

# Bail out early if either input is missing.
if not os.path.exists(fitxer_csv) or not os.path.exists(fitxer_excel):
    print("❌ Error: Assegura't que els fitxers CSV i Excel són a la mateixa carpeta.")
    sys.exit(1)

try:
    print("⏳ Llegint els usuaris actius del CSV...")
    # The CSV is known to be semicolon-separated.
    df_csv = pd.read_csv(fitxer_csv, sep=';', encoding='utf-8')

    # Collect the active addresses from 'Email_ID', lower-cased and
    # stripped so the comparison below matches exactly.
    correus_actius = set(df_csv['Email_ID'].dropna().astype(str).str.lower().str.strip())
    print(f"✅ S'han carregat {len(correus_actius)} correus únics del CSV.")

    print("⏳ Processant l'Excel pestanya per pestanya...")
    # Open the source workbook and prepare the filtered output workbook.
    excel_original = pd.ExcelFile(fitxer_excel)

    with pd.ExcelWriter(fitxer_resultat, engine='openpyxl') as writer:
        for nom_pestanya in excel_original.sheet_names:
            # Read the current sheet.
            df_pestanya = pd.read_excel(excel_original, sheet_name=nom_pestanya)
            total_inicial = len(df_pestanya)

            # Normalise the sheet's 'email' column the same way before comparing.
            if 'email' in df_pestanya.columns:
                correus_excel = df_pestanya['email'].astype(str).str.lower().str.strip()

                # Keep only rows whose e-mail is in the active set.
                df_filtrat = df_pestanya[correus_excel.isin(correus_actius)]
                total_final = len(df_filtrat)

                print(f"   👉 Pestanya '{nom_pestanya}': s'han mantingut {total_final} de {total_inicial} contactes.")
            else:
                # Sheets without an 'email' column are copied through untouched.
                print(f"   ⚠️ Avís: La pestanya '{nom_pestanya}' no té cap columna anomenada 'email'. Es deixa intacta.")
                df_filtrat = df_pestanya

            # Write the (possibly filtered) sheet under its original name.
            df_filtrat.to_excel(writer, sheet_name=nom_pestanya, index=False)

    print(f"\n🎉 Procés completat amb èxit!")
    print(f"📄 S'ha creat el fitxer amb els contactes filtrats: {fitxer_resultat}")

except Exception as e:
    print(f"❌ S'ha produït un error: {e}")
|
||||
67
file-management/juntar_csv.py
Normal file
67
file-management/juntar_csv.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# Merge every semicolon-separated CSV in a folder into a single
# Newsletter_Combinat_Final.csv, writing the header row only once.
import csv
import glob
import os
import sys

# 1. The target folder must be given as the first CLI argument.
if len(sys.argv) < 2:
    print("❌ Error: Has d'indicar la ruta de la carpeta.")
    print("💡 Ús correcte: python juntar_csv.py /ruta/de/la/carpeta")
    sys.exit(1)

# 2. Take the path (first argument after the script name).
carpeta_origen = sys.argv[1]

# Strip any quotes the shell may have left around the path.
carpeta_origen = carpeta_origen.strip("'").strip('"').strip()

# 3. The path must be an existing directory.
if not os.path.isdir(carpeta_origen):
    print(f"❌ Error: La ruta '{carpeta_origen}' no és vàlida o no és una carpeta.")
    sys.exit(1)

print(f"📁 Cercant fitxers CSV a: {carpeta_origen}")

ruta_cerca = os.path.join(carpeta_origen, "*.csv")
fitxers_csv = glob.glob(ruta_cerca)

# Name of the combined output file.
fitxer_resultat = os.path.join(carpeta_origen, "Newsletter_Combinat_Final.csv")

if not fitxers_csv:
    print("⚠️ No s'han trobat fitxers CSV a la carpeta indicada.")
else:
    # Exclude the combined file itself if it survives from a previous run.
    fitxers_a_processar = [f for f in fitxers_csv if f != fitxer_resultat]

    if not fitxers_a_processar:
        print("⚠️ Només s'ha trobat el fitxer combinat anterior. No hi ha res de nou per unir.")
        sys.exit(0)

    print(f"🔄 S'han trobat {len(fitxers_a_processar)} fitxers. Processant...")

    # 4. Concatenate the files into the output.
    with open(fitxer_resultat, 'w', newline='', encoding='utf-8') as sortida:
        escrivent = csv.writer(sortida, delimiter=';')

        fitxers_processats = 0
        for nom_fitxer in fitxers_a_processar:
            with open(nom_fitxer, 'r', encoding='utf-8') as f:
                lector = csv.reader(f, delimiter=';')
                try:
                    capcalera = next(lector)
                except StopIteration:
                    continue  # Skip the file if it is completely empty

                # Write the header only for the first valid file.
                if fitxers_processats == 0:
                    escrivent.writerow(capcalera)

                # Copy the remaining data rows.
                for fila in lector:
                    escrivent.writerow(fila)

                fitxers_processats += 1

    print(f"✅ Èxit! S'han combinat {fitxers_processats} fitxers.")
    print(f"📄 Fitxer creat a: {fitxer_resultat}")
|
||||
73
file-management/mautic.py
Normal file
73
file-management/mautic.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import pandas as pd
|
||||
import csv
|
||||
|
||||
def filtrar_i_consolidar_etiquetes(tags_str):
    """Filter and consolidate a comma-separated tag cell per the business rules.

    Tags starting with ``test_``/``int_`` and the exact tag ``rrhh`` are kept
    verbatim; course tags are collapsed onto their base code (e.g. any
    ``psm-…`` tag becomes ``psm``); anything else is dropped.  The surviving
    tags are sorted, joined with ``|`` and wrapped in literal double quotes.
    Returns '' for empty/NaN cells or when nothing survives the filter.
    """
    # Empty / missing cells (including the literal string 'nan') yield ''.
    if pd.isna(tags_str) or str(tags_str).strip().lower() == 'nan':
        return ''

    # Ordered prefix rules: longer prefixes must be checked before their
    # shorter siblings (pspo2- before pspo-, pal_ebm- before pal-, ...).
    regles_prefix = (
        (('skupspo2-', 'pspo2-'), 'pspo2'),
        (('pspo-',), 'pspo'),
        (('psm2-',), 'psm2'),
        (('psm-',), 'psm'),
        (('psux-', 'psu-'), 'psu'),
        (('sps-',), 'sps'),
        (('safe-',), 'safe-ls'),
        (('pal_ebm-',), 'pal_ebm'),
        (('pal-',), 'pal'),
    )

    consolidades = set()  # set avoids duplicate final tags
    for etiqueta in (part.strip().lower() for part in str(tags_str).split(',')):
        # Direct-keep rule: test_, int_ and rrhh pass through unchanged.
        if etiqueta == 'rrhh' or etiqueta.startswith(('test_', 'int_')):
            consolidades.add(etiqueta)
            continue

        # Grouping rules: first matching prefix wins; no match => dropped.
        for prefixos, destinacio in regles_prefix:
            if etiqueta.startswith(prefixos):
                consolidades.add(destinacio)
                break

    if not consolidades:
        return ''

    # Sorted output is easier to eyeball in the resulting CSV.
    return f'"{"|".join(sorted(consolidades))}"'
|
||||
|
||||
# --- MAIN PROCESS ---
# Build a Mautic-ready CSV from the raw contacts export, with the tag
# column filtered/consolidated by filtrar_i_consolidar_etiquetes().

# 1. Load the original export.
df = pd.read_excel('contactes_mautic/Contactos_2602026_7881pax.xlsx')

# 2. Keep only the columns Mautic needs.
columnes_mautic = ['email', 'nombre_x', 'apellidos_x', '*ciudad_x', '*pais_x', 'etiquetas_x']
df_net = df[columnes_mautic].copy()

# 3. Rename the columns so Mautic auto-detects them during import.
df_net.rename(columns={
    'nombre_x': 'firstname',
    'apellidos_x': 'lastname',
    '*ciudad_x': 'city',
    '*pais_x': 'country',
    'etiquetas_x': 'etiquetes'
}, inplace=True)

# 4. Apply the tag filtering and consolidation.
df_net['etiquetes'] = df_net['etiquetes'].apply(filtrar_i_consolidar_etiquetes)

# 5. Save as an import-ready CSV.  QUOTE_NONE + escapechar preserve the
#    double quotes the tag function added around the pipe-joined tags.
df_net.to_csv('Contactes_Mautic_Consolidats.csv', index=False, encoding='utf-8', quoting=csv.QUOTE_NONE, escapechar='\\')

print("✅ Fitxer preparat! Les etiquetes s'han filtrat i agrupat correctament.")
|
||||
84
file-management/mautic_tags.py
Normal file
84
file-management/mautic_tags.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import csv
|
||||
|
||||
def simplificar_etiqueta(tag):
    """Reduce a raw tag to its canonical short code.

    The rules are an ordered list of substring matches where the FIRST hit
    wins: full official course names first, then the advanced levels
    (psm2/pspo2/pspo-a) so they are not swallowed by the short codes, then
    the short codes themselves and a few ad-hoc groupings.  Tags matching
    nothing are returned unchanged, except that a leading ``test_`` is
    rewritten to ``int_``.
    """
    tag = tag.strip().lower()

    # Ordered (needles, canonical) pairs — order is load-bearing.
    regles = (
        # 1. Official long course names.
        (('professional-scrum-master',), 'psm'),
        (('professional-scrum-product-owner',), 'pspo'),
        (('professional-scrum-developer',), 'psd'),
        (('professional-agile-leadership',), 'pal'),
        # 2. Advanced levels, protected before the plain codes below.
        (('psm2', 'psm-2'), 'psm2'),
        (('pspo2', 'pspo-2'), 'pspo2'),
        (('pspo-a',), 'pspo-a'),
        # 3. Keyword grouping onto the base code.
        (('psm',), 'psm'),
        (('pspo',), 'pspo'),
        (('psu',), 'psu'),
        (('psd',), 'psd'),
        (('aps',), 'aps'),
        (('apk',), 'apk'),
        (('pal',), 'pal'),
        (('sps',), 'sps'),
        # 4. Other useful groupings seen in the data.
        (('okr',), 'okr'),
        (('scrumday', 'sd20', 'sd21'), 'scrumday'),
    )
    for agulles, canonic in regles:
        if any(agulla in tag for agulla in agulles):
            return canonic

    # 5. Unmatched tags pass through; test_ prefixes become int_.
    if tag.startswith('test_'):
        return tag.replace('test_', 'int_', 1)

    return tag
|
||||
|
||||
# --- MAIN PROCESS ---
# Split the contact export into one CSV per simplified tag.

nom_fitxer = 'contactes_mautic/Contactos_2602026_7881pax.xlsx'
print(f"Llegint el fitxer {nom_fitxer}...")
df = pd.read_excel(nom_fitxer)

# Map of export columns -> Mautic field names.
columnes = {
    'email': 'email',
    'nombre_x': 'firstname',
    'apellidos_x': 'lastname',
    '*ciudad_x': 'city',
    '*pais_x': 'country',
    'etiquetas_x': 'tags'
}
df_net = df[list(columnes.keys())].rename(columns=columnes)
# Drop contacts with no e-mail or no tags.
df_net = df_net.dropna(subset=['email', 'tags'])

carpeta_sortida = 'Mautic_CSVs_per_Tag'
os.makedirs(carpeta_sortida, exist_ok=True)

# Simplify every tag of every contact (each cell is a comma-separated list).
df_net['tag_individual'] = df_net['tags'].apply(
    lambda x: [simplificar_etiqueta(tag) for tag in str(x).split(',') if tag.strip()]
)

# One row per (contact, tag), then de-duplicate pairs.
df_exploded = df_net.explode('tag_individual')
df_exploded = df_exploded.drop_duplicates(subset=['email', 'tag_individual'])

etiquetes_uniques = df_exploded['tag_individual'].unique()
print(f"🎉 Màgia feta! Hem reduït centenars d'etiquetes a només {len(etiquetes_uniques)} úniques.")

# Write one CSV per simplified tag.
for tag in etiquetes_uniques:
    df_tag = df_exploded[df_exploded['tag_individual'] == tag].copy()

    df_tag['tags'] = tag
    df_tag = df_tag.rename(columns={'tags': tag})
    df_tag = df_tag.drop(columns=['tag_individual'])

    # Sanitise the tag so it is safe inside a file name.
    nom_tag_net = str(tag).replace(' ', '_').replace('/', '_').replace(':', '')
    ruta_fitxer = os.path.join(carpeta_sortida, f"etiqueta_{nom_tag_net}.csv")

    df_tag.to_csv(ruta_fitxer, index=False, encoding='utf-8', sep=',', quoting=csv.QUOTE_ALL)

print(f"✅ Tots els fitxers nets estan a la carpeta '{carpeta_sortida}'.")
|
||||
17
file-management/nan-mautic.py
Normal file
17
file-management/nan-mautic.py
Normal file
@@ -0,0 +1,17 @@
|
||||
# Build a Mautic import CSV that removes the bogus "nan" tags from every
# contact that has a valid e-mail address.
import pandas as pd

# 1. Load the original export.
df = pd.read_excel('contactes_mautic/Contactos_2602026_7881pax.xlsx')

# 2. Keep only rows with a valid e-mail, and only that column.
df_net = df.dropna(subset=['email'])[['email']].copy()

# 3. Multi-delete order for Mautic: the '-' prefix marks a tag for
#    removal — both the odd escaped-quote variant and the plain 'nan'.
#    The raw string (r'') keeps the backslashes literal in the CSV.
df_net['etiquetes'] = r'-\"-nan\"|-nan'

# 4. Save the CSV naturally (no custom quoting/escaping parameters).
df_net.to_csv('Neteja_Definitiva_Tags.csv', index=False, encoding='utf-8')

print("✅ Fitxer preparat! Les etiquetes errònies s'han marcat per ser esborrades.")
|
||||
53
file-management/netejar_desuscrits.py
Normal file
53
file-management/netejar_desuscrits.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# Drop unsubscribed users from a semicolon-separated newsletter CSV:
# keeps only rows whose 'Unsubscribe_Date' column is empty.
import csv
import sys
import os

# The input CSV must be given as the first CLI argument.
if len(sys.argv) < 2:
    print("❌ Error: Has d'indicar la ruta del fitxer CSV.")
    print("💡 Ús correcte: python netejar_desuscrits.py /ruta/al/Newsletter_Combinat_Final.csv")
    sys.exit(1)

# Strip any shell-added quotes around the path.
fitxer_origen = sys.argv[1].strip("'").strip('"').strip()

if not os.path.isfile(fitxer_origen):
    print(f"❌ Error: No s'ha trobat el fitxer '{fitxer_origen}'.")
    sys.exit(1)

# Output goes next to the input, with an "_Actius" suffix.
fitxer_desti = fitxer_origen.replace(".csv", "_Actius.csv")

usuaris_eliminats = 0
usuaris_actius = 0

print(f"🔍 Analitzant el fitxer: {fitxer_origen}")

with open(fitxer_origen, 'r', encoding='utf-8') as f_in, \
        open(fitxer_desti, 'w', newline='', encoding='utf-8') as f_out:

    lector = csv.reader(f_in, delimiter=';')
    escrivent = csv.writer(f_out, delimiter=';')

    # Read and copy the header row.
    capcalera = next(lector)
    escrivent.writerow(capcalera)

    # Locate the unsubscribe column.
    try:
        index_unsub = capcalera.index("Unsubscribe_Date")
    except ValueError:
        print("❌ Error: No s'ha trobat la columna 'Unsubscribe_Date' al fitxer.")
        sys.exit(1)

    # Filter the rows: an empty unsubscribe date means the user is active.
    for fila in lector:
        if len(fila) > index_unsub and not fila[index_unsub].strip():
            escrivent.writerow(fila)
            usuaris_actius += 1
        else:
            usuaris_eliminats += 1

print("✅ Neteja completada amb èxit!")
print(f"📉 Usuaris desuscrits eliminats: {usuaris_eliminats}")
print(f"📈 Usuaris actius conservats: {usuaris_actius}")
print(f"📄 Nou fitxer creat a: {fitxer_desti}")
|
||||
32
file-management/netejar_excel.py
Normal file
32
file-management/netejar_excel.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# Remove duplicate contacts (by e-mail) from an Excel export.
import pandas as pd

# 1. Input / output file names.
archivo_entrada = 'Contactos_2602026_totales.xlsx'
archivo_salida = 'Contactos_2602026_Limpios.xlsx'

print(f"Cargando el archivo: {archivo_entrada}...")

# 2. Read the Excel file (data assumed to be on the first sheet).
df = pd.read_excel(archivo_entrada)

# Count the initial number of records.
total_inicial = len(df)
print(f"Registros iniciales encontrados: {total_inicial}")

# 3. Drop duplicates by the 'email' column; keep='first' retains the
#    first occurrence of each address and removes the rest.
df_limpio = df.drop_duplicates(subset=['email'], keep='first')

# Count how many records remain and how many were removed.
total_final = len(df_limpio)
duplicados_eliminados = total_inicial - total_final

print(f"Se han eliminado {duplicados_eliminados} contactos duplicados.")
print(f"Registros finales únicos: {total_final}")

# 4. Export the result to a new Excel file.
print("Guardando el nuevo archivo limpio...")
df_limpio.to_excel(archivo_salida, index=False)

print(f"¡Proceso completado! Archivo guardado como: {archivo_salida}")
|
||||
46
file-management/split_excel.py
Normal file
46
file-management/split_excel.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# Split a large contacts Excel file into n roughly equal part files.
import pandas as pd
import numpy as np
import math

# Source workbook.
file_name = 'Contactos_2602026_7881pax.xlsx'

print("Llegint el fitxer Excel...")

# Read the workbook (read_excel — the source is .xlsx, not CSV).
df = pd.read_excel(file_name)

# Display head and info to understand the structure.
print(df.head())
print(df.info())

# Total number of rows.
total_rows = len(df)
print(f"Total rows: {total_rows}")

# Number of splits.
n = 6
# np.ceil rounds up so the last chunk never leaves rows behind.
chunk_size = int(np.ceil(total_rows / n))

# Split and save each chunk as its own workbook.
output_files = []
print(f"Dividint en {n} parts...")

for i in range(n):
    start_row = i * chunk_size
    end_row = min((i + 1) * chunk_size, total_rows)

    # Stop early if start_row ever passes the end of the data.
    if start_row >= total_rows:
        break

    chunk = df.iloc[start_row:end_row]

    output_filename = f'Contactos_linkedin_part_{i+1}.xlsx'
    chunk.to_excel(output_filename, index=False)
    output_files.append(output_filename)

print(f"Files created: {output_files}")
|
||||
104
file-management/teams.py
Normal file
104
file-management/teams.py
Normal file
@@ -0,0 +1,104 @@
|
||||
import pandas as pd
|
||||
import glob
|
||||
import re
|
||||
import os
|
||||
import csv
|
||||
|
||||
def extreure_minuts(temps_str):
    """Convert duration strings like '2h 15m', '45m' or '1h' to total minutes.

    Missing values (NaN/None) count as 0.  Matching is case-insensitive and
    tolerates spaces between the number and its unit suffix.
    """
    if pd.isna(temps_str):
        return 0

    text = str(temps_str).lower()

    def _component(sufix):
        # First "<digits><optional spaces><suffix>" occurrence, or 0 if absent.
        trobat = re.search(r'(\d+)\s*' + sufix, text)
        return int(trobat.group(1)) if trobat else 0

    return _component('h') * 60 + _component('m')
|
||||
|
||||
# Aggregate Teams attendance CSV exports in the current folder and report
# each attendee's total minutes against the 1581-minute (80%) requirement.
fitxers = glob.glob("*.csv")
dades_alumnes = {}  # attendee name -> accumulated minutes

if not fitxers:
    print("⚠️ No s'han trobat fitxers CSV a la carpeta actual.")
else:
    print(f"S'han trobat {len(fitxers)} fitxers. Processant dades...\n")

    for fitxer in fitxers:
        try:
            # 1. Open the file manually to find the row where the data starts
            #    (Teams exports have a variable-length preamble, UTF-16).
            with open(fitxer, 'r', encoding='utf-16') as f:
                linies = f.readlines()

            fila_capcalera = -1
            separador = '\t'

            # Look for the header line containing "Nom"/"Nombre"/"Name".
            for i, linia in enumerate(linies):
                if 'Nom' in linia or 'Nombre' in linia or 'Name' in linia:
                    fila_capcalera = i
                    # Detect whether it uses tabs or commas.
                    if ',' in linia and '\t' not in linia:
                        separador = ','
                    break

            if fila_capcalera == -1:
                print(f"⚠️ Saltant '{fitxer}': No s'ha trobat cap fila amb la paraula 'Nom'.")
                continue

            # 2. Read the CSV telling pandas exactly where the table begins.
            df = pd.read_csv(fitxer, sep=separador, encoding='utf-16', skiprows=fila_capcalera)

            # 3. Locate the name and duration columns dynamically
            #    (column headings vary by export language).
            col_durada = next((col for col in df.columns if 'durada' in col.lower() or 'duración' in col.lower() or 'duration' in col.lower()), None)
            col_nom = next((col for col in df.columns if 'nom' in col.lower() or 'nombre' in col.lower() or 'name' in col.lower()), None)

            if not col_nom or not col_durada:
                print(f"⚠️ Saltant '{fitxer}': Columnes invàlides. Trobades: {list(df.columns)}")
                continue

            for index, row in df.iterrows():
                nom = row[col_nom]
                if pd.isna(nom):
                    continue

                minuts = extreure_minuts(row[col_durada])

                # Special rule for the Feb 5 session: cap at 205 minutes.
                if "2-05-26" in fitxer and minuts > 205:
                    minuts = 205

                # Accumulate minutes per attendee across all files.
                if nom in dades_alumnes:
                    dades_alumnes[nom] += minuts
                else:
                    dades_alumnes[nom] = minuts

        except UnicodeError:
            print(f"❌ Error de codificació al fitxer '{fitxer}'. Intenta obrir-lo i guardar-lo de nou.")
        except Exception as e:
            print(f"❌ Error processant el fitxer '{fitxer}': {e}")

# 4. Results: one line per attendee, sorted by name.
if dades_alumnes:
    print("="*50)
    print("📊 RESULTATS D'ASSISTÈNCIA (Mínim requerit: 1581 min)")
    print("="*50)

    for nom in sorted(dades_alumnes.keys()):
        minuts_totals = dades_alumnes[nom]

        # 1581 minutes is the 80% attendance threshold.
        if minuts_totals >= 1581:
            estat = "✅ Supera el 80%"
        else:
            estat = "❌ No arriba"

        h = minuts_totals // 60
        m = minuts_totals % 60

        print(f"{nom}: {minuts_totals} minuts ({h}h {m}m) -> {estat}")
|
||||
40
file-management/treure_etiquetes_mautic.py
Normal file
40
file-management/treure_etiquetes_mautic.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import pandas as pd
|
||||
import csv
|
||||
|
||||
def preparar_etiquetes_per_esborrar(tags_str):
    """Turn a comma-separated tag cell into a Mautic removal expression.

    Each tag gets a '-' prefix (Mautic's "remove this tag" marker), the
    results are joined with '|' and wrapped in literal double quotes.
    Empty/NaN cells — or cells with no non-empty tags — return ''.
    """
    # 1. Treat NaN cells and the literal string 'nan' as empty.
    if pd.isna(tags_str) or str(tags_str).strip().lower() == 'nan':
        return ''

    # 2–3. Split on commas, trim whitespace, drop empty fragments and
    #      prefix each surviving tag with '-'.
    marcades = [
        '-' + fragment.strip()
        for fragment in str(tags_str).split(',')
        if fragment.strip()
    ]

    # 4. Join with '|' and add the literal double quotes by hand.
    if not marcades:
        return ''
    return '"' + '|'.join(marcades) + '"'
|
||||
|
||||
# --- MAIN PROCESS ---
# Build a Mautic import CSV that marks every existing tag for removal.

# 1. Load the original export.
df = pd.read_excel('contactes_mautic/Contactos_2602026_7881pax.xlsx')

# 2. Keep only the e-mail and the tags column.
df_esborrar = df[['email', 'etiquetas_x']].copy()

# 3. Rename the tag column to the Mautic field name.
df_esborrar.rename(columns={'etiquetas_x': 'etiquetes'}, inplace=True)

# 4. Apply the transformation ("-tag1|-tag2" wrapped in quotes).
df_esborrar['etiquetes'] = df_esborrar['etiquetes'].apply(preparar_etiquetes_per_esborrar)

# 5. Save the CSV while preserving our literal double quotes
#    (QUOTE_NONE stops the csv layer from re-quoting them).
df_esborrar.to_csv('Contactes_Mautic_Esborrar_Etiquetes.csv', index=False, encoding='utf-8', quoting=csv.QUOTE_NONE, escapechar='\\')

print("✅ Fitxer preparat! Les etiquetes tenen el '-' i estan entre cometes dobles.")
|
||||
19
file-management/unificar_excels.py
Normal file
19
file-management/unificar_excels.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# Stack two contact exports into one file and drop duplicate e-mails.
import pandas as pd

# 1. Load the first file (it has headers).
df1 = pd.read_excel('Contactos_linkedin_120126_agile_6589pax_filtrado.xlsx')

# 2. Load the second file (no headers; we assume the first three
#    columns are email, last name and first name).
df2 = pd.read_excel('lista_contactos_brevo_20251023_1394pax_AGILE611.xlsx', header=None)
df2.rename(columns={0: 'email', 1: 'apellidos_x', 2: 'nombre_x'}, inplace=True)

# 3. Stack both files one below the other.
df_final = pd.concat([df1, df2], ignore_index=True)

# 4. Drop duplicate contacts by the 'email' column.
df_final.drop_duplicates(subset=['email'], keep='first', inplace=True)

# 5. Export the result to a new Excel file.
df_final.to_excel('Contactos_Unificados_Agile611.xlsx', index=False)
print("¡Archivo unificado creado con éxito!")
|
||||
40
file-management/unir_excels_pestanyes.py
Normal file
40
file-management/unir_excels_pestanyes.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Merge two Excel exports into a single workbook with two sheets.
import pandas as pd
import os
import sys

print("📊 --- Unificador de fitxers Excel ---")

# Input workbooks and the combined output file.
fitxer1 = "Contactos_rrhh_040226.xlsx"
fitxer2 = "Contactos_2602026_7881pax.xlsx"
fitxer_resultat = "Contactos_Unificats.xlsx"

# Both inputs must exist in the current folder.
if not os.path.exists(fitxer1):
    print(f"❌ Error: No s'ha trobat el fitxer '{fitxer1}'.")
    sys.exit(1)

if not os.path.exists(fitxer2):
    print(f"❌ Error: No s'ha trobat el fitxer '{fitxer2}'.")
    sys.exit(1)

print("⏳ Llegint els fitxers... (això pot trigar uns segons depenent de la mida)")

try:
    # Read both workbooks.
    df1 = pd.read_excel(fitxer1)
    df2 = pd.read_excel(fitxer2)

    # Write each DataFrame to its own sheet of the new workbook.
    with pd.ExcelWriter(fitxer_resultat, engine='openpyxl') as writer:
        df1.to_excel(writer, sheet_name='RRHH', index=False)
        df2.to_excel(writer, sheet_name='Contactos_7881', index=False)

    print(f"✅ Procés completat amb èxit!")
    print(f"📄 S'ha creat el fitxer: {fitxer_resultat}")
    print("   - Pestanya 1: 'RRHH'")
    print("   - Pestanya 2: 'Contactos_7881'")

except Exception as e:
    print(f"❌ S'ha produït un error durant el procés: {e}")
|
||||
81
linkedin/linkedin-posts.py
Normal file
81
linkedin/linkedin-posts.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import os
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# 1. Load the environment variables
|
||||
load_dotenv()
|
||||
|
||||
ACCESS_TOKEN = os.getenv('LINKEDIN_ACCESS_TOKEN')
|
||||
ORGANIZATION_ID = os.getenv('LINKEDIN_ORG_ID')
|
||||
|
||||
# LinkedIn requires a version header (Format: YYYYMM)
|
||||
API_VERSION = '202602'
|
||||
|
||||
def get_all_linkedin_posts(access_token, org_id):
    """
    Fetches ALL posts from a specific LinkedIn Organization Page using pagination.

    Args:
        access_token: OAuth2 bearer token with access to the posts API.
        org_id: Numeric LinkedIn organization ID (without the URN prefix).

    Returns:
        A list of post dicts from the /rest/posts endpoint; empty list when
        credentials are missing. Stops early (returning what it has) on a
        request error.
    """
    if not access_token or not org_id:
        print("🚨 Error: Missing credentials. Please check your .env file.")
        return []

    url = "https://api.linkedin.com/rest/posts"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "LinkedIn-Version": API_VERSION,
        "X-Restli-Protocol-Version": "2.0.0"
    }

    all_posts = []
    start = 0
    count = 100  # Maximum allowed by LinkedIn per request

    print(f"📥 Starting to fetch posts for Organization ID: {org_id}...")

    while True:
        params = {
            "q": "author",
            "author": f"urn:li:organization:{org_id}",
            "count": count,
            "start": start
        }

        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()

            data = response.json()
            elements = data.get('elements', [])

            # An empty page means we've reached the end of the feed.
            if not elements:
                break

            all_posts.extend(elements)
            print(f"✅ Fetched posts {start + 1} to {start + len(elements)}...")

            # Advance the offset for the next page.
            start += count

        except requests.exceptions.RequestException as e:
            print(f"❌ Error fetching posts at offset {start}: {e}")
            # Bug fix: the original referenced the local `response`, which is
            # unbound (NameError) when requests.get() itself raised, e.g. on a
            # connection error. RequestException carries the response (or None).
            if e.response is not None and e.response.text:
                print(f"LinkedIn API Response: {e.response.text}")
            break

    print(f"\n🎉 Finished! Successfully retrieved a total of {len(all_posts)} posts.")
    return all_posts
|
||||
|
||||
# --- Script entry point ---
if __name__ == "__main__":
    fetched = get_all_linkedin_posts(ACCESS_TOKEN, ORGANIZATION_ID)

    # Show a short preview of the three most recent posts, if any came back.
    if fetched:
        print("\n--- Preview of latest 3 posts ---")
        for item in fetched[:3]:
            # Posts without commentary fall back to a placeholder string.
            text = item.get('commentary', {}).get('text', 'No text content')
            print(f"ID: {item.get('id')}")
            print(f"Content: {text[:100]}...\n")
|
||||
36
media/audio-extractor.sh
Executable file
36
media/audio-extractor.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
# Extract the audio track of an MP4 file into an MP3 using ffmpeg.

# ffmpeg must be available on PATH.
if ! command -v ffmpeg &> /dev/null; then
    echo "ffmpeg could not be found. Please install ffmpeg first."
    exit 1
fi

# Exactly two arguments are required: input video and output audio path.
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <input_mp4_file> <output_mp3_file>"
    exit 1
fi

INPUT_FILE="$1"
OUTPUT_FILE="$2"

# Refuse to run against a missing input file.
if [ ! -f "$INPUT_FILE" ]; then
    echo "Input file '$INPUT_FILE' does not exist."
    exit 1
fi

# -q:a 0 selects the best VBR audio quality; -map a keeps audio streams only.
if ffmpeg -i "$INPUT_FILE" -q:a 0 -map a "$OUTPUT_FILE"; then
    echo "MP3 file successfully created: $OUTPUT_FILE"
else
    echo "Failed to extract MP3 from $INPUT_FILE."
    exit 1
fi
|
||||
45
media/convert_mp4_to_webm.sh
Executable file
45
media/convert_mp4_to_webm.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
# Convert an MP4 video to WebM (VP8 video / Vorbis audio) with ffmpeg.

# Converts $1 (an existing .mp4) into $2; when no output name is given,
# it defaults to the input name with a .webm extension.
convert_mp4_to_webm() {
    local src="$1"
    local dst="$2"

    # The source file must exist on disk...
    if [ ! -f "$src" ]; then
        echo "Error: File '$src' does not exist."
        exit 1
    fi

    # ...and carry the .mp4 extension.
    if [[ "$src" != *.mp4 ]]; then
        echo "Error: Input file must be an MP4 file."
        exit 1
    fi

    # Derive the output name when none was supplied.
    [ -z "$dst" ] && dst="${src%.mp4}.webm"

    # libvpx + libvorbis are the classic WebM codec pair.
    if ffmpeg -i "$src" -c:v libvpx -c:a libvorbis "$dst"; then
        echo "Successfully converted to '$dst'."
    else
        echo "Error: Conversion failed."
        exit 1
    fi
}

# At least the input file is required; the output name is optional.
if [ $# -lt 1 ]; then
    echo "Usage: $0 <input_file> [output_file]"
    exit 1
fi

convert_mp4_to_webm "$1" "$2"
|
||||
|
||||
12
media/copy_cbr_files.sh
Normal file
12
media/copy_cbr_files.sh
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
# Gather every .cbr comic archive found under the volum_* directories
# into one flat folder.

DEST_DIR="cbr_files"

# Make sure the collection folder exists before copying anything.
mkdir -p "$DEST_DIR"

# Recursively locate the .cbr files in each volum_* tree and copy them over.
find volum_* -type f -name "*.cbr" -exec cp {} "$DEST_DIR" \;

echo "All .cbr files have been copied to $DEST_DIR."
|
||||
48
media/download_instagram_images.sh
Executable file
48
media/download_instagram_images.sh
Executable file
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash
# Download every image referenced by a public Instagram post.
# Requires: curl, jq

# Print the expected invocation and abort.
function usage() {
    echo "Usage: $0 <instagram_post_url>"
    exit 1
}

# Exactly one argument is accepted: the post URL.
[ $# -ne 1 ] && usage

POST_URL=$1

# Pull down the post's HTML, following any redirects.
echo "Fetching post data..."
HTML=$(curl -s -L "$POST_URL")

# Pick the display_url values out of the embedded page data.
echo "Extracting image URLs..."
IMAGE_URLS=$(echo "$HTML" | sed -n 's/.*"display_url":"\([^"]*\)".*/\1/p')

# Nothing scraped means the post is invalid or not public.
if [ -z "$IMAGE_URLS" ]; then
    echo "Failed to extract image URLs. Make sure the URL is valid and public."
    exit 1
fi

# All images land in this folder.
SAVE_DIR="instagram_images"
mkdir -p "$SAVE_DIR"

# Fetch the images one by one under sequential names.
echo "Downloading images..."
COUNT=1
for URL in $IMAGE_URLS; do
    FILE_NAME="$SAVE_DIR/image_$COUNT.jpg"
    curl -s -o "$FILE_NAME" "$URL"
    echo "Downloaded: $FILE_NAME"
    ((COUNT++))
done

echo "All images downloaded to $SAVE_DIR."
|
||||
22
media/export_instagram_cookies.py
Normal file
22
media/export_instagram_cookies.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# export_instagram_cookies.py
# Export the Chrome Instagram cookies into a Netscape-format cookies.txt
# so command-line tools (curl, yt-dlp, ...) can reuse the browser session.
import browser_cookie3

cj = browser_cookie3.chrome(domain_name='instagram.com')

with open('cookies.txt', 'w') as f:
    # Standard Netscape cookie-file preamble.
    f.write("# Netscape HTTP Cookie File\n")
    f.write("# This file was generated by browser-cookie3\n")
    f.write("# https://curl.se/docs/http-cookies.html\n\n")
    for cookie in cj:
        # Tab-separated columns: domain, flag, path, secure, expiration, name, value
        flag = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
        secure = 'TRUE' if cookie.secure else 'FALSE'
        expires = int(cookie.expires) if cookie.expires else 0
        f.write(
            f"{cookie.domain}\t{flag}\t{cookie.path}\t{secure}\t"
            f"{expires}\t{cookie.name}\t{cookie.value}\n"
        )
print("cookies.txt exported!")
|
||||
57
media/image-instagram-downloader.sh
Executable file
57
media/image-instagram-downloader.sh
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/bin/bash
# Instagram Full Image Downloader
# Usage: ./image-instagram-downloader.sh <instagram_image_url>
# Downloads the full-resolution image of a public Instagram post, falling
# back to the og:image thumbnail when the JSON-LD block is not present.

# Directory to save downloaded images
OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

if [ $# -ne 1 ]; then
    echo "Usage: $0 <instagram_image_url>"
    exit 1
fi

URL="$1"

# Fetch HTML content of the Instagram post
HTML_DATA=$(curl -s "$URL")

if [ -z "$HTML_DATA" ]; then
    echo "Could not fetch HTML data."
    exit 2
fi

# Extract the JSON-LD block that contains the full-resolution image URL
JSON_DATA=$(echo "$HTML_DATA" | grep -oE '<script type="application/ld\+json">[^<]+' | sed 's/<script type="application\/ld+json">//')

if [ -z "$JSON_DATA" ]; then
    echo "Could not find JSON data. Falling back to thumbnail."
    # Extract the thumbnail URL from the og:image meta tag
    ENCODED_IMG_URL=$(echo "$HTML_DATA" | awk -F'<meta property="og:image" content="' '{print $2}' | awk -F'"' '{print $1}')
    # Bug fix: the original `sed 's/&/\&/g'` was a no-op ('\&' inserts a
    # literal '&' in a sed replacement, so it replaced '&' with '&').
    # The attribute value is HTML-escaped, so decode '&amp;' back to '&'.
    FULL_IMG_URL=$(echo "$ENCODED_IMG_URL" | sed 's/&amp;/\&/g')
else
    # Parse the JSON data to extract the full-resolution image URL
    FULL_IMG_URL=$(echo "$JSON_DATA" | grep -oE '"url":"https:[^"]+' | sed 's/"url":"//')
fi

if [ -z "$FULL_IMG_URL" ]; then
    echo "Could not find full image URL."
    exit 3
fi

echo "Extracted Full Image URL: $FULL_IMG_URL"

# Download the image with a browser User-Agent to avoid bot blocking
FILENAME=$(basename "$FULL_IMG_URL" | cut -d'?' -f1)
curl -L -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" "$FULL_IMG_URL" -o "$OUTPUT_DIR/$FILENAME"

# Guard against Instagram serving an HTML error page instead of an image
FILE_TYPE=$(file "$OUTPUT_DIR/$FILENAME" | grep -oE 'image|HTML')

if [[ "$FILE_TYPE" == "HTML" ]]; then
    echo "Downloaded file is not an image. Please check the extracted URL."
    exit 4
fi

echo "Downloaded: $OUTPUT_DIR/$FILENAME"
|
||||
33
media/instagram-downloader.sh
Executable file
33
media/instagram-downloader.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
# Download an Instagram reel with yt-dlp, exporting fresh Chrome cookies
# first so reels behind the login wall work too.

OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

# A reel URL is mandatory.
if [ -z "$1" ]; then
    echo "Usage: $0 <INSTAGRAM_REEL_URL>"
    exit 1
fi

URL="$1"

# yt-dlp and python3 are both hard requirements.
if ! command -v yt-dlp &> /dev/null; then
    echo "yt-dlp is required. Install it with: pip install yt-dlp"
    exit 2
fi

if ! command -v python3 &> /dev/null; then
    echo "Python3 is required. Please install it."
    exit 3
fi

# Refresh cookies.txt from the local Chrome profile.
python3 export_instagram_cookies.py
if [ ! -f cookies.txt ]; then
    echo "Failed to export cookies.txt. Aborting."
    exit 4
fi

# The cookie jar lets yt-dlp fetch private reels with the browser session.
yt-dlp --cookies cookies.txt "$URL" -o "$OUTPUT_DIR/%(title)s.%(ext)s"
echo "Download completed. Files are saved in $OUTPUT_DIR"
#rm cookies.txt
|
||||
61
media/spotify-rss.py
Normal file
61
media/spotify-rss.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import requests
import urllib.parse

# --- Configuration ---
# Replace these with your actual Spotify Developer credentials
# NOTE(review): real credentials are hard-coded and committed here — rotate
# them and load from environment variables instead.
SPOTIPY_CLIENT_ID = '3756ae92386d45e9971aa03d2f4b1eca'
SPOTIPY_CLIENT_SECRET = '6c8ea8c409d944cc895571f3f49985df'

# The ID from your link: https://open.spotify.com/show/13Gs651PIKKI7zRF0p3PcJ
SHOW_ID = '13Gs651PIKKI7zRF0p3PcJ'
|
||||
|
||||
def find_real_rss_feed():
    """
    Resolve the configured Spotify show to its public RSS feed.

    Looks the show up on Spotify to get its name, then searches the iTunes
    podcast directory for a matching entry and prints its `feedUrl`.
    Prints a diagnostic and returns None on any failure.
    """
    # Step 1: Authenticate with Spotify (client-credentials flow).
    auth_manager = SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET
    )
    sp = spotipy.Spotify(auth_manager=auth_manager)

    # Step 2: Get the Show Name from Spotify.
    print(f"Fetching details for Spotify Show ID: {SHOW_ID}...")
    try:
        show = sp.show(SHOW_ID)
        show_name = show['name']
        publisher = show['publisher']
        print(f"Found Show: '{show_name}' by {publisher}")
    except Exception as e:
        print(f"Error fetching from Spotify: {e}")
        return

    # Step 3: Search the iTunes API for that Show Name.
    print("\nSearching public directories (iTunes) for the real RSS feed...")
    # Encode the name so it is URL-safe (e.g., spaces become %20).
    encoded_name = urllib.parse.quote(show_name)
    itunes_api_url = f"https://itunes.apple.com/search?term={encoded_name}&entity=podcast&limit=3"

    try:
        response = requests.get(itunes_api_url)
        # Fix: surface HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        data = response.json()

        # Fix: the first hit may lack a feedUrl (the original then printed
        # "SUCCESS" with None). Scan the few results we requested and keep
        # the first one that actually carries a feed.
        real_rss_url = None
        if data['resultCount'] > 0:
            for result in data['results']:
                if result.get('feedUrl'):
                    real_rss_url = result['feedUrl']
                    break

        if real_rss_url:
            print("\n✅ SUCCESS! Found the public RSS feed:")
            print("-" * 50)
            print(real_rss_url)
            print("-" * 50)
            print("You can copy and paste this URL directly into Apple Podcasts, Pocket Casts, or any standard RSS reader. It contains all the real .mp3 files!")
        else:
            print("\n❌ Could not find this show in public directories.")
            print("This usually means the podcast is a 'Spotify Exclusive' and does not have a public RSS feed.")

    except Exception as e:
        print(f"Error searching iTunes: {e}")
|
||||
|
||||
# Script entry point: resolve the configured show's public RSS feed.
if __name__ == '__main__':
    find_real_rss_feed()
|
||||
33
media/url-video-downloader.sh
Executable file
33
media/url-video-downloader.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
# Download a video from a URL shared on Bluesky.
# Requires yt-dlp and ffmpeg to be installed.

# All downloads land in this folder.
OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

# yt-dlp does the actual downloading.
if ! command -v yt-dlp &>/dev/null; then
    echo "yt-dlp is not installed. Please install it and try again."
    exit 1
fi

# The video URL arrives as the first (and only) argument.
if [[ -z "$1" ]]; then
    echo "No URL provided as an argument. Usage: ./script.sh <video_url>"
    exit 1
fi

VIDEO_URL="$1"

# -k keeps intermediate files; naming by video ID keeps filenames short.
echo "Downloading video from $VIDEO_URL..."
if yt-dlp -k -o "$OUTPUT_DIR/%(id)s.%(ext)s" "$VIDEO_URL"; then
    echo "Video downloaded successfully to $OUTPUT_DIR."
else
    echo "Failed to download the video. Please check the URL and try again."
    exit 1
fi
|
||||
34
media/video-downloader.sh
Executable file
34
media/video-downloader.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
# Interactively download a video from a URL shared on Bluesky.
# Requires yt-dlp and ffmpeg to be installed.

# All downloads land in this folder.
OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

# yt-dlp does the actual downloading.
if ! command -v yt-dlp &>/dev/null; then
    echo "yt-dlp is not installed. Please install it and try again."
    exit 1
fi

# Ask the user for the URL instead of taking an argument.
read -p "Enter the video URL: " VIDEO_URL

# An empty answer aborts the script.
if [[ -z "$VIDEO_URL" ]]; then
    echo "No URL entered. Exiting."
    exit 1
fi

# Name the downloaded file after the video title.
echo "Downloading video from $VIDEO_URL..."
if yt-dlp -o "$OUTPUT_DIR/%(title)s.%(ext)s" "$VIDEO_URL"; then
    echo "Video downloaded successfully to $OUTPUT_DIR."
else
    echo "Failed to download the video. Please check the URL and try again."
    exit 1
fi
|
||||
44
media/webm-to-mp4-converter.sh
Executable file
44
media/webm-to-mp4-converter.sh
Executable file
@@ -0,0 +1,44 @@
|
||||
#!/bin/bash
# Convert a .webm file to .mp4 using ffmpeg.

# ffmpeg is required.
if ! command -v ffmpeg &> /dev/null; then
    echo "ffmpeg could not be found. Please install ffmpeg first."
    exit 1
fi

# At least the input file must be given; the output name is optional.
if [ "$#" -lt 1 ]; then
    echo "Usage: $0 input_file.webm [output_file.mp4]"
    exit 1
fi

input_file="$1"

# The input must exist on disk.
if [ ! -f "$input_file" ]; then
    echo "Input file '$input_file' not found."
    exit 1
fi

# Use the supplied output name, or derive it from the input.
if [ "$#" -ge 2 ]; then
    output_file="$2"
else
    output_file="${input_file%.*}.mp4"
fi

# H.264 video + AAC audio is the most portable MP4 combination.
if ffmpeg -i "$input_file" -c:v libx264 -preset slow -crf 23 -c:a aac "$output_file"; then
    echo "Conversion successful. Output file: $output_file"
else
    echo "Conversion failed."
    exit 1
fi
|
||||
|
||||
13
media/zip_cbr_files.sh
Executable file
13
media/zip_cbr_files.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Package the page scans of episodes 1..27 into one .cbr archive each.
# NOTE(review): zip produces a .cbz-style (ZIP) archive; most comic readers
# accept it under the .cbr name anyway — confirm the target reader does.

for i in {1..27}; do
    # Bug fix: the original `printf $i` used the episode number itself as the
    # printf FORMAT string (and, despite the old comment, never padded with
    # leading zeros). Format it explicitly as a plain decimal instead, which
    # preserves the filenames the unpadded version produced.
    episode_number=$(printf '%d' "$i")
    echo "$episode_number"
    # The trailing glob must remain unquoted so it expands to the page files.
    zip "capitol_${episode_number}.cbr" volume_1_episode_${episode_number}_*.jpg
    echo "Created capitol_${episode_number}.cbr"
done

echo "All .cbr files have been created."
|
||||
|
||||
31
odilo/biblio_odilo.py
Normal file
31
odilo/biblio_odilo.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import requests

# Request headers expected by the Odilo endpoint.
headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'ie',
    'Host': 'odiloid.odilo.us'
}

url_api = 'https://odiloid.odilo.us/ClientId'

try:
    # Fetch the library list from the server.
    print("⏳ Obtenint dades del servidor...")
    response = requests.get(url_api, headers=headers)
    response.raise_for_status()

    # The payload is a JSON list of library dicts.
    bibliolist_response = response.json()

    # Keep only the entries that expose a URL.
    llista_urls = [entrada['url'] for entrada in bibliolist_response if 'url' in entrada]

    # Report what was found, one URL per line.
    print(f"✅ S'han trobat {len(llista_urls)} biblioteques. Aquí tens les URLs:\n")
    for adreca in llista_urls:
        print(adreca)

except requests.exceptions.RequestException as e:
    print(f"❌ Hi ha hagut un error amb la petició: {e}")
|
||||
21
odilo/recollir_epubs.sh
Normal file
21
odilo/recollir_epubs.sh
Normal file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
# Copy every .epub found under a source folder into a destination folder.

# Exactly two parameters are required: source and destination folders.
if [ "$#" -ne 2 ]; then
    echo "❌ Error: Falten paràmetres."
    echo "💡 Ús correcte: $0 <carpeta_origen> <carpeta_desti>"
    exit 1
fi

origen="$1"
desti="$2"

# Make sure the destination folder exists.
mkdir -p "$desti"

# Locate the epubs recursively and copy each one over.
echo "Cercant i copiant els arxius .epub..."
find "$origen" -type f -name "*.epub" -exec cp {} "$desti" \;

echo "✅ Procés completat! Tots els epubs estan a $desti"
|
||||
43
outlook/email_login.py
Normal file
43
outlook/email_login.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import imaplib

# ==========================================
# 1. CONFIGURATION
# ==========================================
EMAIL_ADDRESS = "marti@agile611.com"

# ⚠️ Insert your generated App Password here (no spaces)
# NOTE(review): a real App Password appears to be committed here — revoke it
# and load the secret from the environment instead.
PASSWORD = "fcztyfdwpfrgqjgl"  # Replace with your actual App Password

# Standard Office 365 IMAPS (SSL/TLS) endpoint.
IMAP_SERVER = "outlook.office365.com"
IMAP_PORT = 993
|
||||
|
||||
def test_app_password_login():
    """Verify that the configured App Password can open an IMAP session."""
    print(f"🔌 Connecting to {IMAP_SERVER} on port {IMAP_PORT}...")

    try:
        # Open an encrypted (SSL/TLS) connection to the server.
        mail = imaplib.IMAP4_SSL(IMAP_SERVER, IMAP_PORT)

        # Plain LOGIN over the TLS channel, using the App Password.
        print("🔐 Attempting login with App Password...")
        mail.login(EMAIL_ADDRESS, PASSWORD)

        print("✅ Success! The App Password worked perfectly.")

        # Selecting INBOX proves we can actually read mailbox data.
        status, messages = mail.select("INBOX")
        if status == "OK":
            total = messages[0].decode('utf-8')
            print(f"📥 INBOX selected successfully. Total messages: {total}")

        # Always end the session cleanly.
        mail.logout()
        print("👋 Logged out successfully.")

    except imaplib.IMAP4.error as e:
        print("\n❌ Login failed!")
        print(f"Error details: {e}")
        print("\n⚠️ Note: If you see 'BasicAuthBlocked' again, your organization's global Azure settings have completely disabled basic authentication, overriding the App Password.")
|
||||
|
||||
# Run the connectivity check when executed as a script.
if __name__ == "__main__":
    test_app_password_login()
|
||||
177
outlook/get_token.py
Normal file
177
outlook/get_token.py
Normal file
@@ -0,0 +1,177 @@
|
||||
import msal
import imaplib
import os
import csv
import re
import time
import socket

# 🚨 Force Python to drop the connection if Microsoft stops responding (tarpitting)
socket.setdefaulttimeout(30)

# ==========================================
# 1. CONFIGURATION
# ==========================================
CLIENT_ID = "05332268-8149-449f-a1f8-1efadd17166f"
EMAIL_ADDRESS = "guillem@agile611.com"

# Tenant-specific authority URL (the path segment is the directory/tenant ID).
AUTHORITY = "https://login.microsoftonline.com/884a3c53-8a5a-4d79-b0e0-a62ab5a794a1"
# MSAL automatically requests offline_access, so we only list the IMAP scope here
SCOPES = ["https://outlook.office.com/IMAP.AccessAsUser.All"]
|
||||
|
||||
# ==========================================
|
||||
# 2. HELPER FUNCTION: GENERATE CSV
|
||||
# ==========================================
|
||||
def generate_user_csv(email_address, first_name="Guillem", last_name="Hernandez Sola"):
    """Write the one-row, semicolon-delimited migration CSV to <user>_import.csv."""
    username = email_address.split("@")[0]

    # Column layout expected by the webmail import tool.
    columns = [
        "originUsername", "targetUsername", "password", "pop3enabled",
        "pop3password", "aliases", "forwards", "filters",
        "forename", "surname", "mailboxStatus"
    ]

    # Single record describing this mailbox; unused columns stay empty.
    record = dict.fromkeys(columns, "")
    record.update(
        originUsername=username,
        targetUsername=username,
        password="TempWebmailPassword123!",
        pop3enabled="true",
        forename=first_name,
        surname=last_name,
        mailboxStatus="premium",
    )

    csv_filename = f"{username}_import.csv"
    with open(csv_filename, mode="w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=columns, delimiter=";")
        writer.writeheader()
        writer.writerow(record)
    print(f"📝 Migration CSV generated: {csv_filename}")
|
||||
|
||||
# ==========================================
|
||||
# 3. TOKEN & CONNECTION MANAGERS
|
||||
# ==========================================
|
||||
def get_valid_token(app):
    """Checks the cache for a valid token, refreshes silently if needed, or prompts user."""
    # Silent path first: reuse an account cached from a previous run.
    cached_accounts = app.get_accounts()
    if cached_accounts:
        silent = app.acquire_token_silent(SCOPES, account=cached_accounts[0])
        if silent and "access_token" in silent:
            return silent["access_token"]

    # No usable cache: start an interactive device-code flow.
    flow = app.initiate_device_flow(scopes=SCOPES)
    if "user_code" not in flow:
        raise ValueError("Failed to create device flow. Check your Client ID and Azure settings.")

    print("\n🚨 ACTION REQUIRED 🚨")
    print(flow["message"])
    print("⏳ Waiting for browser authentication...")

    # Blocks until the user completes sign-in in the browser (or the flow expires).
    result = app.acquire_token_by_device_flow(flow)
    if "access_token" not in result:
        raise Exception(f"Failed to get token: {result.get('error_description')}")

    return result["access_token"]
|
||||
|
||||
def connect_to_imap(email, token):
    """Creates a fresh, authenticated connection to the IMAP server."""
    # SASL XOAUTH2 initial response: user=<addr>\x01auth=Bearer <token>\x01\x01
    auth_string = f"user={email}\x01auth=Bearer {token}\x01\x01"
    connection = imaplib.IMAP4_SSL("outlook.office365.com", 993)
    connection.authenticate("XOAUTH2", lambda _: auth_string.encode("utf-8"))
    return connection
|
||||
|
||||
# ==========================================
|
||||
# 4. MAIN EXECUTION
|
||||
# ==========================================
|
||||
def main():
    """Authenticate via OAuth2 device flow, back up every IMAP folder of the
    configured mailbox to .eml files on disk, then emit the migration CSV.

    Resumable: already-downloaded messages are skipped, and the download loop
    re-authenticates and reconnects when the IMAP connection drops.
    """
    print("🔄 Initializing Microsoft Authentication...")
    app = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)

    try:
        # Cached/silent token when possible, interactive device flow otherwise.
        access_token = get_valid_token(app)
        print("\n✅ Access Token Acquired Successfully!")

        print("\n🔌 Connecting to Outlook IMAP server...")
        mail = connect_to_imap(EMAIL_ADDRESS, access_token)
        print("✅ Successfully logged into IMAP via OAuth2!")
    except Exception as e:
        # Without a working connection there is nothing more to do.
        print(f"\n❌ Authentication failed: {e}")
        return

    try:
        # One backup directory per mailbox user, e.g. downloaded_emails_guillem.
        username = EMAIL_ADDRESS.split("@")[0]
        base_download_dir = f"downloaded_emails_{username}"
        os.makedirs(base_download_dir, exist_ok=True)

        status, folders = mail.list()
        if status == "OK":
            print(f"📂 Found {len(folders)} folders. Starting full account backup...\n")

            for folder_data in folders:
                folder_string = folder_data.decode('utf-8')

                # Skip container-only entries that cannot be SELECTed.
                if "\\Noselect" in folder_string:
                    continue

                # The folder name is the final quoted token of the LIST line;
                # fall back to the last whitespace token when unquoted.
                match = re.search(r'\"([^\"]+)\"$', folder_string)
                folder_name = match.group(1) if match else folder_string.split()[-1].strip('"')

                print(f"📁 Scanning folder: {folder_name}")

                # readonly=True: never mutate flags while backing up.
                status, _ = mail.select(f'"{folder_name}"', readonly=True)
                if status != "OK":
                    print(f" ⚠️ Could not open {folder_name}. Skipping.")
                    continue

                status, data = mail.search(None, "ALL")
                email_ids = data[0].split()

                if not email_ids:
                    print(" ↳ Folder is empty.")
                    continue

                print(f" ↳ Found {len(email_ids)} emails. Downloading...")

                # Strip characters that are unsafe in directory names.
                safe_folder_name = "".join([c for c in folder_name if c.isalnum() or c in (' ', '-', '_')]).strip()
                folder_dir = os.path.join(base_download_dir, safe_folder_name)
                os.makedirs(folder_dir, exist_ok=True)

                # Download loop with Reconnect & Timeout Logic
                for e_id in email_ids:
                    file_path = os.path.join(folder_dir, f"email_{e_id.decode('utf-8')}.eml")

                    # 🚀 SKIP EXISTING: Don't re-download emails we already have!
                    if os.path.exists(file_path):
                        continue

                    # Retry each message until it is written successfully.
                    success = False
                    while not success:
                        try:
                            status, msg_data = mail.fetch(e_id, "(RFC822)")
                            for response_part in msg_data:
                                if isinstance(response_part, tuple):
                                    # Raw RFC822 bytes → one .eml file per message.
                                    raw_email = response_part[1]
                                    with open(file_path, "wb") as f:
                                        f.write(raw_email)
                            success = True

                        # Catch token expiration, forced disconnects, AND silent timeouts
                        except (imaplib.IMAP4.abort, imaplib.IMAP4.error, ConnectionResetError, socket.timeout, TimeoutError) as e:
                            print(f"\n ⚠️ Connection lost or timed out. Refreshing token and reconnecting...")
                            try:
                                # Fresh token + fresh connection, then re-select
                                # the folder we were working in before retrying.
                                access_token = get_valid_token(app)
                                mail = connect_to_imap(EMAIL_ADDRESS, access_token)
                                mail.select(f'"{folder_name}"', readonly=True)
                                print(" ✅ Reconnected! Resuming download...")
                            except Exception as reconnect_error:
                                # Back off briefly and let the while-loop retry.
                                print(f" ❌ Reconnection failed: {reconnect_error}. Retrying in 5 seconds...")
                                time.sleep(5)

            print(f"\n🎉 All folders successfully downloaded to '{base_download_dir}'!")

        mail.logout()
        print("👋 Logged out successfully.\n")

        # Produce the import CSV for the migration target.
        print("⚙️ Generating configuration files...")
        generate_user_csv(EMAIL_ADDRESS)
        print("🎉 Migration prep complete!")

    except Exception as e:
        print(f"\n❌ A critical error occurred: {e}")
|
||||
|
||||
# Run the full backup + CSV generation when executed as a script.
if __name__ == "__main__":
    main()
|
||||
142
outlook/marti.py
Normal file
142
outlook/marti.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import msal
import imaplib
import os
import csv
import re  # Added for parsing folder names safely

# ==========================================
# 1. CONFIGURATION
# ==========================================
CLIENT_ID = "05332268-8149-449f-a1f8-1efadd17166f"
EMAIL_ADDRESS = "marti@agile611.com"

# Tenant-specific authority; the scope grants IMAP access as the signed-in user.
AUTHORITY = "https://login.microsoftonline.com/884a3c53-8a5a-4d79-b0e0-a62ab5a794a1"
SCOPES = ["https://outlook.office.com/IMAP.AccessAsUser.All"]
|
||||
|
||||
# ==========================================
|
||||
# 2. HELPER FUNCTION: GENERATE CSV
|
||||
# ==========================================
|
||||
def generate_user_csv(email_address, first_name="Marti", last_name="Montfort Ruiz"):
    """Emit the single-row, semicolon-delimited migration CSV for *email_address*."""
    username = email_address.split("@")[0]

    # Fixed column order required by the import tool.
    field_names = [
        "originUsername", "targetUsername", "password", "pop3enabled",
        "pop3password", "aliases", "forwards", "filters",
        "forename", "surname", "mailboxStatus"
    ]

    # One row describing this mailbox; optional columns are left blank.
    row = {name: "" for name in field_names}
    row["originUsername"] = username
    row["targetUsername"] = username
    row["password"] = "TempWebmailPassword123!"
    row["pop3enabled"] = "true"
    row["forename"] = first_name
    row["surname"] = last_name
    row["mailboxStatus"] = "premium"

    csv_filename = f"{username}_import.csv"
    with open(csv_filename, mode="w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=field_names, delimiter=";")
        writer.writeheader()
        writer.writerow(row)
    print(f"📝 Migration CSV generated: {csv_filename}")
|
||||
|
||||
def _download_folder(mail, folder_data, base_download_dir):
    """Download every message of one IMAP LIST entry as .eml files.

    Args:
        mail: An authenticated imaplib.IMAP4_SSL connection.
        folder_data: One raw bytes entry from mail.list().
        base_download_dir: Root directory for this user's backup.
    """
    folder_string = folder_data.decode('utf-8')

    # Skip unselectable folders (like root directory markers)
    if "\\Noselect" in folder_string:
        return

    # Safely extract the folder name (handles spaces and quotes)
    match = re.search(r'\"([^\"]+)\"$', folder_string)
    folder_name = match.group(1) if match else folder_string.split()[-1].strip('"')

    print(f"📁 Scanning folder: {folder_name}")

    # readonly=True prevents the backup from altering flags (e.g. \Seen)
    status, _ = mail.select(f'"{folder_name}"', readonly=True)
    if status != "OK":
        print(f"  ⚠️ Could not open {folder_name}. Skipping.")
        return

    status, data = mail.search(None, "ALL")
    if status != "OK":
        # Previously unchecked: a failed SEARCH would crash on data[0]
        print(f"  ⚠️ Could not search {folder_name}. Skipping.")
        return
    email_ids = data[0].split()

    if not email_ids:
        print("  ↳ Folder is empty.")
        return

    print(f"  ↳ Found {len(email_ids)} emails. Downloading...")

    # Create a file-system-safe subfolder name for the OS
    safe_folder_name = "".join(c for c in folder_name if c.isalnum() or c in (' ', '-', '_')).strip()
    folder_dir = os.path.join(base_download_dir, safe_folder_name)
    os.makedirs(folder_dir, exist_ok=True)

    # Download each email into its respective folder
    for e_id in email_ids:
        status, msg_data = mail.fetch(e_id, "(RFC822)")
        for response_part in msg_data:
            # FETCH responses interleave tuples (header, payload) with bytes
            if isinstance(response_part, tuple):
                raw_email = response_part[1]
                file_path = os.path.join(folder_dir, f"email_{e_id.decode('utf-8')}.eml")
                with open(file_path, "wb") as f:
                    f.write(raw_email)


def main():
    """Authenticate via OAuth2 device flow, back up every IMAP folder of
    EMAIL_ADDRESS to local .eml files, then generate the migration CSV.
    """
    # ==========================================
    # 3. GETTING THE OAUTH2 TOKEN
    # ==========================================
    print("🔄 Initializing Microsoft Authentication...")
    app = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
    flow = app.initiate_device_flow(scopes=SCOPES)

    if "user_code" not in flow:
        raise ValueError("Failed to create device flow. Check your Client ID and Azure settings.")

    print("\n🚨 ACTION REQUIRED 🚨")
    print(flow["message"])
    print("\n⏳ Waiting for browser authentication...")

    # Blocks until the user completes the device-code sign-in in a browser
    result = app.acquire_token_by_device_flow(flow)
    if "access_token" not in result:
        print("\n❌ Failed to get token:", result.get("error_description"))
        return

    access_token = result["access_token"]
    print("\n✅ Access Token Acquired Successfully!")

    # ==========================================
    # 4. CONNECTING TO IMAP & DOWNLOADING ALL FOLDERS
    # ==========================================
    print("\n🔌 Connecting to Outlook IMAP server...")
    # SASL XOAUTH2 initial response: fields separated by 0x01 control bytes
    auth_string = f"user={EMAIL_ADDRESS}\x01auth=Bearer {access_token}\x01\x01"

    mail = None
    try:
        mail = imaplib.IMAP4_SSL("outlook.office365.com", 993)
        mail.authenticate("XOAUTH2", lambda x: auth_string.encode("utf-8"))
        print("✅ Successfully logged into IMAP via OAuth2!")

        # Base directory for this user
        username = EMAIL_ADDRESS.split("@")[0]
        base_download_dir = f"downloaded_emails_{username}"
        os.makedirs(base_download_dir, exist_ok=True)

        # Fetch all folders in the mailbox and back each one up
        status, folders = mail.list()
        if status == "OK":
            print(f"📂 Found {len(folders)} folders. Starting full account backup...\n")
            for folder_data in folders:
                _download_folder(mail, folder_data, base_download_dir)
            print(f"\n🎉 All folders successfully downloaded to '{base_download_dir}'!")

        # ==========================================
        # 5. GENERATE THE CSV
        # ==========================================
        print("⚙️ Generating configuration files...")
        generate_user_csv(EMAIL_ADDRESS)
        print("🎉 Migration prep complete!")

    except imaplib.IMAP4.error as e:
        print(f"\n❌ IMAP Authentication failed: {e}")
    finally:
        # Previously the connection leaked if any download raised; always
        # attempt a best-effort logout here.
        if mail is not None and mail.state != "LOGOUT":
            try:
                mail.logout()
                print("👋 Logged out successfully.\n")
            except (imaplib.IMAP4.error, OSError):
                pass


if __name__ == "__main__":
    main()
33
pdfs/split_pdf_pages.sh
Normal file
33
pdfs/split_pdf_pages.sh
Normal file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash

# Split a PDF into one PDF per page using pdftk.
# Usage: ./split_pdf_pages.sh input.pdf
# Output: <input>_pages/page_N.pdf for every page.

# Check if the input file is provided
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 input.pdf"
    exit 1
fi

# Input PDF file
input_pdf="$1"

# Check if the file exists
if [ ! -f "$input_pdf" ]; then
    echo "Error: File '$input_pdf' not found."
    exit 1
fi

# Fail early if the pdftk dependency is missing
if ! command -v pdftk >/dev/null 2>&1; then
    echo "Error: pdftk is not installed."
    exit 1
fi

# Create an output directory
output_dir="${input_pdf%.pdf}_pages"
mkdir -p "$output_dir"

# Get the number of pages in the PDF
total_pages=$(pdftk "$input_pdf" dump_data | grep NumberOfPages | awk '{print $2}')

# Bail out if pdftk could not report a numeric page count (e.g. corrupt PDF);
# previously an empty value silently produced zero iterations.
if ! [[ "$total_pages" =~ ^[0-9]+$ ]]; then
    echo "Error: Could not determine page count of '$input_pdf'."
    exit 1
fi

# Split each page into a separate PDF
echo "Splitting '$input_pdf' into separate pages..."
for ((i=1; i<=total_pages; i++)); do
    output_file="$output_dir/page_$i.pdf"
    pdftk "$input_pdf" cat "$i" output "$output_file"
    echo "Created $output_file"
done

echo "All pages have been split and saved in '$output_dir'."
|
||||
7
replace_remove/fix-all-special-characters.sh
Executable file
7
replace_remove/fix-all-special-characters.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash

# Run the full filename clean-up pipeline on a directory.
# Usage: ./fix-all-special-characters.sh [start_dir]
# Each child script defaults to "." when the argument is empty/absent.
# "$1" is quoted so directory paths containing spaces are passed intact
# (previously the unquoted $1 was subject to word splitting).

./remove_parentheses_files_folders.sh "$1"
./replace_hyphen_with_underscores.sh "$1"
./replace_spaces_with_underscores.sh "$1"
./replace_under_hyphen_under_with_underscores.sh "$1"
./replace_triple_with_underscores.sh "$1"
|
||||
23
replace_remove/remove_parentheses_files_folders.sh
Executable file
23
replace_remove/remove_parentheses_files_folders.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

# Recursively strip "(" and ")" from file and folder names.
# Usage: ./remove_parentheses_files_folders.sh [start_dir]   (default: .)

# Function to rename files and folders recursively
rename_files_and_folders() {
    # -depth lists children before their parents, so each path is still
    # valid when we rename it.
    find "$1" -depth | while read -r item; do
        # Only transform the basename: transforming the whole path also
        # rewrote parent directory components, making mv target a
        # directory that does not exist yet.
        dir=$(dirname "$item")
        base=$(basename "$item")
        # Remove parentheses via shell expansion (echo|sed can mangle
        # names containing backslashes)
        new_base="${base//[()]/}"

        # Rename item if the new name is different
        if [ "$base" != "$new_base" ]; then
            # NOTE: silently overwrites an existing "$dir/$new_base"
            mv -- "$item" "$dir/$new_base"
            echo "Renamed: $item -> $dir/$new_base"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"

echo "Starting from directory: $start_dir"
rename_files_and_folders "$start_dir"

echo "Done!"
|
||||
23
replace_remove/remove_parentheses_folders.sh
Executable file
23
replace_remove/remove_parentheses_folders.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

# Recursively strip "(" and ")" from folder names only.
# Usage: ./remove_parentheses_folders.sh [start_dir]   (default: .)

# Function to rename folders recursively
rename_folders() {
    # -depth lists subfolders before their parents, so each path is still
    # valid when we rename it.
    find "$1" -depth -type d | while read -r dir_path; do
        # Only transform the basename: transforming the whole path also
        # rewrote parent directory components, making mv target a
        # directory that does not exist yet.
        parent=$(dirname "$dir_path")
        base=$(basename "$dir_path")
        # Remove parentheses via shell expansion (echo|sed can mangle
        # names containing backslashes)
        new_base="${base//[()]/}"

        # Rename folder if the new name is different
        if [ "$base" != "$new_base" ]; then
            # NOTE: silently overwrites an existing "$parent/$new_base"
            mv -- "$dir_path" "$parent/$new_base"
            echo "Renamed: $dir_path -> $parent/$new_base"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"

echo "Starting from directory: $start_dir"
rename_folders "$start_dir"

echo "Done!"
|
||||
23
replace_remove/replace_hyphen_with_underscores.sh
Executable file
23
replace_remove/replace_hyphen_with_underscores.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

# Recursively replace hyphens with underscores in file and folder names.
# Usage: ./replace_hyphen_with_underscores.sh [start_dir]   (default: .)

# Function to rename files and folders recursively
# (renamed: it was misleadingly called replace_spaces_with_underscores)
replace_hyphens_with_underscores() {
    # -depth lists children before their parents, so each path is still
    # valid when we rename it.
    find "$1" -depth | while read -r item; do
        # Only transform the basename: transforming the whole path also
        # rewrote parent directory components, making mv target a
        # directory that does not exist yet.
        dir=$(dirname "$item")
        base=$(basename "$item")
        # Replace hyphens with underscores via shell expansion
        new_base="${base//-/_}"

        # Rename item if the new name is different
        if [ "$base" != "$new_base" ]; then
            # NOTE: silently overwrites an existing "$dir/$new_base"
            mv -- "$item" "$dir/$new_base"
            echo "Renamed: $item -> $dir/$new_base"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"

echo "Starting from directory: $start_dir"
replace_hyphens_with_underscores "$start_dir"

echo "Done!"
|
||||
23
replace_remove/replace_spaces_with_underscores.sh
Executable file
23
replace_remove/replace_spaces_with_underscores.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

# Recursively replace spaces with underscores in file and folder names.
# Usage: ./replace_spaces_with_underscores.sh [start_dir]   (default: .)

# Function to rename files and folders recursively
replace_spaces_with_underscores() {
    # -depth lists children before their parents, so each path is still
    # valid when we rename it.
    find "$1" -depth | while read -r item; do
        # Only transform the basename: transforming the whole path also
        # rewrote parent directory components, making mv target a
        # directory that does not exist yet.
        dir=$(dirname "$item")
        base=$(basename "$item")
        # Replace spaces with underscores via shell expansion (echo|sed
        # can mangle names containing backslashes)
        new_base="${base// /_}"

        # Rename item if the new name is different
        if [ "$base" != "$new_base" ]; then
            # NOTE: silently overwrites an existing "$dir/$new_base"
            mv -- "$item" "$dir/$new_base"
            echo "Renamed: $item -> $dir/$new_base"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"

echo "Starting from directory: $start_dir"
replace_spaces_with_underscores "$start_dir"

echo "Done!"
|
||||
23
replace_remove/replace_triple_with_underscores.sh
Executable file
23
replace_remove/replace_triple_with_underscores.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

# Recursively collapse triple underscores ("___") to a single underscore
# in file and folder names.
# Usage: ./replace_triple_with_underscores.sh [start_dir]   (default: .)

# Function to rename files and folders recursively
# (renamed: it was misleadingly called replace_spaces_with_underscores)
collapse_triple_underscores() {
    # -depth lists children before their parents, so each path is still
    # valid when we rename it.
    find "$1" -depth | while read -r item; do
        # Only transform the basename: transforming the whole path also
        # rewrote parent directory components, making mv target a
        # directory that does not exist yet.
        dir=$(dirname "$item")
        base=$(basename "$item")
        # Collapse "___" to "_" via shell expansion
        new_base="${base//___/_}"

        # Rename item if the new name is different
        if [ "$base" != "$new_base" ]; then
            # NOTE: silently overwrites an existing "$dir/$new_base"
            mv -- "$item" "$dir/$new_base"
            echo "Renamed: $item -> $dir/$new_base"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"

echo "Starting from directory: $start_dir"
collapse_triple_underscores "$start_dir"

echo "Done!"
|
||||
23
replace_remove/replace_under_hyphen_under_with_underscores.sh
Executable file
23
replace_remove/replace_under_hyphen_under_with_underscores.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash

# Recursively collapse "_-_" sequences to a single underscore in file and
# folder names.
# Usage: ./replace_under_hyphen_under_with_underscores.sh [start_dir]   (default: .)

# Function to rename files and folders recursively
# (renamed: it was misleadingly called replace_spaces_with_underscores)
collapse_under_hyphen_under() {
    # -depth lists children before their parents, so each path is still
    # valid when we rename it.
    find "$1" -depth | while read -r item; do
        # Only transform the basename: transforming the whole path also
        # rewrote parent directory components, making mv target a
        # directory that does not exist yet.
        dir=$(dirname "$item")
        base=$(basename "$item")
        # Collapse "_-_" to "_" via shell expansion
        new_base="${base//_-_/_}"

        # Rename item if the new name is different
        if [ "$base" != "$new_base" ]; then
            # NOTE: silently overwrites an existing "$dir/$new_base"
            mv -- "$item" "$dir/$new_base"
            echo "Renamed: $item -> $dir/$new_base"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"

echo "Starting from directory: $start_dir"
collapse_under_hyphen_under "$start_dir"

echo "Done!"
|
||||
29
wordpress/export-articles.py
Normal file
29
wordpress/export-articles.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import requests
import csv

# Export all published posts from the site's WordPress REST API to CSV.
posts = []
page = 1

while True:
    # timeout keeps the script from hanging forever on a stalled connection
    # (previously no timeout was set).
    r = requests.get(
        f"https://www.agile611.com/wp-json/wp/v2/posts?per_page=100&page={page}",
        timeout=30,
    )
    r.encoding = 'utf-8'
    data = r.json()
    # Past the last page the API returns an error object (a dict), so a
    # non-list or empty response ends the crawl.
    if not isinstance(data, list) or not data:
        break
    for post in data:
        # 'title' is normally {"rendered": "..."}; tolerate other shapes.
        if isinstance(post, dict) and 'title' in post and isinstance(post['title'], dict):
            title = post['title']['rendered']
        else:
            title = str(post.get('title', ''))
        title = title.replace(',', '')  # Remove commas from the title
        date = post.get('date', '')
        link = post.get('link', '')
        posts.append([title, date, link])
    page += 1

# utf-8-sig writes a BOM so Excel detects the encoding correctly.
with open('agile611_posts.csv', 'w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    writer.writerow(['Title', 'Date', 'URL'])
    writer.writerows(posts)

print(f"Saved {len(posts)} posts to agile611_posts.csv")
|
||||
Reference in New Issue
Block a user