Added all

This commit is contained in:
Guillem Hernandez Sola
2026-04-07 19:37:59 +02:00
commit da6dabcc62
42 changed files with 1959 additions and 0 deletions

177
.gitignore vendored Normal file
View File

@@ -0,0 +1,177 @@
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all,zsh
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all,zsh
media/downloads
media/xhs_videos
### Intellij+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
.DS_Store
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
*.txt
*.csv
*.xlsx
*.xls
.env
*.env
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### Intellij+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
.idea/*
!.idea/codeStyles
!.idea/runConfigurations
### Zsh ###
# Zsh compiled script + zrecompile backup
*.zwc
*.zwc.old
# Zsh completion-optimization dumpfile
*zcompdump*
# Zsh history
.zsh_history
# Zsh sessions
.zsh_sessions
# Zsh zcalc history
.zcalc_history
# A popular plugin manager's files
._zinit
.zinit_lstupd
# zdharma/zshelldoc tool's files
zsdoc/data
# robbyrussell/oh-my-zsh/plugins/per-directory-history plugin's files
# (when set-up to store the history in the local directory)
.directory_history
# MichaelAquilina/zsh-autoswitch-virtualenv plugin's files
# (for Zsh plugins using Python)
.venv
# Zunit tests' output
/tests/_output/*
!/tests/_output/.gitkeep
# End of https://www.toptal.com/developers/gitignore/api/intellij+all,zsh
downloads/
# Created by https://www.toptal.com/developers/gitignore/api/macos
# Edit at https://www.toptal.com/developers/gitignore?templates=macos
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
*.csv
# End of https://www.toptal.com/developers/gitignore/api/macos

1
README.md Normal file
View File

@@ -0,0 +1 @@
# scripts

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# List every file larger than 1 GB under a base directory, biggest first.
# The base directory defaults to the current one and can be overridden by
# passing it as the first argument.
DIRECTORIO="${1:-.}"

echo "Archivos mayores a 1GB en el directorio: $DIRECTORIO"
# du reports a human-readable size per match; sort -hr orders them descending.
find "$DIRECTORIO" -type f -size +1G -exec du -h {} + | sort -hr
echo "Búsqueda completada."

View File

@@ -0,0 +1,57 @@
import pandas as pd
import os
import sys

# Filter every sheet of an Excel workbook so it only keeps the contacts
# whose e-mail appears in the "active users" CSV, writing a new workbook.
print("🔍 --- Filtrant Excel segons usuaris actius del CSV ---")

# Input/output file names (expected in the working directory).
fitxer_csv = "Newsletter_Combinat_Final_Actius.csv"
fitxer_excel = "Contactos_Unificats.xlsx"
fitxer_resultat = "Contactos_Unificats_Filtrats.xlsx"

# Fail fast if either input file is missing.
if not os.path.exists(fitxer_csv) or not os.path.exists(fitxer_excel):
    print("❌ Error: Assegura't que els fitxers CSV i Excel són a la mateixa carpeta.")
    sys.exit(1)

try:
    print("⏳ Llegint els usuaris actius del CSV...")
    # The CSV is semicolon-separated.
    df_csv = pd.read_csv(fitxer_csv, sep=';', encoding='utf-8')
    # Collect the 'Email_ID' column, lower-cased and stripped so that
    # comparisons against the Excel column match exactly.
    correus_actius = set(df_csv['Email_ID'].dropna().astype(str).str.lower().str.strip())
    print(f"✅ S'han carregat {len(correus_actius)} correus únics del CSV.")
    print("⏳ Processant l'Excel pestanya per pestanya...")
    # Open the source workbook and prepare the filtered output workbook.
    excel_original = pd.ExcelFile(fitxer_excel)
    with pd.ExcelWriter(fitxer_resultat, engine='openpyxl') as writer:
        for nom_pestanya in excel_original.sheet_names:
            df_pestanya = pd.read_excel(excel_original, sheet_name=nom_pestanya)
            total_inicial = len(df_pestanya)
            # Normalise the sheet's 'email' column the same way before comparing.
            if 'email' in df_pestanya.columns:
                correus_excel = df_pestanya['email'].astype(str).str.lower().str.strip()
                # Keep only rows whose e-mail is in the active set.
                df_filtrat = df_pestanya[correus_excel.isin(correus_actius)]
                total_final = len(df_filtrat)
                print(f" 👉 Pestanya '{nom_pestanya}': s'han mantingut {total_final} de {total_inicial} contactes.")
            else:
                # Sheets without an 'email' column are copied through untouched.
                print(f" ⚠️ Avís: La pestanya '{nom_pestanya}' no té cap columna anomenada 'email'. Es deixa intacta.")
                df_filtrat = df_pestanya
            # Write the (possibly filtered) sheet under its original name.
            df_filtrat.to_excel(writer, sheet_name=nom_pestanya, index=False)
    print(f"\n🎉 Procés completat amb èxit!")
    print(f"📄 S'ha creat el fitxer amb els contactes filtrats: {fitxer_resultat}")
except Exception as e:
    # Broad catch keeps the operator-facing script from dumping a traceback.
    print(f"❌ S'ha produït un error: {e}")

View File

@@ -0,0 +1,67 @@
import csv
import glob
import os
import sys

# Concatenate every CSV in a folder into Newsletter_Combinat_Final.csv,
# keeping a single header row taken from the first non-empty file.

# 1. The folder must be passed as the first command-line argument.
if len(sys.argv) < 2:
    print("❌ Error: Has d'indicar la ruta de la carpeta.")
    print("💡 Ús correcte: python juntar_csv.py /ruta/de/la/carpeta")
    sys.exit(1)

# 2. Take the path (first argument after the script name).
carpeta_origen = sys.argv[1]
# Strip any quotes the terminal may have left around the path.
carpeta_origen = carpeta_origen.strip("'").strip('"').strip()

# 3. It must exist and be a directory.
if not os.path.isdir(carpeta_origen):
    print(f"❌ Error: La ruta '{carpeta_origen}' no és vàlida o no és una carpeta.")
    sys.exit(1)

print(f"📁 Cercant fitxers CSV a: {carpeta_origen}")
ruta_cerca = os.path.join(carpeta_origen, "*.csv")
fitxers_csv = glob.glob(ruta_cerca)

# The combined output lives in the same folder.
fitxer_resultat = os.path.join(carpeta_origen, "Newsletter_Combinat_Final.csv")

if not fitxers_csv:
    print("⚠️ No s'han trobat fitxers CSV a la carpeta indicada.")
else:
    # Skip a combined file left over from a previous run.
    fitxers_a_processar = [f for f in fitxers_csv if f != fitxer_resultat]
    if not fitxers_a_processar:
        print("⚠️ Només s'ha trobat el fitxer combinat anterior. No hi ha res de nou per unir.")
        sys.exit(0)
    print(f"🔄 S'han trobat {len(fitxers_a_processar)} fitxers. Processant...")
    # 4. Merge all files; ';' is the delimiter throughout.
    with open(fitxer_resultat, 'w', newline='', encoding='utf-8') as sortida:
        escrivent = csv.writer(sortida, delimiter=';')
        fitxers_processats = 0
        for nom_fitxer in fitxers_a_processar:
            with open(nom_fitxer, 'r', encoding='utf-8') as f:
                lector = csv.reader(f, delimiter=';')
                try:
                    capcalera = next(lector)
                except StopIteration:
                    continue  # Skip completely empty files.
                # Write the header only once, from the first valid file
                # (all files are assumed to share the same columns —
                # TODO confirm against the exports).
                if fitxers_processats == 0:
                    escrivent.writerow(capcalera)
                # Copy the remaining data rows verbatim.
                for fila in lector:
                    escrivent.writerow(fila)
            fitxers_processats += 1
    print(f"✅ Èxit! S'han combinat {fitxers_processats} fitxers.")
    print(f"📄 Fitxer creat a: {fitxer_resultat}")

73
file-management/mautic.py Normal file
View File

@@ -0,0 +1,73 @@
import pandas as pd
import csv
def filtrar_i_consolidar_etiquetes(tags_str):
    """Filter and consolidate a comma-separated tag string.

    Tags beginning with 'test_' or 'int_' (and the literal 'rrhh') are kept
    verbatim; known course prefixes collapse onto their base tag; anything
    else is dropped. The survivors are sorted, pipe-joined and wrapped in
    literal double quotes, or '' when nothing survives.
    """
    # Empty cells (NaN) or the literal string 'nan' yield an empty result.
    if pd.isna(tags_str) or str(tags_str).strip().lower() == 'nan':
        return ''
    # Ordered prefix table — most specific prefixes first so that e.g.
    # 'pspo2-' is never swallowed by 'pspo-'.
    grups = (
        (('skupspo2-', 'pspo2-'), 'pspo2'),
        (('pspo-',), 'pspo'),
        (('psm2-',), 'psm2'),
        (('psm-',), 'psm'),
        (('psux-', 'psu-'), 'psu'),
        (('sps-',), 'sps'),
        (('safe-',), 'safe-ls'),
        (('pal_ebm-',), 'pal_ebm'),
        (('pal-',), 'pal'),
    )
    # A set de-duplicates consolidated tags.
    consolidades = set()
    for etiqueta in (part.strip().lower() for part in str(tags_str).split(',')):
        # Internal/test tags and 'rrhh' pass through untouched.
        if etiqueta == 'rrhh' or etiqueta.startswith(('test_', 'int_')):
            consolidades.add(etiqueta)
            continue
        # First matching prefix wins, mirroring the original elif chain.
        for prefixos, base in grups:
            if etiqueta.startswith(prefixos):
                consolidades.add(base)
                break
    if not consolidades:
        return ''
    # Sorting keeps the CSV output stable and readable.
    return '"{}"'.format('|'.join(sorted(consolidades)))
# --- MAIN PROCESS ---
# 1. Load the original contacts export.
df = pd.read_excel('contactes_mautic/Contactos_2602026_7881pax.xlsx')
# 2. Keep only the columns Mautic needs.
columnes_mautic = ['email', 'nombre_x', 'apellidos_x', '*ciudad_x', '*pais_x', 'etiquetas_x']
df_net = df[columnes_mautic].copy()
# 3. Rename the columns so Mautic auto-detects its standard fields.
df_net.rename(columns={
    'nombre_x': 'firstname',
    'apellidos_x': 'lastname',
    '*ciudad_x': 'city',
    '*pais_x': 'country',
    'etiquetas_x': 'etiquetes'
}, inplace=True)
# 4. Filter and consolidate each contact's tag list.
df_net['etiquetes'] = df_net['etiquetes'].apply(filtrar_i_consolidar_etiquetes)
# 5. QUOTE_NONE + escapechar preserves the hand-written double quotes around
#    the tag list so Mautic imports it as a single field.
df_net.to_csv('Contactes_Mautic_Consolidats.csv', index=False, encoding='utf-8', quoting=csv.QUOTE_NONE, escapechar='\\')
print("✅ Fitxer preparat! Les etiquetes s'han filtrat i agrupat correctament.")

View File

@@ -0,0 +1,84 @@
import pandas as pd
import os
import csv
def simplificar_etiqueta(tag):
    """Reduce a raw tag to its canonical short form.

    The lookup is order-sensitive: official long course names resolve first,
    then advanced levels (psm2/pspo2/pspo-a) so they are not swallowed by
    their base keyword, then plain keyword groupings. Tags starting with
    'test_' are renamed to 'int_'; anything else passes through unchanged.
    """
    tag = tag.strip().lower()
    # Ordered (substrings, canonical tag) rules — first match wins.
    regles = (
        # Official long course names.
        (('professional-scrum-master',), 'psm'),
        (('professional-scrum-product-owner',), 'pspo'),
        (('professional-scrum-developer',), 'psd'),
        (('professional-agile-leadership',), 'pal'),
        # Advanced levels — must precede the plain keywords below.
        (('psm2', 'psm-2'), 'psm2'),
        (('pspo2', 'pspo-2'), 'pspo2'),
        (('pspo-a',), 'pspo-a'),
        # Keyword groupings.
        (('psm',), 'psm'),
        (('pspo',), 'pspo'),
        (('psu',), 'psu'),
        (('psd',), 'psd'),
        (('aps',), 'aps'),
        (('apk',), 'apk'),
        (('pal',), 'pal'),
        (('sps',), 'sps'),
        # Other useful groupings.
        (('okr',), 'okr'),
        (('scrumday', 'sd20', 'sd21'), 'scrumday'),
    )
    for claus, resultat in regles:
        if any(clau in tag for clau in claus):
            return resultat
    # Not a known group: normalise test_ tags, keep everything else as-is.
    if tag.startswith('test_'):
        return tag.replace('test_', 'int_', 1)
    return tag
# --- MAIN PROCESS ---
# Explode the contact list into one CSV per simplified tag.
nom_fitxer = 'contactes_mautic/Contactos_2602026_7881pax.xlsx'
print(f"Llegint el fitxer {nom_fitxer}...")
df = pd.read_excel(nom_fitxer)

# Map source columns to the field names Mautic auto-detects.
columnes = {
    'email': 'email',
    'nombre_x': 'firstname',
    'apellidos_x': 'lastname',
    '*ciudad_x': 'city',
    '*pais_x': 'country',
    'etiquetas_x': 'tags'
}
df_net = df[list(columnes.keys())].rename(columns=columnes)
# Contacts without an e-mail or without tags are useless here.
df_net = df_net.dropna(subset=['email', 'tags'])

carpeta_sortida = 'Mautic_CSVs_per_Tag'
os.makedirs(carpeta_sortida, exist_ok=True)

# Simplify every tag of every contact into its canonical short form.
df_net['tag_individual'] = df_net['tags'].apply(
    lambda x: [simplificar_etiqueta(tag) for tag in str(x).split(',') if tag.strip()]
)

# Expand to one row per (contact, tag) and drop duplicate pairs.
df_exploded = df_net.explode('tag_individual')
df_exploded = df_exploded.drop_duplicates(subset=['email', 'tag_individual'])
etiquetes_uniques = df_exploded['tag_individual'].unique()
print(f"🎉 Màgia feta! Hem reduït centenars d'etiquetes a només {len(etiquetes_uniques)} úniques.")

# Write one fully-quoted CSV per tag.
for tag in etiquetes_uniques:
    df_tag = df_exploded[df_exploded['tag_individual'] == tag].copy()
    df_tag['tags'] = tag
    # NOTE(review): renaming the 'tags' column to the tag value itself makes
    # every file's header differ — presumably intentional for the import
    # tooling; confirm before relying on it.
    df_tag = df_tag.rename(columns={'tags': tag})
    df_tag = df_tag.drop(columns=['tag_individual'])
    # Sanitise the tag so it is safe inside a filename.
    nom_tag_net = str(tag).replace(' ', '_').replace('/', '_').replace(':', '')
    ruta_fitxer = os.path.join(carpeta_sortida, f"etiqueta_{nom_tag_net}.csv")
    df_tag.to_csv(ruta_fitxer, index=False, encoding='utf-8', sep=',', quoting=csv.QUOTE_ALL)
print(f"✅ Tots els fitxers nets estan a la carpeta '{carpeta_sortida}'.")

View File

@@ -0,0 +1,17 @@
import pandas as pd

# Build a Mautic import CSV that removes the bogus '"-nan"' and 'nan' tags
# from every contact that has an e-mail address.
# 1. Load the original export.
df = pd.read_excel('contactes_mautic/Contactos_2602026_7881pax.xlsx')
# 2. Keep only rows with a valid e-mail, and only that column.
df_net = df.dropna(subset=['email'])[['email']].copy()
# 3. Deletion command for both tag variants: the leading '-' tells Mautic
#    to remove the tag; the raw string keeps the backslashes literal.
df_net['etiquetes'] = r'-\"-nan\"|-nan'
# 4. Plain CSV export — no special quoting/escaping overrides here.
df_net.to_csv('Neteja_Definitiva_Tags.csv', index=False, encoding='utf-8')
print("✅ Fitxer preparat! Les etiquetes errònies s'han marcat per ser esborrades.")

View File

@@ -0,0 +1,53 @@
import csv
import sys
import os

# Remove unsubscribed users from a newsletter CSV: keep only rows whose
# 'Unsubscribe_Date' column is empty, writing survivors to *_Actius.csv.

# The file path must be passed as the first argument.
if len(sys.argv) < 2:
    print("❌ Error: Has d'indicar la ruta del fitxer CSV.")
    print("💡 Ús correcte: python netejar_desuscrits.py /ruta/al/Newsletter_Combinat_Final.csv")
    sys.exit(1)

# Strip quotes the shell may have left around the path.
fitxer_origen = sys.argv[1].strip("'").strip('"').strip()
if not os.path.isfile(fitxer_origen):
    print(f"❌ Error: No s'ha trobat el fitxer '{fitxer_origen}'.")
    sys.exit(1)

# NOTE(review): str.replace swaps every ".csv" occurrence in the path,
# not only the extension — fine for the expected file names.
fitxer_desti = fitxer_origen.replace(".csv", "_Actius.csv")
usuaris_eliminats = 0
usuaris_actius = 0

print(f"🔍 Analitzant el fitxer: {fitxer_origen}")
with open(fitxer_origen, 'r', encoding='utf-8') as f_in, \
        open(fitxer_desti, 'w', newline='', encoding='utf-8') as f_out:
    lector = csv.reader(f_in, delimiter=';')
    escrivent = csv.writer(f_out, delimiter=';')
    # Copy the header through unchanged.
    capcalera = next(lector)
    escrivent.writerow(capcalera)
    # Locate the unsubscribe-date column by name.
    try:
        index_unsub = capcalera.index("Unsubscribe_Date")
    except ValueError:
        print("❌ Error: No s'ha trobat la columna 'Unsubscribe_Date' al fitxer.")
        sys.exit(1)
    # Keep only rows where that column is blank (user still subscribed).
    for fila in lector:
        if len(fila) > index_unsub and not fila[index_unsub].strip():
            escrivent.writerow(fila)
            usuaris_actius += 1
        else:
            usuaris_eliminats += 1

print("✅ Neteja completada amb èxit!")
print(f"📉 Usuaris desuscrits eliminats: {usuaris_eliminats}")
print(f"📈 Usuaris actius conservats: {usuaris_actius}")
print(f"📄 Nou fitxer creat a: {fitxer_desti}")

View File

@@ -0,0 +1,32 @@
import pandas as pd

# Deduplicate a contacts spreadsheet by e-mail, keeping the first
# occurrence of each address, and save the cleaned copy.
# 1. Input/output file names.
archivo_entrada = 'Contactos_2602026_totales.xlsx'
archivo_salida = 'Contactos_2602026_Limpios.xlsx'

print(f"Cargando el archivo: {archivo_entrada}...")
# 2. Read the workbook — data is assumed to be on the first sheet.
df = pd.read_excel(archivo_entrada)

# Count the initial records.
total_inicial = len(df)
print(f"Registros iniciales encontrados: {total_inicial}")

# 3. Drop duplicates on the 'email' column;
#    keep='first' retains the first appearance and drops the rest.
df_limpio = df.drop_duplicates(subset=['email'], keep='first')

# Report how many rows remain and how many were removed.
total_final = len(df_limpio)
duplicados_eliminados = total_inicial - total_final
print(f"Se han eliminado {duplicados_eliminados} contactos duplicados.")
print(f"Registros finales únicos: {total_final}")

# 4. Export the result to a new Excel file.
print("Guardando el nuevo archivo limpio...")
df_limpio.to_excel(archivo_salida, index=False)
print(f"¡Proceso completado! Archivo guardado como: {archivo_salida}")

View File

@@ -0,0 +1,46 @@
import pandas as pd
import numpy as np
import math  # NOTE(review): unused — np.ceil is used below instead of math.ceil.

# Split one contacts spreadsheet into n roughly-equal Excel parts.
file_name = 'Contactos_2602026_7881pax.xlsx'
print("Llegint el fitxer Excel...")
# Excel input, so read_excel (not read_csv).
df = pd.read_excel(file_name)

# Quick structural sanity check of the loaded data.
print(df.head())
print(df.info())

# Total number of rows to distribute.
total_rows = len(df)
print(f"Total rows: {total_rows}")

# Number of output files.
n = 6
# Ceiling division so no trailing rows are dropped.
chunk_size = int(np.ceil(total_rows / n))

# Slice and save each part.
output_files = []
print(f"Dividint en {n} parts...")
for i in range(n):
    start_row = i * chunk_size
    end_row = min((i + 1) * chunk_size, total_rows)
    # Later chunks can start past the end when total_rows isn't divisible by n.
    if start_row >= total_rows:
        break
    chunk = df.iloc[start_row:end_row]
    output_filename = f'Contactos_linkedin_part_{i+1}.xlsx'
    chunk.to_excel(output_filename, index=False)
    output_files.append(output_filename)
print(f"Files created: {output_files}")

104
file-management/teams.py Normal file
View File

@@ -0,0 +1,104 @@
import pandas as pd
import glob
import re
import os
import csv
def extreure_minuts(temps_str):
    """Parse a duration string such as '2h 15m', '45m' or '1h' into minutes.

    Missing values (NaN) count as 0; an absent hours or minutes component
    simply contributes nothing.
    """
    if pd.isna(temps_str):
        return 0
    text = str(temps_str).lower()
    total = 0
    # Hours component, e.g. '2h' or '2 h'.
    trobat_h = re.search(r'(\d+)\s*h', text)
    if trobat_h is not None:
        total += 60 * int(trobat_h.group(1))
    # Minutes component, e.g. '15m' or '15 m'.
    trobat_m = re.search(r'(\d+)\s*m', text)
    if trobat_m is not None:
        total += int(trobat_m.group(1))
    return total
# Aggregate per-student attendance minutes across every Teams attendance
# CSV export found in the current directory.
fitxers = glob.glob("*.csv")
dades_alumnes = {}
if not fitxers:
    print("⚠️ No s'han trobat fitxers CSV a la carpeta actual.")
else:
    print(f"S'han trobat {len(fitxers)} fitxers. Processant dades...\n")
    for fitxer in fitxers:
        try:
            # 1. Read the raw file to find the row where the table starts —
            #    Teams exports are UTF-16 and carry a preamble above the header.
            with open(fitxer, 'r', encoding='utf-16') as f:
                linies = f.readlines()
            fila_capcalera = -1
            separador = '\t'
            # Look for the header line containing "Nom"/"Nombre"/"Name".
            for i, linia in enumerate(linies):
                if 'Nom' in linia or 'Nombre' in linia or 'Name' in linia:
                    fila_capcalera = i
                    # Detect comma- vs tab-separated exports.
                    if ',' in linia and '\t' not in linia:
                        separador = ','
                    break
            if fila_capcalera == -1:
                print(f"⚠️ Saltant '{fitxer}': No s'ha trobat cap fila amb la paraula 'Nom'.")
                continue
            # 2. Parse the CSV starting exactly at the detected header row.
            df = pd.read_csv(fitxer, sep=separador, encoding='utf-16', skiprows=fila_capcalera)
            # 3. Resolve the name and duration columns dynamically
            #    (headers are localised: Catalan/Spanish/English).
            col_durada = next((col for col in df.columns if 'durada' in col.lower() or 'duración' in col.lower() or 'duration' in col.lower()), None)
            col_nom = next((col for col in df.columns if 'nom' in col.lower() or 'nombre' in col.lower() or 'name' in col.lower()), None)
            if not col_nom or not col_durada:
                print(f"⚠️ Saltant '{fitxer}': Columnes invàlides. Trobades: {list(df.columns)}")
                continue
            for index, row in df.iterrows():
                nom = row[col_nom]
                if pd.isna(nom):
                    continue
                minuts = extreure_minuts(row[col_durada])
                # Business rule: cap one specific session at 205 minutes.
                # The match is filename-based — presumably "2-05-26"
                # identifies that session's export; verify the naming.
                if "2-05-26" in fitxer and minuts > 205:
                    minuts = 205
                # Accumulate minutes per student across all files.
                if nom in dades_alumnes:
                    dades_alumnes[nom] += minuts
                else:
                    dades_alumnes[nom] = minuts
        except UnicodeError:
            print(f"❌ Error de codificació al fitxer '{fitxer}'. Intenta obrir-lo i guardar-lo de nou.")
        except Exception as e:
            print(f"❌ Error processant el fitxer '{fitxer}': {e}")
# 4. Report — 1581 minutes is the 80%-attendance threshold.
if dades_alumnes:
    print("="*50)
    print("📊 RESULTATS D'ASSISTÈNCIA (Mínim requerit: 1581 min)")
    print("="*50)
    for nom in sorted(dades_alumnes.keys()):
        minuts_totals = dades_alumnes[nom]
        if minuts_totals >= 1581:
            estat = "✅ Supera el 80%"
        else:
            estat = "❌ No arriba"
        h = minuts_totals // 60
        m = minuts_totals % 60
        print(f"{nom}: {minuts_totals} minuts ({h}h {m}m) -> {estat}")

View File

@@ -0,0 +1,40 @@
import pandas as pd
import csv
def preparar_etiquetes_per_esborrar(tags_str):
    """Turn a comma-separated tag list into a Mautic tag-deletion value.

    Each tag gets a leading '-' (Mautic's "remove tag" marker); the results
    are pipe-joined and wrapped in literal double quotes. Empty cells
    (NaN or the string 'nan') produce an empty string.
    """
    # Blank/NaN cells stay blank.
    if pd.isna(tags_str) or str(tags_str).strip().lower() == 'nan':
        return ''
    # Split on commas, trim whitespace, drop empties, mark each with '-'.
    marcades = ['-' + part.strip() for part in str(tags_str).split(',') if part.strip()]
    if not marcades:
        return ''
    # Pipe-join and add the literal double quotes expected by the importer.
    return '"{}"'.format('|'.join(marcades))
# --- MAIN PROCESS ---
# 1. Load the original export.
df = pd.read_excel('contactes_mautic/Contactos_2602026_7881pax.xlsx')
# 2. Keep only the e-mail and tags columns.
df_esborrar = df[['email', 'etiquetas_x']].copy()
# 3. Rename to the column name the importer expects.
df_esborrar.rename(columns={'etiquetas_x': 'etiquetes'}, inplace=True)
# 4. Turn every tag list into its '-tag1|-tag2' deletion form.
df_esborrar['etiquetes'] = df_esborrar['etiquetes'].apply(preparar_etiquetes_per_esborrar)
# 5. QUOTE_NONE + escapechar keeps our hand-written double quotes literal.
df_esborrar.to_csv('Contactes_Mautic_Esborrar_Etiquetes.csv', index=False, encoding='utf-8', quoting=csv.QUOTE_NONE, escapechar='\\')
print("✅ Fitxer preparat! Les etiquetes tenen el '-' i estan entre cometes dobles.")

View File

@@ -0,0 +1,19 @@
import pandas as pd

# Merge the LinkedIn export with the Brevo list and deduplicate by e-mail.
# 1. First file comes with headers.
df1 = pd.read_excel('Contactos_linkedin_120126_agile_6589pax_filtrado.xlsx')
# 2. Second file has no header row; assumes the first three columns are
#    email, last name and first name — TODO confirm against the source file.
df2 = pd.read_excel('lista_contactos_brevo_20251023_1394pax_AGILE611.xlsx', header=None)
df2.rename(columns={0: 'email', 1: 'apellidos_x', 2: 'nombre_x'}, inplace=True)
# 3. Stack both frames vertically.
df_final = pd.concat([df1, df2], ignore_index=True)
# 4. Keep the first occurrence of each e-mail.
df_final.drop_duplicates(subset=['email'], keep='first', inplace=True)
# 5. Save the unified contact list.
df_final.to_excel('Contactos_Unificados_Agile611.xlsx', index=False)
print("¡Archivo unificado creado con éxito!")

View File

@@ -0,0 +1,40 @@
import pandas as pd
import os
import sys
print("📊 --- Unificador de fitxers Excel ---")
# Definim els noms dels fitxers d'entrada i el de sortida
fitxer1 = "Contactos_rrhh_040226.xlsx"
fitxer2 = "Contactos_2602026_7881pax.xlsx"
fitxer_resultat = "Contactos_Unificats.xlsx"
# Comprovem que els fitxers existeixen a la carpeta actual
if not os.path.exists(fitxer1):
print(f"❌ Error: No s'ha trobat el fitxer '{fitxer1}'.")
sys.exit(1)
if not os.path.exists(fitxer2):
print(f"❌ Error: No s'ha trobat el fitxer '{fitxer2}'.")
sys.exit(1)
print("⏳ Llegint els fitxers... (això pot trigar uns segons depenent de la mida)")
try:
# Llegim els dos fitxers Excel
df1 = pd.read_excel(fitxer1)
df2 = pd.read_excel(fitxer2)
# Creem el nou fitxer Excel amb múltiples pestanyes
with pd.ExcelWriter(fitxer_resultat, engine='openpyxl') as writer:
# Escrivim cada DataFrame en una pestanya diferent
df1.to_excel(writer, sheet_name='RRHH', index=False)
df2.to_excel(writer, sheet_name='Contactos_7881', index=False)
print(f"✅ Procés completat amb èxit!")
print(f"📄 S'ha creat el fitxer: {fitxer_resultat}")
print(" - Pestanya 1: 'RRHH'")
print(" - Pestanya 2: 'Contactos_7881'")
except Exception as e:
print(f"❌ S'ha produït un error durant el procés: {e}")

View File

@@ -0,0 +1,81 @@
import os
import requests
from dotenv import load_dotenv

# 1. Pull LinkedIn credentials from the local .env file so secrets stay
#    out of source control.
load_dotenv()
ACCESS_TOKEN = os.getenv('LINKEDIN_ACCESS_TOKEN')
ORGANIZATION_ID = os.getenv('LINKEDIN_ORG_ID')
# LinkedIn's versioned REST API requires this header (format: YYYYMM).
API_VERSION = '202602'
def get_all_linkedin_posts(access_token, org_id):
    """Fetch ALL posts authored by a LinkedIn Organization Page.

    Follows the offset-based pagination of the /rest/posts endpoint until
    an empty page is returned.

    Args:
        access_token: OAuth2 bearer token with read access to the page.
        org_id: Numeric LinkedIn organization id (the URN is built here).

    Returns:
        A list of raw post dicts as returned by the API. Empty when
        credentials are missing; on a request error, the posts collected
        so far are returned.
    """
    if not access_token or not org_id:
        print("🚨 Error: Missing credentials. Please check your .env file.")
        return []
    url = "https://api.linkedin.com/rest/posts"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "LinkedIn-Version": API_VERSION,
        "X-Restli-Protocol-Version": "2.0.0"
    }
    all_posts = []
    start = 0
    count = 100  # Maximum page size LinkedIn allows per request.
    print(f"📥 Starting to fetch posts for Organization ID: {org_id}...")
    while True:
        params = {
            "q": "author",
            "author": f"urn:li:organization:{org_id}",
            "count": count,
            "start": start
        }
        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            data = response.json()
            elements = data.get('elements', [])
            # An empty page means we have paged past the last post.
            if not elements:
                break
            all_posts.extend(elements)
            print(f"✅ Fetched posts {start + 1} to {start + len(elements)}...")
            # Move the offset to the next page.
            start += count
        except requests.exceptions.RequestException as e:
            print(f"❌ Error fetching posts at offset {start}: {e}")
            # BUGFIX: the local 'response' is unbound when requests.get()
            # itself raises (connection error/timeout), which previously
            # caused an UnboundLocalError that masked the real failure.
            # RequestException carries the response (or None) on the
            # exception object, so read the body from there instead.
            if e.response is not None and e.response.text:
                print(f"LinkedIn API Response: {e.response.text}")
            break
    print(f"\n🎉 Finished! Successfully retrieved a total of {len(all_posts)} posts.")
    return all_posts
# --- Run the application ---
if __name__ == "__main__":
    posts = get_all_linkedin_posts(ACCESS_TOKEN, ORGANIZATION_ID)
    # Print a quick preview of the first 3 posts.
    if posts:
        print("\n--- Preview of latest 3 posts ---")
        for post in posts[:3]:
            # Safely extract the text; assumes 'commentary' is a dict when
            # present — TODO confirm against the Posts API schema.
            text = post.get('commentary', {}).get('text', 'No text content')
            print(f"ID: {post.get('id')}")
            print(f"Content: {text[:100]}...\n")

36
media/audio-extractor.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
# Extract the audio track of an MP4 as a best-quality MP3 using ffmpeg.
# Usage: ./audio-extractor.sh <input_mp4_file> <output_mp3_file>

# Guard clauses: tool present, correct arity, input file exists.
command -v ffmpeg &> /dev/null || {
    echo "ffmpeg could not be found. Please install ffmpeg first."
    exit 1
}

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <input_mp4_file> <output_mp3_file>"
    exit 1
fi

INPUT_FILE="$1"
OUTPUT_FILE="$2"

[ -f "$INPUT_FILE" ] || {
    echo "Input file '$INPUT_FILE' does not exist."
    exit 1
}

# -q:a 0 selects the highest VBR audio quality; -map a keeps audio streams only.
if ffmpeg -i "$INPUT_FILE" -q:a 0 -map a "$OUTPUT_FILE"; then
    echo "MP3 file successfully created: $OUTPUT_FILE"
else
    echo "Failed to extract MP3 from $INPUT_FILE."
    exit 1
fi

45
media/convert_mp4_to_webm.sh Executable file
View File

@@ -0,0 +1,45 @@
#!/bin/bash
# Convert an MP4 video to WebM (VP8 video + Vorbis audio) with ffmpeg.
# Usage: ./convert_mp4_to_webm.sh <input_file> [output_file]

convert_mp4_to_webm() {
    local input_file="$1"
    local output_file="$2"

    # Validate the input path and its extension.
    [ -f "$input_file" ] || {
        echo "Error: File '$input_file' does not exist."
        exit 1
    }
    if [[ "$input_file" != *.mp4 ]]; then
        echo "Error: Input file must be an MP4 file."
        exit 1
    fi

    # Default the output name to the input with a .webm extension.
    [ -z "$output_file" ] && output_file="${input_file%.mp4}.webm"

    # Run the conversion and report the outcome.
    if ffmpeg -i "$input_file" -c:v libvpx -c:a libvorbis "$output_file"; then
        echo "Successfully converted to '$output_file'."
    else
        echo "Error: Conversion failed."
        exit 1
    fi
}

# At least the input file is required.
if [ $# -lt 1 ]; then
    echo "Usage: $0 <input_file> [output_file]"
    exit 1
fi

convert_mp4_to_webm "$1" "$2"

12
media/copy_cbr_files.sh Normal file
View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Gather every .cbr comic archive found under the volum_* directories into
# one flat folder.

# Destination directory for .cbr files
DEST_DIR="cbr_files"
# Create the destination directory if it doesn't exist
mkdir -p "$DEST_DIR"
# Find all .cbr files recursively in volum_* directories and copy them to cbr_files.
# NOTE(review): same-named files overwrite each other in the flat destination,
# and the unquoted volum_* glob makes find error out when no such directory exists.
find volum_* -type f -name "*.cbr" -exec cp {} "$DEST_DIR" \;
echo "All .cbr files have been copied to $DEST_DIR."

View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Script to download images from an Instagram post
# Requires: curl, jq
# NOTE(review): jq is listed as a requirement but never invoked below.
# NOTE(review): HTML scraping — only works for public posts and breaks
# whenever Instagram changes its markup.

# Function to display usage
function usage() {
    echo "Usage: $0 <instagram_post_url>"
    exit 1
}

# Check if URL is provided
if [ $# -ne 1 ]; then
    usage
fi

# Instagram post URL
POST_URL=$1

# Fetch the HTML of the Instagram post (-L follows redirects)
echo "Fetching post data..."
HTML=$(curl -s -L "$POST_URL")

# Extract image URLs by scraping "display_url" JSON fields out of the HTML
echo "Extracting image URLs..."
IMAGE_URLS=$(echo "$HTML" | sed -n 's/.*"display_url":"\([^"]*\)".*/\1/p')

# Check if any image URLs were found
if [ -z "$IMAGE_URLS" ]; then
    echo "Failed to extract image URLs. Make sure the URL is valid and public."
    exit 1
fi

# Create a directory to save images
SAVE_DIR="instagram_images"
mkdir -p "$SAVE_DIR"

# Download each image, numbering the files sequentially
echo "Downloading images..."
COUNT=1
for URL in $IMAGE_URLS; do
    FILE_NAME="$SAVE_DIR/image_$COUNT.jpg"
    curl -s -o "$FILE_NAME" "$URL"
    echo "Downloaded: $FILE_NAME"
    ((COUNT++))
done
echo "All images downloaded to $SAVE_DIR."

View File

@@ -0,0 +1,22 @@
# export_instagram_cookies.py
"""Export Chrome's instagram.com cookies to a Netscape-format cookies.txt."""
import browser_cookie3

jar = browser_cookie3.chrome(domain_name='instagram.com')

# Standard Netscape cookie-file preamble (curl/yt-dlp expect it).
header = (
    "# Netscape HTTP Cookie File\n"
    "# This file was generated by browser-cookie3\n"
    "# https://curl.se/docs/http-cookies.html\n\n"
)


def _as_netscape_line(cookie):
    # Field order: domain, include-subdomains flag, path, secure, expiry, name, value.
    fields = (
        cookie.domain,
        'TRUE' if cookie.domain.startswith('.') else 'FALSE',
        cookie.path,
        'TRUE' if cookie.secure else 'FALSE',
        str(int(cookie.expires) if cookie.expires else 0),
        cookie.name,
        cookie.value,
    )
    return '\t'.join(fields) + '\n'


with open('cookies.txt', 'w') as out:
    out.write(header)
    out.writelines(_as_netscape_line(c) for c in jar)
print("cookies.txt exported!")

View File

@@ -0,0 +1,57 @@
#!/bin/bash
# Instagram Full Image Downloader
# Usage: ./image-instagram-downloader.sh <instagram_image_url>
# Pulls the full-resolution image URL out of the post's embedded ld+json
# metadata and falls back to the og:image thumbnail when it is absent.
# NOTE(review): HTML scraping — breaks whenever Instagram changes its markup.

# Directory to save downloaded images
OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

if [ $# -ne 1 ]; then
    echo "Usage: $0 <instagram_image_url>"
    exit 1
fi
URL="$1"

# Fetch HTML content of the Instagram post
HTML_DATA=$(curl -s "$URL")
if [ -z "$HTML_DATA" ]; then
    echo "Could not fetch HTML data."
    exit 2
fi

# Extract the <script type="application/ld+json"> payload, which carries
# the full-resolution image URL.
JSON_DATA=$(echo "$HTML_DATA" | grep -oE '<script type="application/ld\+json">[^<]+' | sed 's/<script type="application\/ld+json">//')
if [ -z "$JSON_DATA" ]; then
    echo "Could not find JSON data. Falling back to thumbnail."
    # Fall back to the og:image meta tag and decode HTML entities (&amp;).
    ENCODED_IMG_URL=$(echo "$HTML_DATA" | awk -F'<meta property="og:image" content="' '{print $2}' | awk -F'"' '{print $1}')
    FULL_IMG_URL=$(echo "$ENCODED_IMG_URL" | sed 's/&amp;/\&/g')
else
    # The first "url" field inside the JSON is the full-resolution image.
    FULL_IMG_URL=$(echo "$JSON_DATA" | grep -oE '"url":"https:[^"]+' | sed 's/"url":"//')
fi

if [ -z "$FULL_IMG_URL" ]; then
    echo "Could not find full image URL."
    exit 3
fi
echo "Extracted Full Image URL: $FULL_IMG_URL"

# Download with a browser User-Agent; name the file after the URL path
# with any query string stripped.
FILENAME=$(basename "$FULL_IMG_URL" | cut -d'?' -f1)
curl -L -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" "$FULL_IMG_URL" -o "$OUTPUT_DIR/$FILENAME"

# A login/error page comes back as HTML rather than an image.
FILE_TYPE=$(file "$OUTPUT_DIR/$FILENAME" | grep -oE 'image|HTML')
if [[ "$FILE_TYPE" == "HTML" ]]; then
    echo "Downloaded file is not an image. Please check the extracted URL."
    exit 4
fi
echo "Downloaded: $OUTPUT_DIR/$FILENAME"

33
media/instagram-downloader.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Download an Instagram reel with yt-dlp, authenticating with freshly
# exported browser cookies so private reels work too.

OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

# A reel URL is mandatory.
if [ -z "$1" ]; then
    echo "Usage: $0 <INSTAGRAM_REEL_URL>"
    exit 1
fi
URL="$1"

# Verify required tooling before doing any work.
command -v yt-dlp &> /dev/null || { echo "yt-dlp is required. Install it with: pip install yt-dlp"; exit 2; }
command -v python3 &> /dev/null || { echo "Python3 is required. Please install it."; exit 3; }

# Refresh cookies.txt from the browser profile via the helper script.
python3 export_instagram_cookies.py
[ -f cookies.txt ] || { echo "Failed to export cookies.txt. Aborting."; exit 4; }

# The exported cookies let yt-dlp access private reels.
yt-dlp --cookies cookies.txt "$URL" -o "$OUTPUT_DIR/%(title)s.%(ext)s"
echo "Download completed. Files are saved in $OUTPUT_DIR"
#rm cookies.txt

61
media/spotify-rss.py Normal file
View File

@@ -0,0 +1,61 @@
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import requests
import urllib.parse

# --- Configuration ---
# Replace these with your actual Spotify Developer credentials
# NOTE(security): these credentials are committed in plain text; rotate them
# and load them from environment variables before sharing this script.
SPOTIPY_CLIENT_ID = '3756ae92386d45e9971aa03d2f4b1eca'
SPOTIPY_CLIENT_SECRET = '6c8ea8c409d944cc895571f3f49985df'
# The ID from your link: https://open.spotify.com/show/13Gs651PIKKI7zRF0p3PcJ
SHOW_ID = '13Gs651PIKKI7zRF0p3PcJ'
# Network timeout (seconds) so a stalled HTTP request cannot hang the script.
REQUEST_TIMEOUT = 15


def find_real_rss_feed():
    """Resolve a Spotify show to its public RSS feed via the iTunes search API.

    Prints the feed URL on success; prints a diagnostic when the show cannot
    be fetched from Spotify or is not listed in public directories.
    """
    # Step 1: Authenticate with Spotify (client-credentials grant, no user login)
    auth_manager = SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET
    )
    sp = spotipy.Spotify(auth_manager=auth_manager)
    # Step 2: Get the Show Name from Spotify
    print(f"Fetching details for Spotify Show ID: {SHOW_ID}...")
    try:
        show = sp.show(SHOW_ID)
        show_name = show['name']
        publisher = show['publisher']
        print(f"Found Show: '{show_name}' by {publisher}")
    except Exception as e:
        print(f"Error fetching from Spotify: {e}")
        return
    # Step 3: Search the iTunes API for that Show Name
    print("\nSearching public directories (iTunes) for the real RSS feed...")
    # We encode the name so it can be safely used in a URL (e.g., spaces become %20)
    encoded_name = urllib.parse.quote(show_name)
    itunes_api_url = f"https://itunes.apple.com/search?term={encoded_name}&entity=podcast&limit=3"
    try:
        # Fix: the original call had no timeout and could block forever;
        # also fail fast on HTTP error statuses instead of parsing error bodies.
        response = requests.get(itunes_api_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()
        if data['resultCount'] > 0:
            # Grab the first result's RSS feed URL
            # We check the first few results to ensure the author matches, but usually the first is correct
            real_rss_url = data['results'][0].get('feedUrl')
            print("\n✅ SUCCESS! Found the public RSS feed:")
            print("-" * 50)
            print(real_rss_url)
            print("-" * 50)
            print("You can copy and paste this URL directly into Apple Podcasts, Pocket Casts, or any standard RSS reader. It contains all the real .mp3 files!")
        else:
            print("\n❌ Could not find this show in public directories.")
            print("This usually means the podcast is a 'Spotify Exclusive' and does not have a public RSS feed.")
    except Exception as e:
        print(f"Error searching iTunes: {e}")


if __name__ == '__main__':
    find_real_rss_feed()

33
media/url-video-downloader.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Download a video from a shared URL (e.g. posted on Bluesky) using yt-dlp.
# Requires yt-dlp and ffmpeg to be installed.

OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

# Bail out early when yt-dlp is missing.
if ! command -v yt-dlp &>/dev/null; then
    echo "yt-dlp is not installed. Please install it and try again."
    exit 1
fi

# The video URL must be supplied as the first argument.
if [[ -z "$1" ]]; then
    echo "No URL provided as an argument. Usage: ./script.sh <video_url>"
    exit 1
fi
VIDEO_URL="$1"

echo "Downloading video from $VIDEO_URL..."
# -k keeps intermediate files; naming by video id keeps filenames short.
if yt-dlp -k -o "$OUTPUT_DIR/%(id)s.%(ext)s" "$VIDEO_URL"; then
    echo "Video downloaded successfully to $OUTPUT_DIR."
else
    echo "Failed to download the video. Please check the URL and try again."
    exit 1
fi

34
media/video-downloader.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Interactively download a video (e.g. shared on Bluesky) with yt-dlp.
# Requires yt-dlp and ffmpeg to be installed.

OUTPUT_DIR="./downloads"
mkdir -p "$OUTPUT_DIR"

# yt-dlp does all the heavy lifting; stop if it is not available.
if ! command -v yt-dlp &>/dev/null; then
    echo "yt-dlp is not installed. Please install it and try again."
    exit 1
fi

# Ask the user for the URL instead of taking a CLI argument.
read -p "Enter the video URL: " VIDEO_URL
if [[ -z "$VIDEO_URL" ]]; then
    echo "No URL entered. Exiting."
    exit 1
fi

echo "Downloading video from $VIDEO_URL..."
# Name the output after the video title.
if yt-dlp -o "$OUTPUT_DIR/%(title)s.%(ext)s" "$VIDEO_URL"; then
    echo "Video downloaded successfully to $OUTPUT_DIR."
else
    echo "Failed to download the video. Please check the URL and try again."
    exit 1
fi

44
media/webm-to-mp4-converter.sh Executable file
View File

@@ -0,0 +1,44 @@
#!/bin/bash
# Convert a .webm file to .mp4 with ffmpeg (H.264 video, AAC audio).

# ffmpeg must be available.
if ! command -v ffmpeg &> /dev/null; then
    echo "ffmpeg could not be found. Please install ffmpeg first."
    exit 1
fi

# At least the input file is required; the output name is optional.
if [ "$#" -lt 1 ]; then
    echo "Usage: $0 input_file.webm [output_file.mp4]"
    exit 1
fi

input_file="$1"
if [ ! -f "$input_file" ]; then
    echo "Input file '$input_file' not found."
    exit 1
fi

# Use the second argument when given, otherwise swap the extension for .mp4.
output_file="${2-${input_file%.*}.mp4}"

# CRF 23 with the slow preset is a sensible quality/size trade-off for H.264.
if ffmpeg -i "$input_file" -c:v libx264 -preset slow -crf 23 -c:a aac "$output_file"; then
    echo "Conversion successful. Output file: $output_file"
else
    echo "Conversion failed."
    exit 1
fi

13
media/zip_cbr_files.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
# Bundle the page scans of episodes 1..27 into one comic archive per episode.
#
# Fixes over the original:
#  - `episode_number=$(printf $i)` was a no-op subshell — printf was given no
#    format string and $i was unquoted — so the loop variable is now used
#    directly.
#  - All expansions used as single arguments are quoted.
# NOTE(review): the archives are zip files named .cbr; .cbr conventionally
# denotes RAR — most readers cope, but .cbz would be the accurate extension.
for i in {1..27}; do
    echo "$i"
    # The glob stays unquoted so it expands to every page image of this episode.
    zip "capitol_${i}.cbr" volume_1_episode_${i}_*.jpg
    echo "Created capitol_${i}.cbr"
done
echo "All .cbr files have been created."

31
odilo/biblio_odilo.py Normal file
View File

@@ -0,0 +1,31 @@
import requests

# 1. Request headers — the API expects this specific Host/User-Agent pair.
headers = {
    'Content-Type': 'application/json',
    'User-Agent': 'ie',
    'Host': 'odiloid.odilo.us'
}
url_api = 'https://odiloid.odilo.us/ClientId'
# Fix: the original request had no timeout and could hang indefinitely if the
# server stopped responding; a timeout raises RequestException instead.
REQUEST_TIMEOUT = 15
try:
    # 2. Perform the request
    print("⏳ Obtenint dades del servidor...")
    response = requests.get(url_api, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # 3. Parse the JSON body (a list of library dicts)
    bibliolist_response = response.json()
    # 4. Keep only the entries that actually carry a 'url' key
    llista_urls = [biblio['url'] for biblio in bibliolist_response if 'url' in biblio]
    # 5. Print the result
    print(f"✅ S'han trobat {len(llista_urls)} biblioteques. Aquí tens les URLs:\n")
    for url in llista_urls:
        print(url)
except requests.exceptions.RequestException as e:
    print(f"❌ Hi ha hagut un error amb la petició: {e}")

21
odilo/recollir_epubs.sh Normal file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Copy every .epub found under a source tree into a flat destination folder.
# Usage: recollir_epubs.sh <source_dir> <dest_dir>

# Exactly two arguments are required.
if [ "$#" -ne 2 ]; then
    echo "❌ Error: Falten paràmetres."
    echo "💡 Ús correcte: $0 <carpeta_origen> <carpeta_desti>"
    exit 1
fi

src_dir="$1"
dest_dir="$2"

# Make sure the destination exists before copying into it.
mkdir -p "$dest_dir"

echo "Cercant i copiant els arxius .epub..."
find "$src_dir" -type f -name "*.epub" -exec cp {} "$dest_dir" \;
echo "✅ Procés completat! Tots els epubs estan a $dest_dir"

43
outlook/email_login.py Normal file
View File

@@ -0,0 +1,43 @@
import imaplib
# ==========================================
# 1. CONFIGURATION
# ==========================================
# NOTE(security): a live mailbox address and App Password are hard-coded
# below and committed to source control — rotate the password and load it
# from an environment variable or secrets store instead.
EMAIL_ADDRESS = "marti@agile611.com"
# ⚠️ Insert your generated App Password here (no spaces)
PASSWORD = "fcztyfdwpfrgqjgl" # Replace with your actual App Password
# Office 365 IMAP endpoint; 993 is the standard IMAPS (SSL/TLS) port.
IMAP_SERVER = "outlook.office365.com"
IMAP_PORT = 993
def test_app_password_login():
    """Attempt an IMAP-over-SSL login with the App Password and report the outcome."""
    print(f"🔌 Connecting to {IMAP_SERVER} on port {IMAP_PORT}...")
    try:
        # SSL/TLS-encrypted connection to the IMAP endpoint.
        conn = imaplib.IMAP4_SSL(IMAP_SERVER, IMAP_PORT)
        # Basic-auth login using the App Password.
        print("🔐 Attempting login with App Password...")
        conn.login(EMAIL_ADDRESS, PASSWORD)
        print("✅ Success! The App Password worked perfectly.")
        # Selecting INBOX confirms we can actually read mailbox data.
        status, inbox_info = conn.select("INBOX")
        if status == "OK":
            message_count = inbox_info[0].decode('utf-8')
            print(f"📥 INBOX selected successfully. Total messages: {message_count}")
        # Clean disconnect.
        conn.logout()
        print("👋 Logged out successfully.")
    except imaplib.IMAP4.error as err:
        print("\n❌ Login failed!")
        print(f"Error details: {err}")
        print("\n⚠️ Note: If you see 'BasicAuthBlocked' again, your organization's global Azure settings have completely disabled basic authentication, overriding the App Password.")
if __name__ == "__main__":
    test_app_password_login()

177
outlook/get_token.py Normal file
View File

@@ -0,0 +1,177 @@
import msal
import imaplib
import os
import csv
import re
import time
import socket
# 🚨 Force Python to drop the connection if Microsoft stops responding (tarpitting)
socket.setdefaulttimeout(30)
# ==========================================
# 1. CONFIGURATION
# ==========================================
# Azure AD public-client application ID used for the OAuth2 device-code flow.
CLIENT_ID = "05332268-8149-449f-a1f8-1efadd17166f"
# Mailbox to back up.
EMAIL_ADDRESS = "guillem@agile611.com"
# Tenant-specific login authority endpoint.
AUTHORITY = "https://login.microsoftonline.com/884a3c53-8a5a-4d79-b0e0-a62ab5a794a1"
# MSAL automatically requests offline_access, so we only list the IMAP scope here
SCOPES = ["https://outlook.office.com/IMAP.AccessAsUser.All"]
# ==========================================
# 2. HELPER FUNCTION: GENERATE CSV
# ==========================================
def generate_user_csv(email_address, first_name="Guillem", last_name="Hernandez Sola"):
    """Write a one-row, semicolon-delimited migration CSV for *email_address*.

    The file is named ``<username>_import.csv`` where *username* is the local
    part of the address.
    """
    username = email_address.split("@")[0]
    # Column order expected by the webmail import tool.
    columns = [
        "originUsername", "targetUsername", "password", "pop3enabled",
        "pop3password", "aliases", "forwards", "filters",
        "forename", "surname", "mailboxStatus"
    ]
    # Values aligned one-to-one with the columns above; unused fields stay empty.
    values = [
        username, username, "TempWebmailPassword123!", "true",
        "", "", "", "",
        first_name, last_name, "premium"
    ]
    csv_filename = f"{username}_import.csv"
    with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file, delimiter=";")
        writer.writerow(columns)
        writer.writerow(values)
    print(f"📝 Migration CSV generated: {csv_filename}")
# ==========================================
# 3. TOKEN & CONNECTION MANAGERS
# ==========================================
def get_valid_token(app):
    """Return a usable access token for *app*.

    Tries the MSAL cache first (silent refresh); when that fails, runs the
    interactive device-code flow and blocks until the user completes it.
    """
    # Silent path: reuse or refresh a token for the first cached account.
    cached_accounts = app.get_accounts()
    if cached_accounts:
        silent = app.acquire_token_silent(SCOPES, account=cached_accounts[0])
        if silent is not None and "access_token" in silent:
            return silent["access_token"]
    # Interactive path: device-code flow — the user signs in from a browser.
    flow = app.initiate_device_flow(scopes=SCOPES)
    if "user_code" not in flow:
        raise ValueError("Failed to create device flow. Check your Client ID and Azure settings.")
    print("\n🚨 ACTION REQUIRED 🚨")
    print(flow["message"])
    print("⏳ Waiting for browser authentication...")
    auth_result = app.acquire_token_by_device_flow(flow)
    if "access_token" not in auth_result:
        raise Exception(f"Failed to get token: {auth_result.get('error_description')}")
    return auth_result["access_token"]
def connect_to_imap(email, token):
    """Open a fresh IMAP4_SSL session authenticated with an OAuth2 bearer token."""
    # SASL XOAUTH2 initial response: user=<email>^Aauth=Bearer <token>^A^A
    xoauth2_response = f"user={email}\x01auth=Bearer {token}\x01\x01"
    connection = imaplib.IMAP4_SSL("outlook.office365.com", 993)
    connection.authenticate("XOAUTH2", lambda _: xoauth2_response.encode("utf-8"))
    return connection
# ==========================================
# 4. MAIN EXECUTION
# ==========================================
def main():
    # Purpose: OAuth2-authenticate against Office 365, mirror every IMAP
    # folder to local .eml files (resumable — already-downloaded messages are
    # skipped), then emit the migration CSV for the target webmail system.
    print("🔄 Initializing Microsoft Authentication...")
    app = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
    try:
        access_token = get_valid_token(app)
        print("\n✅ Access Token Acquired Successfully!")
        print("\n🔌 Connecting to Outlook IMAP server...")
        mail = connect_to_imap(EMAIL_ADDRESS, access_token)
        print("✅ Successfully logged into IMAP via OAuth2!")
    except Exception as e:
        # Nothing can proceed without an authenticated session.
        print(f"\n❌ Authentication failed: {e}")
        return
    try:
        username = EMAIL_ADDRESS.split("@")[0]
        base_download_dir = f"downloaded_emails_{username}"
        os.makedirs(base_download_dir, exist_ok=True)
        status, folders = mail.list()
        if status == "OK":
            print(f"📂 Found {len(folders)} folders. Starting full account backup...\n")
            for folder_data in folders:
                folder_string = folder_data.decode('utf-8')
                # Skip hierarchy placeholders that cannot be selected.
                if "\\Noselect" in folder_string:
                    continue
                # The folder name is the final quoted token of the LIST response.
                match = re.search(r'\"([^\"]+)\"$', folder_string)
                folder_name = match.group(1) if match else folder_string.split()[-1].strip('"')
                print(f"📁 Scanning folder: {folder_name}")
                # readonly=True keeps the backup from altering flags on the server.
                status, _ = mail.select(f'"{folder_name}"', readonly=True)
                if status != "OK":
                    print(f" ⚠️ Could not open {folder_name}. Skipping.")
                    continue
                status, data = mail.search(None, "ALL")
                email_ids = data[0].split()
                if not email_ids:
                    print(" ↳ Folder is empty.")
                    continue
                print(f" ↳ Found {len(email_ids)} emails. Downloading...")
                # Sanitize the folder name for use as a local directory name.
                safe_folder_name = "".join([c for c in folder_name if c.isalnum() or c in (' ', '-', '_')]).strip()
                folder_dir = os.path.join(base_download_dir, safe_folder_name)
                os.makedirs(folder_dir, exist_ok=True)
                # Download loop with Reconnect & Timeout Logic
                for e_id in email_ids:
                    file_path = os.path.join(folder_dir, f"email_{e_id.decode('utf-8')}.eml")
                    # 🚀 SKIP EXISTING: Don't re-download emails we already have!
                    if os.path.exists(file_path):
                        continue
                    success = False
                    # Retry the same message until it is written to disk.
                    while not success:
                        try:
                            status, msg_data = mail.fetch(e_id, "(RFC822)")
                            for response_part in msg_data:
                                if isinstance(response_part, tuple):
                                    raw_email = response_part[1]
                                    with open(file_path, "wb") as f:
                                        f.write(raw_email)
                            success = True
                        # Catch token expiration, forced disconnects, AND silent timeouts
                        except (imaplib.IMAP4.abort, imaplib.IMAP4.error, ConnectionResetError, socket.timeout, TimeoutError) as e:
                            print(f"\n ⚠️ Connection lost or timed out. Refreshing token and reconnecting...")
                            try:
                                # Re-authenticate, reconnect, and re-select the folder
                                # before retrying the failed fetch.
                                access_token = get_valid_token(app)
                                mail = connect_to_imap(EMAIL_ADDRESS, access_token)
                                mail.select(f'"{folder_name}"', readonly=True)
                                print(" ✅ Reconnected! Resuming download...")
                            except Exception as reconnect_error:
                                # Back off briefly; the while-loop retries.
                                print(f" ❌ Reconnection failed: {reconnect_error}. Retrying in 5 seconds...")
                                time.sleep(5)
        print(f"\n🎉 All folders successfully downloaded to '{base_download_dir}'!")
        mail.logout()
        print("👋 Logged out successfully.\n")
        print("⚙️ Generating configuration files...")
        generate_user_csv(EMAIL_ADDRESS)
        print("🎉 Migration prep complete!")
    except Exception as e:
        print(f"\n❌ A critical error occurred: {e}")
if __name__ == "__main__":
    main()

142
outlook/marti.py Normal file
View File

@@ -0,0 +1,142 @@
import msal
import imaplib
import os
import csv
import re # Added for parsing folder names safely
# ==========================================
# 1. CONFIGURATION
# ==========================================
# Azure AD public-client application ID used for the device-code flow.
CLIENT_ID = "05332268-8149-449f-a1f8-1efadd17166f"
# Mailbox to back up.
EMAIL_ADDRESS = "marti@agile611.com"
# Tenant-specific login authority endpoint.
AUTHORITY = "https://login.microsoftonline.com/884a3c53-8a5a-4d79-b0e0-a62ab5a794a1"
# IMAP scope; MSAL adds offline_access automatically.
SCOPES = ["https://outlook.office.com/IMAP.AccessAsUser.All"]
# ==========================================
# 2. HELPER FUNCTION: GENERATE CSV
# ==========================================
def generate_user_csv(email_address, first_name="Marti", last_name="Montfort Ruiz"):
    """Emit the single-row, semicolon-delimited import CSV for this mailbox."""
    username = email_address.split("@")[0]
    # Column layout required by the webmail import tool.
    fieldnames = [
        "originUsername", "targetUsername", "password", "pop3enabled",
        "pop3password", "aliases", "forwards", "filters",
        "forename", "surname", "mailboxStatus"
    ]
    # Fields not explicitly set below stay empty strings.
    record = dict.fromkeys(fieldnames, "")
    record.update(
        originUsername=username,
        targetUsername=username,
        password="TempWebmailPassword123!",
        pop3enabled="true",
        forename=first_name,
        surname=last_name,
        mailboxStatus="premium",
    )
    csv_filename = f"{username}_import.csv"
    with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter=";")
        writer.writeheader()
        writer.writerow(record)
    print(f"📝 Migration CSV generated: {csv_filename}")
def main():
    # Purpose: acquire an OAuth2 token via the device-code flow, download
    # every IMAP folder of the mailbox to local .eml files, then generate
    # the webmail migration CSV.
    # ==========================================
    # 3. GETTING THE OAUTH2 TOKEN
    # ==========================================
    print("🔄 Initializing Microsoft Authentication...")
    app = msal.PublicClientApplication(CLIENT_ID, authority=AUTHORITY)
    # Device-code flow: the user signs in from a browser while this script waits.
    flow = app.initiate_device_flow(scopes=SCOPES)
    if "user_code" not in flow:
        raise ValueError("Failed to create device flow. Check your Client ID and Azure settings.")
    print("\n🚨 ACTION REQUIRED 🚨")
    print(flow["message"])
    print("\n⏳ Waiting for browser authentication...")
    result = app.acquire_token_by_device_flow(flow)
    if "access_token" not in result:
        print("\n❌ Failed to get token:", result.get("error_description"))
        return
    access_token = result["access_token"]
    print("\n✅ Access Token Acquired Successfully!")
    # ==========================================
    # 4. CONNECTING TO IMAP & DOWNLOADING ALL FOLDERS
    # ==========================================
    print("\n🔌 Connecting to Outlook IMAP server...")
    # SASL XOAUTH2 initial client response: user=<email>^Aauth=Bearer <token>^A^A
    auth_string = f"user={EMAIL_ADDRESS}\x01auth=Bearer {access_token}\x01\x01"
    try:
        mail = imaplib.IMAP4_SSL("outlook.office365.com", 993)
        mail.authenticate("XOAUTH2", lambda x: auth_string.encode("utf-8"))
        print("✅ Successfully logged into IMAP via OAuth2!")
        # Base directory for this user
        username = EMAIL_ADDRESS.split("@")[0]
        base_download_dir = f"downloaded_emails_{username}"
        os.makedirs(base_download_dir, exist_ok=True)
        # Fetch all folders in the mailbox
        status, folders = mail.list()
        if status == "OK":
            print(f"📂 Found {len(folders)} folders. Starting full account backup...\n")
            for folder_data in folders:
                folder_string = folder_data.decode('utf-8')
                # Skip unselectable folders (like root directory markers)
                if "\\Noselect" in folder_string:
                    continue
                # Safely extract the folder name (handles spaces and quotes):
                # it is the final quoted token of the LIST response.
                match = re.search(r'\"([^\"]+)\"$', folder_string)
                folder_name = match.group(1) if match else folder_string.split()[-1].strip('"')
                print(f"📁 Scanning folder: {folder_name}")
                # Select the folder (readonly to prevent accidental modifications)
                status, _ = mail.select(f'"{folder_name}"', readonly=True)
                if status != "OK":
                    print(f" ⚠️ Could not open {folder_name}. Skipping.")
                    continue
                status, data = mail.search(None, "ALL")
                email_ids = data[0].split()
                if not email_ids:
                    print(" ↳ Folder is empty.")
                    continue
                print(f" ↳ Found {len(email_ids)} emails. Downloading...")
                # Create a safe subfolder name for the OS
                safe_folder_name = "".join([c for c in folder_name if c.isalnum() or c in (' ', '-', '_')]).strip()
                folder_dir = os.path.join(base_download_dir, safe_folder_name)
                os.makedirs(folder_dir, exist_ok=True)
                # Download each email into its respective folder
                for e_id in email_ids:
                    status, msg_data = mail.fetch(e_id, "(RFC822)")
                    for response_part in msg_data:
                        if isinstance(response_part, tuple):
                            raw_email = response_part[1]
                            file_path = os.path.join(folder_dir, f"email_{e_id.decode('utf-8')}.eml")
                            with open(file_path, "wb") as f:
                                f.write(raw_email)
        print(f"\n🎉 All folders successfully downloaded to '{base_download_dir}'!")
        mail.logout()
        print("👋 Logged out successfully.\n")
        # ==========================================
        # 5. GENERATE THE CSV
        # ==========================================
        print("⚙️ Generating configuration files...")
        generate_user_csv(EMAIL_ADDRESS)
        print("🎉 Migration prep complete!")
    except imaplib.IMAP4.error as e:
        print(f"\n❌ IMAP Authentication failed: {e}")
if __name__ == "__main__":
    main()

33
pdfs/split_pdf_pages.sh Normal file
View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Split a PDF into one file per page using pdftk.

# Exactly one argument — the input PDF — is required.
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 input.pdf"
    exit 1
fi

input_pdf="$1"
if [ ! -f "$input_pdf" ]; then
    echo "Error: File '$input_pdf' not found."
    exit 1
fi

# Pages land in <name>_pages/ next to the input file.
output_dir="${input_pdf%.pdf}_pages"
mkdir -p "$output_dir"

# pdftk's metadata dump reports the page count.
total_pages=$(pdftk "$input_pdf" dump_data | awk '/NumberOfPages/ {print $2}')

echo "Splitting '$input_pdf' into separate pages..."
i=1
while [ "$i" -le "$total_pages" ]; do
    output_file="$output_dir/page_$i.pdf"
    pdftk "$input_pdf" cat "$i" output "$output_file"
    echo "Created $output_file"
    i=$((i + 1))
done
echo "All pages have been split and saved in '$output_dir'."

View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Run the full filename-normalization pipeline over a target directory:
# strip parentheses, then convert hyphens, spaces, "_-_" and "___" runs
# to single underscores.
# Usage: $0 <directory>
#
# Fix: the original passed $1 unquoted, which word-splits and glob-expands
# paths containing spaces; quoting hands each helper one intact argument.
./remove_parentheses_files_folders.sh "$1"
./replace_hyphen_with_underscores.sh "$1"
./replace_spaces_with_underscores.sh "$1"
./replace_under_hyphen_under_with_underscores.sh "$1"
./replace_triple_with_underscores.sh "$1"

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Strip parentheses from every file and directory name under a tree.

rename_files_and_folders() {
    # -depth visits children before their parents so a rename never
    # invalidates a path that is still queued for processing.
    find "$1" -depth | while read -r item; do
        # Delete all '(' and ')' characters from the path.
        new_item=$(echo "$item" | tr -d '()')
        if [ "$item" != "$new_item" ]; then
            mv "$item" "$new_item"
            echo "Renamed: $item -> $new_item"
        fi
    done
}

# Default to the current directory when no argument is given.
start_dir="${1:-.}"
echo "Starting from directory: $start_dir"
rename_files_and_folders "$start_dir"
echo "Done!"

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Function to rename folders recursively
rename_folders() {
find "$1" -depth -type d | while read -r dir; do
# Remove parentheses from folder names
new_dir=$(echo "$dir" | sed 's/[()]//g')
# Rename folder if the new name is different
if [ "$dir" != "$new_dir" ]; then
mv "$dir" "$new_dir"
echo "Renamed: $dir -> $new_dir"
fi
done
}
# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"
echo "Starting from directory: $start_dir"
rename_folders "$start_dir"
echo "Done!"

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Replace hyphens with underscores in every file and directory name,
# recursively.
#
# Fix: the function was copy-pasted as replace_spaces_with_underscores and
# its comment claimed it replaced spaces, but the sed expression actually
# replaces hyphens; the name and comments now match the behavior.
# NOTE(review): sed rewrites the WHOLE path, so a '-' in a parent directory
# yields a target path that does not exist yet and mv fails for that entry —
# consider rewriting only the basename.

replace_hyphens_with_underscores() {
    # -depth renames children before their parents so paths stay valid.
    find "$1" -depth | while read -r item; do
        # Swap every '-' for '_'.
        new_item=$(echo "$item" | sed 's/-/_/g')
        if [ "$item" != "$new_item" ]; then
            mv "$item" "$new_item"
            echo "Renamed: $item -> $new_item"
        fi
    done
}

# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"
echo "Starting from directory: $start_dir"
replace_hyphens_with_underscores "$start_dir"
echo "Done!"

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Replace spaces with underscores in every file and directory name,
# recursively.

replace_spaces_with_underscores() {
    # -depth renames children before their parents so paths stay valid.
    find "$1" -depth | while read -r item; do
        # Translate every space to an underscore.
        new_item=$(echo "$item" | tr ' ' '_')
        if [ "$item" != "$new_item" ]; then
            mv "$item" "$new_item"
            echo "Renamed: $item -> $new_item"
        fi
    done
}

# Default to the current directory when no start point is supplied.
start_dir="${1:-.}"
echo "Starting from directory: $start_dir"
replace_spaces_with_underscores "$start_dir"
echo "Done!"

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Function to rename files and folders recursively
replace_spaces_with_underscores() {
find "$1" -depth | while read -r item; do
# Replace spaces with underscores in names
new_item=$(echo "$item" | sed 's/___/_/g')
# Rename item if the new name is different
if [ "$item" != "$new_item" ]; then
mv "$item" "$new_item"
echo "Renamed: $item -> $new_item"
fi
done
}
# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"
echo "Starting from directory: $start_dir"
replace_spaces_with_underscores "$start_dir"
echo "Done!"

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Function to rename files and folders recursively
replace_spaces_with_underscores() {
find "$1" -depth | while read -r item; do
# Replace spaces with underscores in names
new_item=$(echo "$item" | sed 's/_-_/_/g')
# Rename item if the new name is different
if [ "$item" != "$new_item" ]; then
mv "$item" "$new_item"
echo "Renamed: $item -> $new_item"
fi
done
}
# Starting directory (default is current directory if no argument is provided)
start_dir="${1:-.}"
echo "Starting from directory: $start_dir"
replace_spaces_with_underscores "$start_dir"
echo "Done!"

View File

@@ -0,0 +1,29 @@
import requests
import csv

# Collected rows: [title, date, link] per post.
posts = []
page = 1
# Page through the WordPress REST API (100 posts per request). The loop ends
# when the API stops returning a non-empty list — past the last page WP
# returns an error object, which fails the isinstance check below.
while True:
    # Fix: the original request had no timeout and could hang indefinitely.
    r = requests.get(
        f"https://www.agile611.com/wp-json/wp/v2/posts?per_page=100&page={page}",
        timeout=30,
    )
    r.encoding = 'utf-8'
    data = r.json()
    if not isinstance(data, list) or not data:
        break
    for post in data:
        # Titles normally arrive as {"rendered": "..."}; fall back to str()
        # for anything unexpected.
        if isinstance(post, dict) and 'title' in post and isinstance(post['title'], dict):
            title = post['title']['rendered']
        else:
            title = str(post.get('title', ''))
        title = title.replace(',', '')  # Remove commas from the title
        date = post.get('date', '')
        link = post.get('link', '')
        posts.append([title, date, link])
    page += 1

# utf-8-sig writes a BOM so Excel detects the encoding correctly.
with open('agile611_posts.csv', 'w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    writer.writerow(['Title', 'Date', 'URL'])
    writer.writerows(posts)
print(f"Saved {len(posts)} posts to agile611_posts.csv")