Review notes for this patch (free text ahead of the "diff --git" header is
skipped by git apply and patch).
  * Both sh steps begin with "#!/bin/bash". The scripts rely on
    "set -o pipefail" and RANDOM, which are bash features; without an
    interpreter line the Jenkins sh step runs the agent's default shell.
  * Indentation and blank lines were re-created from a whitespace-collapsed
    copy of the patch; check the context lines against jenkins/ePrss
    before applying.
  * The post-image blob id on the "index" line predates the shebang change.

diff --git a/jenkins/ePrss b/jenkins/ePrss
index 3564884..665f1e7 100644
--- a/jenkins/ePrss
+++ b/jenkins/ePrss
@@ -2,94 +2,150 @@ pipeline {
     agent any
 
     options {
-        timeout(time: 15, unit: 'MINUTES')
+        timeout(time: 30, unit: 'MINUTES')
         timestamps()
-        buildDiscarder(logRotator(numToKeepStr: '10'))
+        buildDiscarder(logRotator(numToKeepStr: '20'))
         disableConcurrentBuilds()
     }
-    
+
     triggers {
-        cron('H */6 * * *')
+        cron('''
+H 22,10,16 * * *
+0 4 * * *
+''')
+    }
+
+    environment {
+        VENV_DIR = 'venv'
+        MAX_PARALLEL_FEEDS = '4'
+        JITTER_MAX_SECONDS = '12'
+        PYTHONUNBUFFERED = '1'
+        PIP_CACHE_DIR = "${WORKSPACE}/.pip-cache"
     }
 
     stages {
-        stage('Checkout & Setup') {
+        stage('Checkout') {
             steps {
                 checkout scm
+            }
+        }
+
+        stage('Setup Python') {
+            steps {
-                sh '''
-                    python3 -m venv venv
-                    . venv/bin/activate
-                    pip install atproto fastfeedparser beautifulsoup4 httpx arrow charset-normalizer Pillow
+                sh '''#!/bin/bash
+                    # bash (see shebang) is required: pipefail is not a POSIX sh option.
+                    set -euxo pipefail
+                    python3 -m venv "${VENV_DIR}"
+                    . "${VENV_DIR}/bin/activate"
+                    python -m pip install --upgrade pip wheel setuptools
+                    pip install --cache-dir "${PIP_CACHE_DIR}" \
+                        atproto fastfeedparser beautifulsoup4 httpx arrow charset-normalizer Pillow
+                    python --version
+                    pip --version
                 '''
             }
         }
 
-        stage('Process All RSS Feeds') {
+        stage('Process All RSS Feeds (Batched Parallel)') {
             steps {
-                // 🔐 Fetch the single set of EP credentials ONCE for all 26 feeds
                 withCredentials([
                     string(credentialsId: 'BSKY_EP_HANDLE', variable: 'BSKY_EP_HANDLE'),
                     string(credentialsId: 'BSKY_EP_USERNAME', variable: 'BSKY_EP_USERNAME'),
                     string(credentialsId: 'BSKY_EP_APP_PASSWORD', variable: 'BSKY_EP_APP_PASSWORD')
                 ]) {
                     script {
-                        // 📍 The map only contains the hardcoded URLs
                         def feeds = [
-                            // Original 19 Feeds
-                            'badalona': 'https://www.elperiodico.cat/ca/rss/badalona/rss.xml',
-                            'barcelona': 'https://www.elperiodico.cat/ca/rss/barcelona/rss.xml',
-                            'ciencia': 'https://www.elperiodico.cat/ca/rss/ciencia/rss.xml',
-                            'cornella': 'https://www.elperiodico.cat/ca/rss/cornella/rss.xml',
-                            'economia': 'https://www.elperiodico.cat/ca/rss/economia/rss.xml',
-                            'educacio': 'https://www.elperiodico.cat/ca/rss/educacio/rss.xml',
-                            'esports': 'https://www.elperiodico.cat/ca/rss/esports/rss.xml',
-                            'extra': 'https://www.elperiodico.cat/ca/rss/extra/rss.xml',
-                            'gent': 'https://www.elperiodico.cat/ca/rss/gent/rss.xml',
-                            'hospitalet': 'https://www.elperiodico.cat/ca/rss/hospitalet/rss.xml',
-                            'internacional': 'https://www.elperiodico.cat/ca/rss/internacional/rss.xml',
-                            'medi-ambient': 'https://www.elperiodico.cat/ca/rss/medi-ambient/rss.xml',
-                            'motor': 'https://www.elperiodico.cat/ca/rss/motor/rss.xml',
-                            'oci-i-cultura': 'https://www.elperiodico.cat/ca/rss/oci-i-cultura/rss.xml',
-                            'opinio': 'https://www.elperiodico.cat/ca/rss/opinio/rss.xml',
-                            'politica': 'https://www.elperiodico.cat/ca/rss/politica/rss.xml',
-                            'portada': 'https://www.elperiodico.cat/ca/rss/portada/rss.xml',
-                            'que-fer': 'https://www.elperiodico.cat/ca/rss/que-fer/rss.xml',
-                            'sabadell': 'https://www.elperiodico.cat/ca/rss/sabadell/rss.xml',
-
-                            // 7 New Feeds
-                            'sanitat': 'https://www.elperiodico.cat/ca/rss/sanitat/rss.xml',
-                            'santa-coloma': 'https://www.elperiodico.cat/ca/rss/santa-coloma/rss.xml',
-                            'societat': 'https://www.elperiodico.cat/ca/rss/societat/rss.xml',
-                            'tecnologia': 'https://www.elperiodico.cat/ca/rss/tecnologia/rss.xml',
-                            'tele': 'https://www.elperiodico.cat/ca/rss/tele/rss.xml',
-                            'temps': 'https://www.elperiodico.cat/ca/rss/temps/rss.xml',
-                            'terrassa': 'https://www.elperiodico.cat/ca/rss/terrassa/rss.xml'
+                            [name: 'badalona', url: 'https://www.elperiodico.cat/ca/rss/badalona/rss.xml'],
+                            [name: 'barcelona', url: 'https://www.elperiodico.cat/ca/rss/barcelona/rss.xml'],
+                            [name: 'ciencia', url: 'https://www.elperiodico.cat/ca/rss/ciencia/rss.xml'],
+                            [name: 'cornella', url: 'https://www.elperiodico.cat/ca/rss/cornella/rss.xml'],
+                            [name: 'economia', url: 'https://www.elperiodico.cat/ca/rss/economia/rss.xml'],
+                            [name: 'educacio', url: 'https://www.elperiodico.cat/ca/rss/educacio/rss.xml'],
+                            [name: 'esports', url: 'https://www.elperiodico.cat/ca/rss/esports/rss.xml'],
+                            [name: 'extra', url: 'https://www.elperiodico.cat/ca/rss/extra/rss.xml'],
+                            [name: 'gent', url: 'https://www.elperiodico.cat/ca/rss/gent/rss.xml'],
+                            [name: 'hospitalet', url: 'https://www.elperiodico.cat/ca/rss/hospitalet/rss.xml'],
+                            [name: 'internacional', url: 'https://www.elperiodico.cat/ca/rss/internacional/rss.xml'],
+                            [name: 'medi-ambient', url: 'https://www.elperiodico.cat/ca/rss/medi-ambient/rss.xml'],
+                            [name: 'motor', url: 'https://www.elperiodico.cat/ca/rss/motor/rss.xml'],
+                            [name: 'oci-i-cultura', url: 'https://www.elperiodico.cat/ca/rss/oci-i-cultura/rss.xml'],
+                            [name: 'opinio', url: 'https://www.elperiodico.cat/ca/rss/opinio/rss.xml'],
+                            [name: 'politica', url: 'https://www.elperiodico.cat/ca/rss/politica/rss.xml'],
+                            [name: 'portada', url: 'https://www.elperiodico.cat/ca/rss/portada/rss.xml'],
+                            [name: 'que-fer', url: 'https://www.elperiodico.cat/ca/rss/que-fer/rss.xml'],
+                            [name: 'sabadell', url: 'https://www.elperiodico.cat/ca/rss/sabadell/rss.xml'],
+                            [name: 'sanitat', url: 'https://www.elperiodico.cat/ca/rss/sanitat/rss.xml'],
+                            [name: 'santa-coloma', url: 'https://www.elperiodico.cat/ca/rss/santa-coloma/rss.xml'],
+                            [name: 'societat', url: 'https://www.elperiodico.cat/ca/rss/societat/rss.xml'],
+                            [name: 'tecnologia', url: 'https://www.elperiodico.cat/ca/rss/tecnologia/rss.xml'],
+                            [name: 'tele', url: 'https://www.elperiodico.cat/ca/rss/tele/rss.xml'],
+                            [name: 'temps', url: 'https://www.elperiodico.cat/ca/rss/temps/rss.xml'],
+                            [name: 'terrassa', url: 'https://www.elperiodico.cat/ca/rss/terrassa/rss.xml']
                         ]
 
-                        def parallelTasks = [:]
+                        int batchSize = env.MAX_PARALLEL_FEEDS as int
+                        int jitterMax = env.JITTER_MAX_SECONDS as int
+                        // Split the feed list into groups of batchSize; the groups run one after another.
+                        def batches = feeds.collate(batchSize)
 
-                        feeds.each { feedName, feedUrl ->
-                            parallelTasks[feedName] = {
-                                stage(feedName.capitalize()) {
-                                    sh """
-                                        . venv/bin/activate
-
-                                        # The hardcoded URL and updated EP credentials are used here
-                                        python3 rss2bsky.py \\
-                                            "${feedUrl}" \\
-                                            "\$BSKY_EP_HANDLE" \\
-                                            "\$BSKY_EP_USERNAME" \\
-                                            "\$BSKY_EP_APP_PASSWORD"
-                                    """
+                        echo "Total feeds: ${feeds.size()}, batch size: ${batchSize}, batches: ${batches.size()}"
+
+                        int batchNum = 0
+                        for (def batch : batches) {
+                            batchNum++
+                            echo "Starting batch ${batchNum}/${batches.size()} with ${batch.size()} feeds"
+
+                            def parallelTasks = [:]
+
+                            batch.each { feed ->
+                                def feedName = feed.name
+                                def feedUrl = feed.url
+
+                                parallelTasks[feedName] = {
+                                    stage("Feed: ${feedName}") {
+                                        // A failing feed marks the build UNSTABLE and its own stage FAILURE.
+                                        catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') {
+                                            sh """#!/bin/bash
+                                                # bash (see shebang) is required: pipefail and RANDOM are not POSIX sh features.
+                                                set -euxo pipefail
+                                                . "${VENV_DIR}/bin/activate"
+
+                                                JITTER=\$((RANDOM % ${jitterMax}))
+                                                echo "[${feedName}] sleeping \$JITTER s jitter"
+                                                sleep \$JITTER
+
+                                                python3 rss2bsky.py \\
+                                                    "${feedUrl}" \\
+                                                    "\$BSKY_EP_HANDLE" \\
+                                                    "\$BSKY_EP_USERNAME" \\
+                                                    "\$BSKY_EP_APP_PASSWORD"
+                                            """
+                                        }
+                                    }
                                 }
                             }
-                        }
 
-                        // Execute all 26 feeds simultaneously
-                        parallel parallelTasks
+                            parallel parallelTasks
+                            echo "Finished batch ${batchNum}/${batches.size()}"
+                        }
                     }
                 }
             }
         }
     }
-}
+
+    post {
+        always {
+            archiveArtifacts artifacts: '*.json, **/*.log', allowEmptyArchive: true
+        }
+        unstable {
+            echo 'Build unstable: one or more feeds failed, but pipeline completed.'
+        }
+        failure {
+            echo 'Build failed.'
+        }
+        success {
+            echo 'Build succeeded.'
+        }
+    }
+}
\ No newline at end of file