Refactor RSS feed pipeline (batched parallel processing)
This commit is contained in:
162
jenkins/ePrss
162
jenkins/ePrss
@@ -2,94 +2,146 @@ pipeline {
|
|||||||
agent any
|
agent any
|
||||||
|
|
||||||
options {
|
options {
|
||||||
timeout(time: 15, unit: 'MINUTES')
|
timeout(time: 30, unit: 'MINUTES')
|
||||||
timestamps()
|
timestamps()
|
||||||
buildDiscarder(logRotator(numToKeepStr: '10'))
|
buildDiscarder(logRotator(numToKeepStr: '20'))
|
||||||
disableConcurrentBuilds()
|
disableConcurrentBuilds()
|
||||||
}
|
}
|
||||||
|
|
||||||
triggers {
|
triggers {
|
||||||
cron('H */6 * * *')
|
cron('''
|
||||||
|
H 22,10,16 * * *
|
||||||
|
0 4 * * *
|
||||||
|
''')
|
||||||
|
}
|
||||||
|
|
||||||
|
environment {
|
||||||
|
VENV_DIR = 'venv'
|
||||||
|
MAX_PARALLEL_FEEDS = '4'
|
||||||
|
JITTER_MAX_SECONDS = '12'
|
||||||
|
PYTHONUNBUFFERED = '1'
|
||||||
|
PIP_CACHE_DIR = "${WORKSPACE}/.pip-cache"
|
||||||
}
|
}
|
||||||
|
|
||||||
stages {
|
stages {
|
||||||
stage('Checkout & Setup') {
|
stage('Checkout') {
|
||||||
steps {
|
steps {
|
||||||
checkout scm
|
checkout scm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stage('Setup Python') {
|
||||||
|
steps {
|
||||||
sh '''
|
sh '''
|
||||||
python3 -m venv venv
|
set -euxo pipefail
|
||||||
. venv/bin/activate
|
python3 -m venv "${VENV_DIR}"
|
||||||
pip install atproto fastfeedparser beautifulsoup4 httpx arrow charset-normalizer Pillow
|
. "${VENV_DIR}/bin/activate"
|
||||||
|
python -m pip install --upgrade pip wheel setuptools
|
||||||
|
pip install --cache-dir "${PIP_CACHE_DIR}" \
|
||||||
|
atproto fastfeedparser beautifulsoup4 httpx arrow charset-normalizer Pillow
|
||||||
|
python --version
|
||||||
|
pip --version
|
||||||
'''
|
'''
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stage('Process All RSS Feeds') {
|
stage('Process All RSS Feeds (Batched Parallel)') {
|
||||||
steps {
|
steps {
|
||||||
// 🔐 Fetch the single set of EP credentials ONCE for all 26 feeds
|
|
||||||
withCredentials([
|
withCredentials([
|
||||||
string(credentialsId: 'BSKY_EP_HANDLE', variable: 'BSKY_EP_HANDLE'),
|
string(credentialsId: 'BSKY_EP_HANDLE', variable: 'BSKY_EP_HANDLE'),
|
||||||
string(credentialsId: 'BSKY_EP_USERNAME', variable: 'BSKY_EP_USERNAME'),
|
string(credentialsId: 'BSKY_EP_USERNAME', variable: 'BSKY_EP_USERNAME'),
|
||||||
string(credentialsId: 'BSKY_EP_APP_PASSWORD', variable: 'BSKY_EP_APP_PASSWORD')
|
string(credentialsId: 'BSKY_EP_APP_PASSWORD', variable: 'BSKY_EP_APP_PASSWORD')
|
||||||
]) {
|
]) {
|
||||||
script {
|
script {
|
||||||
// 📍 The map only contains the hardcoded URLs
|
|
||||||
def feeds = [
|
def feeds = [
|
||||||
// Original 19 Feeds
|
[name: 'badalona', url: 'https://www.elperiodico.cat/ca/rss/badalona/rss.xml'],
|
||||||
'badalona': 'https://www.elperiodico.cat/ca/rss/badalona/rss.xml',
|
[name: 'barcelona', url: 'https://www.elperiodico.cat/ca/rss/barcelona/rss.xml'],
|
||||||
'barcelona': 'https://www.elperiodico.cat/ca/rss/barcelona/rss.xml',
|
[name: 'ciencia', url: 'https://www.elperiodico.cat/ca/rss/ciencia/rss.xml'],
|
||||||
'ciencia': 'https://www.elperiodico.cat/ca/rss/ciencia/rss.xml',
|
[name: 'cornella', url: 'https://www.elperiodico.cat/ca/rss/cornella/rss.xml'],
|
||||||
'cornella': 'https://www.elperiodico.cat/ca/rss/cornella/rss.xml',
|
[name: 'economia', url: 'https://www.elperiodico.cat/ca/rss/economia/rss.xml'],
|
||||||
'economia': 'https://www.elperiodico.cat/ca/rss/economia/rss.xml',
|
[name: 'educacio', url: 'https://www.elperiodico.cat/ca/rss/educacio/rss.xml'],
|
||||||
'educacio': 'https://www.elperiodico.cat/ca/rss/educacio/rss.xml',
|
[name: 'esports', url: 'https://www.elperiodico.cat/ca/rss/esports/rss.xml'],
|
||||||
'esports': 'https://www.elperiodico.cat/ca/rss/esports/rss.xml',
|
[name: 'extra', url: 'https://www.elperiodico.cat/ca/rss/extra/rss.xml'],
|
||||||
'extra': 'https://www.elperiodico.cat/ca/rss/extra/rss.xml',
|
[name: 'gent', url: 'https://www.elperiodico.cat/ca/rss/gent/rss.xml'],
|
||||||
'gent': 'https://www.elperiodico.cat/ca/rss/gent/rss.xml',
|
[name: 'hospitalet', url: 'https://www.elperiodico.cat/ca/rss/hospitalet/rss.xml'],
|
||||||
'hospitalet': 'https://www.elperiodico.cat/ca/rss/hospitalet/rss.xml',
|
[name: 'internacional', url: 'https://www.elperiodico.cat/ca/rss/internacional/rss.xml'],
|
||||||
'internacional': 'https://www.elperiodico.cat/ca/rss/internacional/rss.xml',
|
[name: 'medi-ambient', url: 'https://www.elperiodico.cat/ca/rss/medi-ambient/rss.xml'],
|
||||||
'medi-ambient': 'https://www.elperiodico.cat/ca/rss/medi-ambient/rss.xml',
|
[name: 'motor', url: 'https://www.elperiodico.cat/ca/rss/motor/rss.xml'],
|
||||||
'motor': 'https://www.elperiodico.cat/ca/rss/motor/rss.xml',
|
[name: 'oci-i-cultura', url: 'https://www.elperiodico.cat/ca/rss/oci-i-cultura/rss.xml'],
|
||||||
'oci-i-cultura': 'https://www.elperiodico.cat/ca/rss/oci-i-cultura/rss.xml',
|
[name: 'opinio', url: 'https://www.elperiodico.cat/ca/rss/opinio/rss.xml'],
|
||||||
'opinio': 'https://www.elperiodico.cat/ca/rss/opinio/rss.xml',
|
[name: 'politica', url: 'https://www.elperiodico.cat/ca/rss/politica/rss.xml'],
|
||||||
'politica': 'https://www.elperiodico.cat/ca/rss/politica/rss.xml',
|
[name: 'portada', url: 'https://www.elperiodico.cat/ca/rss/portada/rss.xml'],
|
||||||
'portada': 'https://www.elperiodico.cat/ca/rss/portada/rss.xml',
|
[name: 'que-fer', url: 'https://www.elperiodico.cat/ca/rss/que-fer/rss.xml'],
|
||||||
'que-fer': 'https://www.elperiodico.cat/ca/rss/que-fer/rss.xml',
|
[name: 'sabadell', url: 'https://www.elperiodico.cat/ca/rss/sabadell/rss.xml'],
|
||||||
'sabadell': 'https://www.elperiodico.cat/ca/rss/sabadell/rss.xml',
|
[name: 'sanitat', url: 'https://www.elperiodico.cat/ca/rss/sanitat/rss.xml'],
|
||||||
|
[name: 'santa-coloma', url: 'https://www.elperiodico.cat/ca/rss/santa-coloma/rss.xml'],
|
||||||
// 7 New Feeds
|
[name: 'societat', url: 'https://www.elperiodico.cat/ca/rss/societat/rss.xml'],
|
||||||
'sanitat': 'https://www.elperiodico.cat/ca/rss/sanitat/rss.xml',
|
[name: 'tecnologia', url: 'https://www.elperiodico.cat/ca/rss/tecnologia/rss.xml'],
|
||||||
'santa-coloma': 'https://www.elperiodico.cat/ca/rss/santa-coloma/rss.xml',
|
[name: 'tele', url: 'https://www.elperiodico.cat/ca/rss/tele/rss.xml'],
|
||||||
'societat': 'https://www.elperiodico.cat/ca/rss/societat/rss.xml',
|
[name: 'temps', url: 'https://www.elperiodico.cat/ca/rss/temps/rss.xml'],
|
||||||
'tecnologia': 'https://www.elperiodico.cat/ca/rss/tecnologia/rss.xml',
|
[name: 'terrassa', url: 'https://www.elperiodico.cat/ca/rss/terrassa/rss.xml']
|
||||||
'tele': 'https://www.elperiodico.cat/ca/rss/tele/rss.xml',
|
|
||||||
'temps': 'https://www.elperiodico.cat/ca/rss/temps/rss.xml',
|
|
||||||
'terrassa': 'https://www.elperiodico.cat/ca/rss/terrassa/rss.xml'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def parallelTasks = [:]
|
int batchSize = env.MAX_PARALLEL_FEEDS as int
|
||||||
|
int jitterMax = env.JITTER_MAX_SECONDS as int
|
||||||
|
def batches = feeds.collate(batchSize)
|
||||||
|
|
||||||
feeds.each { feedName, feedUrl ->
|
echo "Total feeds: ${feeds.size()}, batch size: ${batchSize}, batches: ${batches.size()}"
|
||||||
parallelTasks[feedName] = {
|
|
||||||
stage(feedName.capitalize()) {
|
|
||||||
sh """
|
|
||||||
. venv/bin/activate
|
|
||||||
|
|
||||||
# The hardcoded URL and updated EP credentials are used here
|
int batchNum = 0
|
||||||
python3 rss2bsky.py \\
|
for (def batch : batches) {
|
||||||
"${feedUrl}" \\
|
batchNum++
|
||||||
"\$BSKY_EP_HANDLE" \\
|
echo "Starting batch ${batchNum}/${batches.size()} with ${batch.size()} feeds"
|
||||||
"\$BSKY_EP_USERNAME" \\
|
|
||||||
"\$BSKY_EP_APP_PASSWORD"
|
def parallelTasks = [:]
|
||||||
"""
|
|
||||||
|
batch.each { feed ->
|
||||||
|
def feedName = feed.name
|
||||||
|
def feedUrl = feed.url
|
||||||
|
|
||||||
|
parallelTasks[feedName] = {
|
||||||
|
stage("Feed: ${feedName}") {
|
||||||
|
catchError(buildResult: 'UNSTABLE', stageResult: 'FAILURE') {
|
||||||
|
sh """
|
||||||
|
set -euxo pipefail
|
||||||
|
. "${VENV_DIR}/bin/activate"
|
||||||
|
|
||||||
|
JITTER=\$((RANDOM % ${jitterMax}))
|
||||||
|
echo "[${feedName}] sleeping \$JITTER s jitter"
|
||||||
|
sleep \$JITTER
|
||||||
|
|
||||||
|
python3 rss2bsky.py \\
|
||||||
|
"${feedUrl}" \\
|
||||||
|
"\$BSKY_EP_HANDLE" \\
|
||||||
|
"\$BSKY_EP_USERNAME" \\
|
||||||
|
"\$BSKY_EP_APP_PASSWORD"
|
||||||
|
"""
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Execute all 26 feeds simultaneously
|
parallel parallelTasks
|
||||||
parallel parallelTasks
|
echo "Finished batch ${batchNum}/${batches.size()}"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
always {
|
||||||
|
archiveArtifacts artifacts: '*.json, **/*.log', allowEmptyArchive: true
|
||||||
|
}
|
||||||
|
unstable {
|
||||||
|
echo 'Build unstable: one or more feeds failed, but pipeline completed.'
|
||||||
|
}
|
||||||
|
failure {
|
||||||
|
echo 'Build failed.'
|
||||||
|
}
|
||||||
|
success {
|
||||||
|
echo 'Build succeeded.'
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user