Added new yml
This commit is contained in:
@@ -19,15 +19,15 @@ logging.basicConfig(
|
|||||||
|
|
||||||
# --- Custom Classes to replace Tweety ---
|
# --- Custom Classes to replace Tweety ---
|
||||||
class ScrapedMedia:
|
class ScrapedMedia:
|
||||||
def __init__(self, url):
|
def __init__(self, url, media_type="photo"):
|
||||||
self.type = "photo"
|
self.type = media_type # Type can be "photo" or "video"
|
||||||
self.media_url_https = url
|
self.media_url_https = url
|
||||||
|
|
||||||
class ScrapedTweet:
|
class ScrapedTweet:
|
||||||
def __init__(self, created_on, text, media_urls):
|
def __init__(self, created_on, text, media_urls):
|
||||||
self.created_on = created_on
|
self.created_on = created_on
|
||||||
self.text = text
|
self.text = text
|
||||||
self.media = [ScrapedMedia(url) for url in media_urls]
|
self.media = [ScrapedMedia(url, media_type) for url, media_type in media_urls]
|
||||||
|
|
||||||
# --- 1. Playwright Scraping Logic ---
|
# --- 1. Playwright Scraping Logic ---
|
||||||
def take_error_screenshot(page, error_msg):
|
def take_error_screenshot(page, error_msg):
|
||||||
@@ -145,21 +145,27 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
|||||||
continue # Skip ads or invalid articles
|
continue # Skip ads or invalid articles
|
||||||
created_at = time_el.get_attribute('datetime')
|
created_at = time_el.get_attribute('datetime')
|
||||||
|
|
||||||
# Get Text (FIXED: Added .first to avoid strict mode violations on Quote RTs)
|
# Get Text
|
||||||
text_locator = article.locator('[data-testid="tweetText"]').first
|
text_locator = article.locator('[data-testid="tweetText"]').first
|
||||||
text = text_locator.inner_text() if text_locator.is_visible() else ""
|
text = text_locator.inner_text() if text_locator.is_visible() else ""
|
||||||
|
|
||||||
# Get Images
|
# Get Media URLs
|
||||||
image_urls = []
|
media_urls = []
|
||||||
photo_locators = article.locator('[data-testid="tweetPhoto"] img').all()
|
photo_locators = article.locator('[data-testid="tweetPhoto"] img').all()
|
||||||
for img in photo_locators:
|
for img in photo_locators:
|
||||||
src = img.get_attribute('src')
|
src = img.get_attribute('src')
|
||||||
if src:
|
if src:
|
||||||
# Convert thumbnail URL to high-res URL
|
|
||||||
src = re.sub(r'&name=\w+', '&name=large', src)
|
src = re.sub(r'&name=\w+', '&name=large', src)
|
||||||
image_urls.append(src)
|
media_urls.append((src, "photo"))
|
||||||
|
|
||||||
tweets.append(ScrapedTweet(created_at, text, image_urls))
|
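# Get Video URLs. NOTE(review): `src` is read from the videoPlayer container element itself; confirm that element carries a `src` attribute (it may live on a nested <video> tag) - TODO verify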
|
||||||
|
video_locators = article.locator('[data-testid="videoPlayer"]').all()
|
||||||
|
for video in video_locators:
|
||||||
|
video_url = video.get_attribute('src')
|
||||||
|
if video_url:
|
||||||
|
media_urls.append((video_url, "video"))
|
||||||
|
|
||||||
|
tweets.append(ScrapedTweet(created_at, text, media_urls))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"⚠️ Failed to parse a specific tweet: {e}")
|
logging.warning(f"⚠️ Failed to parse a specific tweet: {e}")
|
||||||
@@ -319,7 +325,6 @@ def sync_feeds(args):
|
|||||||
tweet_time = arrow.get(tweet.created_on)
|
tweet_time = arrow.get(tweet.created_on)
|
||||||
|
|
||||||
if tweet_time > last_bsky_time: # Only post new tweets
|
if tweet_time > last_bsky_time: # Only post new tweets
|
||||||
#if True: # For testing, post all tweets regardless of time
|
|
||||||
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
||||||
|
|
||||||
raw_text = tweet.text.strip()
|
raw_text = tweet.text.strip()
|
||||||
@@ -359,6 +364,11 @@ def sync_feeds(args):
|
|||||||
if blob:
|
if blob:
|
||||||
# Inject our dynamic alt text here!
|
# Inject our dynamic alt text here!
|
||||||
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
||||||
|
elif media.type == "video":
|
||||||
|
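# Handle video uploads. NOTE(review): the video blob is appended below as an AppBskyEmbedImages.Image; confirm the Bluesky API accepts video inside an image embed (a dedicated video embed may be required) - TODO verify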
|
||||||
|
blob = get_blob_from_url(media.media_url_https, bsky_client)
|
||||||
|
if blob:
|
||||||
|
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
||||||
|
|
||||||
# 🌐 Posting with Catalan language tag
|
# 🌐 Posting with Catalan language tag
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user