From d35279cfb763314ffe758b6f95b154296486445c Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Mon, 30 Mar 2026 16:57:47 +0200 Subject: [PATCH] Added new yml --- twitter2bsky_daemon.py | 44 ++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py index 73e3d8e..91b8a63 100644 --- a/twitter2bsky_daemon.py +++ b/twitter2bsky_daemon.py @@ -19,15 +19,15 @@ logging.basicConfig( # --- Custom Classes to replace Tweety --- class ScrapedMedia: - def __init__(self, url): - self.type = "photo" + def __init__(self, url, media_type="photo"): + self.type = media_type # Type can be "photo" or "video" self.media_url_https = url class ScrapedTweet: def __init__(self, created_on, text, media_urls): self.created_on = created_on self.text = text - self.media = [ScrapedMedia(url) for url in media_urls] + self.media = [ScrapedMedia(url, media_type) for url, media_type in media_urls] # --- 1. Playwright Scraping Logic --- def take_error_screenshot(page, error_msg): @@ -132,34 +132,40 @@ def scrape_tweets_via_playwright(username, password, email, target_handle): try: page.wait_for_selector('article', timeout=20000) - time.sleep(3) # Let DOM settle and images load + time.sleep(3) # Let DOM settle and images load articles = page.locator('article').all() logging.info(f"📊 Found {len(articles)} tweets on screen. Parsing...") - for article in articles[:10]: # Check top 10 tweets + for article in articles[:10]: # Check top 10 tweets try: # Get Time time_el = article.locator('time').first if not time_el.is_visible(): - continue # Skip ads or invalid articles + continue # Skip ads or invalid articles created_at = time_el.get_attribute('datetime') - # Get Text (FIXED: Added .first to avoid strict mode violations on Quote RTs) + # Get Text text_locator = article.locator('[data-testid="tweetText"]').first text = text_locator.inner_text() if text_locator.is_visible() else "" - # Get Images - image_urls = [] + # Get Media URLs + media_urls = [] photo_locators = article.locator('[data-testid="tweetPhoto"] img').all() for img in photo_locators: src = img.get_attribute('src') if src: - # Convert thumbnail URL to high-res URL src = re.sub(r'&name=\w+', '&name=large', src) - image_urls.append(src) - - tweets.append(ScrapedTweet(created_at, text, image_urls)) + media_urls.append((src, "photo")) + + # Get Video URLs + video_locators = article.locator('[data-testid="videoPlayer"]').all() + for video in video_locators: + video_url = video.get_attribute('src') + if video_url: + media_urls.append((video_url, "video")) + + tweets.append(ScrapedTweet(created_at, text, media_urls)) except Exception as e: logging.warning(f"⚠️ Failed to parse a specific tweet: {e}") @@ -226,7 +232,7 @@ def make_rich(content): parts = raw.split('\n') test_part0 = re.sub(r'[…\.]+$', '', parts[0]) if is_valid_url(test_part0): - return raw # Return original to preserve the paragraph break + return raw # Return original to preserve the paragraph break # Fallback: assume it's a broken URL and glue it anyway return test_url @@ -247,7 +253,7 @@ def make_rich(content): # Split by space to process words, URLs, and tags words = line.split(" ") for i, word in enumerate(words): - if not word: # Handle double spaces gracefully + if not word: # Handle double spaces gracefully if i < len(words) - 1: text_builder.text(" ") continue @@ -319,7 +325,6 @@ def sync_feeds(args): tweet_time = arrow.get(tweet.created_on) if tweet_time > last_bsky_time: # Only post new tweets - #if True: # For testing, post all tweets regardless of time logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...") raw_text = tweet.text.strip() @@ -359,7 +364,12 @@ def sync_feeds(args): if blob: # Inject our dynamic alt text here! images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob)) - + elif media.type == "video": + # Handle video uploads if necessary (this part may vary based on your API capabilities) + blob = get_blob_from_url(media.media_url_https, bsky_client) + if blob: + images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob)) + # 🌐 Posting with Catalan language tag try: if images: