From d35279cfb763314ffe758b6f95b154296486445c Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola <guillem.hernandez.sola@gmail.com>
Date: Mon, 30 Mar 2026 16:57:47 +0200
Subject: [PATCH] Add video media support to tweet scraping and Bluesky sync

---
 twitter2bsky_daemon.py | 44 ++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py
index 73e3d8e..91b8a63 100644
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -19,15 +19,15 @@ logging.basicConfig(
 
 # --- Custom Classes to replace Tweety ---
 class ScrapedMedia:
-    def __init__(self, url):
-        self.type = "photo"
+    def __init__(self, url, media_type="photo"):
+        self.type = media_type  # "photo" (default) or "video"; the sync step branches on this value
         self.media_url_https = url
 
 class ScrapedTweet:
     def __init__(self, created_on, text, media_urls):
         self.created_on = created_on
         self.text = text
-        self.media = [ScrapedMedia(url) for url in media_urls]
+        self.media = [ScrapedMedia(m) if isinstance(m, str) else ScrapedMedia(*m) for m in media_urls]  # bare URL str -> "photo"; (url, media_type) pair -> typed
 
 # --- 1. Playwright Scraping Logic ---
 def take_error_screenshot(page, error_msg):
@@ -132,34 +132,40 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
         
         try:
             page.wait_for_selector('article', timeout=20000)
-            time.sleep(3) # Let DOM settle and images load
+            time.sleep(3)  # Let DOM settle and images load
             
             articles = page.locator('article').all()
             logging.info(f"📊 Found {len(articles)} tweets on screen. Parsing...")
             
-            for article in articles[:10]: # Check top 10 tweets
+            for article in articles[:10]:  # Check top 10 tweets
                 try:
                     # Get Time
                     time_el = article.locator('time').first
                     if not time_el.is_visible():
-                        continue # Skip ads or invalid articles
+                        continue  # Skip ads or invalid articles
                     created_at = time_el.get_attribute('datetime')
                     
-                    # Get Text (FIXED: Added .first to avoid strict mode violations on Quote RTs)
+                    # Get Text
                     text_locator = article.locator('[data-testid="tweetText"]').first
                     text = text_locator.inner_text() if text_locator.is_visible() else ""
                     
-                    # Get Images
-                    image_urls = []
+                    # Get Media URLs
+                    media_urls = []
                     photo_locators = article.locator('[data-testid="tweetPhoto"] img').all()
                     for img in photo_locators:
                         src = img.get_attribute('src')
                         if src:
-                            # Convert thumbnail URL to high-res URL
                             src = re.sub(r'&name=\w+', '&name=large', src)
-                            image_urls.append(src)
-                            
-                    tweets.append(ScrapedTweet(created_at, text, image_urls))
+                            media_urls.append((src, "photo"))
+                    
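+                    # Get Video URLs. NOTE(review): this reads `src` from the [data-testid="videoPlayer"] element itself; confirm that node (rather than a nested <video>/<source>) carries the URL, otherwise no video is ever collected.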
+                    video_locators = article.locator('[data-testid="videoPlayer"]').all()
+                    for video in video_locators:
+                        video_url = video.get_attribute('src')
+                        if video_url:
+                            media_urls.append((video_url, "video"))
+                    
+                    tweets.append(ScrapedTweet(created_at, text, media_urls))
                     
                 except Exception as e:
                     logging.warning(f"⚠️ Failed to parse a specific tweet: {e}")
@@ -226,7 +232,7 @@ def make_rich(content):
         parts = raw.split('\n')
         test_part0 = re.sub(r'[…\.]+$', '', parts[0])
         if is_valid_url(test_part0):
-            return raw # Return original to preserve the paragraph break
+            return raw  # Return original to preserve the paragraph break
             
         # Fallback: assume it's a broken URL and glue it anyway
         return test_url
@@ -247,7 +253,7 @@ def make_rich(content):
         # Split by space to process words, URLs, and tags
         words = line.split(" ")
         for i, word in enumerate(words):
-            if not word: # Handle double spaces gracefully
+            if not word:  # Handle double spaces gracefully
                 if i < len(words) - 1:
                     text_builder.text(" ")
                 continue
@@ -319,7 +325,6 @@ def sync_feeds(args):
             tweet_time = arrow.get(tweet.created_on)
             
             if tweet_time > last_bsky_time:  # Only post new tweets
-            #if True:  # For testing, post all tweets regardless of time
                 logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
                 
                 raw_text = tweet.text.strip()
@@ -359,7 +364,12 @@ def sync_feeds(args):
                             if blob:
                                 # Inject our dynamic alt text here!
                                 images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
-                
+                        elif media.type == "video":
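+                            # NOTE(review): the video blob is wrapped in AppBskyEmbedImages.Image and appended to `images`, so it is posted as an image embed; confirm the API accepts this or switch to a dedicated video embed.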
+                            blob = get_blob_from_url(media.media_url_https, bsky_client)
+                            if blob:
+                                images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
+
                 # 🌐 Posting with Catalan language tag
                 try:
                     if images: