Added new yml

2026-03-30 16:57:47 +02:00
parent 6a5d9f3df7
commit d35279cfb7
1 changed files with 27 additions and 17 deletions
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -19,15 +19,15 @@ logging.basicConfig(

 # --- Custom Classes to replace Tweety ---
 class ScrapedMedia:
-    def __init__(self, url):
-        self.type = "photo"
+    def __init__(self, url, media_type="photo"):
+        self.type = media_type  # Type can be "photo" or "video"
        self.media_url_https = url

 class ScrapedTweet:
    def __init__(self, created_on, text, media_urls):
        self.created_on = created_on
        self.text = text
-        self.media = [ScrapedMedia(url) for url in media_urls]
+        self.media = [ScrapedMedia(url, media_type) for url, media_type in media_urls]

 # --- 1. Playwright Scraping Logic ---
 def take_error_screenshot(page, error_msg):
@@ -132,34 +132,40 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
        
        try:
            page.wait_for_selector('article', timeout=20000)
-            time.sleep(3) # Let DOM settle and images load
+            time.sleep(3)  # Let DOM settle and images load
            
            articles = page.locator('article').all()
            logging.info(f"📊 Found {len(articles)} tweets on screen. Parsing...")
            
-            for article in articles[:10]: # Check top 10 tweets
+            for article in articles[:10]:  # Check top 10 tweets
                try:
                    # Get Time
                    time_el = article.locator('time').first
                    if not time_el.is_visible():
-                        continue # Skip ads or invalid articles
+                        continue  # Skip ads or invalid articles
                    created_at = time_el.get_attribute('datetime')
                    
-                    # Get Text (FIXED: Added .first to avoid strict mode violations on Quote RTs)
+                    # Get Text
                    text_locator = article.locator('[data-testid="tweetText"]').first
                    text = text_locator.inner_text() if text_locator.is_visible() else ""
                    
-                    # Get Images
-                    image_urls = []
+                    # Get Media URLs
+                    media_urls = []
                    photo_locators = article.locator('[data-testid="tweetPhoto"] img').all()
                    for img in photo_locators:
                        src = img.get_attribute('src')
                        if src:
-                            # Convert thumbnail URL to high-res URL
                            src = re.sub(r'&name=\w+', '&name=large', src)
-                            image_urls.append(src)
-                            
-                    tweets.append(ScrapedTweet(created_at, text, image_urls))
+                            media_urls.append((src, "photo"))
+                    
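+                    # Get Video URLs -- NOTE(review): reads `src` from the videoPlayer container itself; confirm that element (not a nested <video>/<source>) carries the URL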
+                    video_locators = article.locator('[data-testid="videoPlayer"]').all()
+                    for video in video_locators:
+                        video_url = video.get_attribute('src')
+                        if video_url:
+                            media_urls.append((video_url, "video"))
+                    
+                    tweets.append(ScrapedTweet(created_at, text, media_urls))
                    
                except Exception as e:
                    logging.warning(f"⚠️ Failed to parse a specific tweet: {e}")
@@ -226,7 +232,7 @@ def make_rich(content):
        parts = raw.split('\n')
        test_part0 = re.sub(r'[…\.]+$', '', parts[0])
        if is_valid_url(test_part0):
-            return raw # Return original to preserve the paragraph break
+            return raw  # Return original to preserve the paragraph break
            
        # Fallback: assume it's a broken URL and glue it anyway
        return test_url
@@ -247,7 +253,7 @@ def make_rich(content):
        # Split by space to process words, URLs, and tags
        words = line.split(" ")
        for i, word in enumerate(words):
-            if not word: # Handle double spaces gracefully
+            if not word:  # Handle double spaces gracefully
                if i < len(words) - 1:
                    text_builder.text(" ")
                continue
@@ -319,7 +325,6 @@ def sync_feeds(args):
            tweet_time = arrow.get(tweet.created_on)
            
            if tweet_time > last_bsky_time:  # Only post new tweets
-            #if True:  # For testing, post all tweets regardless of time
                logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
                
                raw_text = tweet.text.strip()
@@ -359,7 +364,12 @@ def sync_feeds(args):
                            if blob:
                                # Inject our dynamic alt text here!
                                images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
-                
+                        elif media.type == "video":
+                            # NOTE(review): the video blob is fetched like a photo and attached through the image embed model below; confirm Bluesky accepts this or use a dedicated video embed
+                            blob = get_blob_from_url(media.media_url_https, bsky_client)
+                            if blob:
+                                images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
+
                # 🌐 Posting with Catalan language tag
                try:
                    if images: