Added new yml

2026-03-30 17:54:40 +02:00
parent 2450ab75b2
commit 9faabf48d0
1 changed files with 29 additions and 28 deletions
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -181,7 +181,6 @@ def build_video_embed(video_blob, alt_text):
 # --- Playwright Scraping ---
 def scrape_tweets_via_playwright(username, password, email, target_handle):
    """Logs in (or loads session) and scrapes tweets directly from the DOM."""
    tweets = []
    state_file = "twitter_browser_state.json"
@@ -333,19 +332,18 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
 def extract_video_url_from_tweet_page(context, tweet_url):
    """
-    Opens a tweet page and captures the best real video URL.
+    Prefer HLS playlist first because it usually contains the full playable stream
    with audio + video and proper timing metadata.
-    Preference order:
+    Fallback to direct video MP4 only if no HLS playlist is found.
    1. real video .mp4
    2. .m3u8 playlist
-    Ignores:
+    Ignore:
    - .m4s fragments
-    - audio-only mp4 URLs
+    - audio-only MP4 URLs
    """
    page = context.new_page()
    best_video_mp4_url = None
    best_m3u8_url = None
    best_video_mp4_url = None
    def is_audio_only_mp4(url, content_type):
        url_l = url.lower()
@@ -358,7 +356,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
        )
    def handle_response(response):
-        nonlocal best_video_mp4_url, best_m3u8_url
+        nonlocal best_m3u8_url, best_video_mp4_url
        try:
            url = response.url
            url_l = url.lower()
@@ -368,16 +366,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
            if ".m4s" in url_l:
                return
            if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
                logging.info(f"🔇 Ignoring audio-only MP4: {url}")
                return
            if ".mp4" in url_l or "video/mp4" in content_type_l:
                if best_video_mp4_url is None:
                    best_video_mp4_url = url
                    logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
                return
            if (
                ".m3u8" in url_l or
                "application/vnd.apple.mpegurl" in content_type_l or
@@ -388,6 +376,16 @@ def extract_video_url_from_tweet_page(context, tweet_url):
                    logging.info(f"📺 Found HLS playlist URL: {url}")
                return
            if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
                logging.info(f"🔇 Ignoring audio-only MP4: {url}")
                return
            if ".mp4" in url_l or "video/mp4" in content_type_l:
                if best_video_mp4_url is None:
                    best_video_mp4_url = url
                    logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
                return
        except Exception:
            pass
@@ -406,7 +404,10 @@ def extract_video_url_from_tweet_page(context, tweet_url):
            except Exception:
                pass
-        return best_video_mp4_url or best_m3u8_url
+        selected_url = best_m3u8_url or best_video_mp4_url
        if selected_url:
            logging.info(f"✅ Selected media URL for download: {selected_url}")
        return selected_url
    except Exception as e:
        logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
@@ -419,7 +420,9 @@ def extract_video_url_from_tweet_page(context, tweet_url):
 def download_and_crop_video(video_url, output_path):
    """
    Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
-    Requires ffmpeg installed on the system.
+    Uses ffmpeg for download and MoviePy for crop.
    HLS is preferred because it usually produces a complete muxed file.
    """
    temp_input = output_path.replace(".mp4", "_source.mp4")
    temp_output = output_path.replace(".mp4", "_cropped.mp4")
@@ -430,10 +433,13 @@ def download_and_crop_video(video_url, output_path):
        download_cmd = [
            "ffmpeg",
            "-y",
            "-protocol_whitelist", "file,http,https,tcp,tls,crypto",
            "-allowed_extensions", "ALL",
            "-i", video_url,
            "-c", "copy",
            temp_input,
        ]
        download_result = subprocess.run(
            download_cmd,
            capture_output=True,
@@ -480,10 +486,6 @@ def download_and_crop_video(video_url, output_path):
            return None
        os.replace(temp_output, output_path)
        if os.path.exists(temp_input):
            os.remove(temp_input)
        logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
        return output_path
@@ -543,8 +545,8 @@ def sync_feeds(args):
            for tweet in reversed(tweets):
                tweet_time = arrow.get(tweet.created_on)
-                if tweet_time <= last_bsky_time:
+                #if tweet_time <= last_bsky_time:
-                #if False:
+                if False:
                    continue
                logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
@@ -636,7 +638,6 @@ def main():
    load_dotenv()
    parser = argparse.ArgumentParser(description="Twitter to Bluesky Sync")
    parser.add_argument("--twitter-username", help="Your Twitter login username")
    parser.add_argument("--twitter-password", help="Your Twitter login password")
    parser.add_argument("--twitter-email", help="Your Twitter email for security challenges")