Added new yml

This commit is contained in:
Guillem Hernandez Sola
2026-03-30 17:54:40 +02:00
parent 2450ab75b2
commit 9faabf48d0

View File

@@ -181,7 +181,6 @@ def build_video_embed(video_blob, alt_text):
# --- Playwright Scraping ---
def scrape_tweets_via_playwright(username, password, email, target_handle):
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
tweets = []
state_file = "twitter_browser_state.json"
@@ -333,19 +332,18 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
def extract_video_url_from_tweet_page(context, tweet_url):
"""
Opens a tweet page and captures the best real video URL.
Prefer HLS playlist first because it usually contains the full playable stream
with audio + video and proper timing metadata.
Preference order:
1. real video .mp4
2. .m3u8 playlist
Fallback to direct video MP4 only if no HLS playlist is found.
Ignores:
Ignore:
- .m4s fragments
- audio-only mp4 URLs
- audio-only MP4 URLs
"""
page = context.new_page()
best_video_mp4_url = None
best_m3u8_url = None
best_video_mp4_url = None
def is_audio_only_mp4(url, content_type):
url_l = url.lower()
@@ -358,7 +356,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
)
def handle_response(response):
nonlocal best_video_mp4_url, best_m3u8_url
nonlocal best_m3u8_url, best_video_mp4_url
try:
url = response.url
url_l = url.lower()
@@ -368,16 +366,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
if ".m4s" in url_l:
return
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
return
if ".mp4" in url_l or "video/mp4" in content_type_l:
if best_video_mp4_url is None:
best_video_mp4_url = url
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
return
if (
".m3u8" in url_l or
"application/vnd.apple.mpegurl" in content_type_l or
@@ -388,6 +376,16 @@ def extract_video_url_from_tweet_page(context, tweet_url):
logging.info(f"📺 Found HLS playlist URL: {url}")
return
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
return
if ".mp4" in url_l or "video/mp4" in content_type_l:
if best_video_mp4_url is None:
best_video_mp4_url = url
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
return
except Exception:
pass
@@ -406,7 +404,10 @@ def extract_video_url_from_tweet_page(context, tweet_url):
except Exception:
pass
return best_video_mp4_url or best_m3u8_url
selected_url = best_m3u8_url or best_video_mp4_url
if selected_url:
logging.info(f"✅ Selected media URL for download: {selected_url}")
return selected_url
except Exception as e:
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
@@ -419,7 +420,9 @@ def extract_video_url_from_tweet_page(context, tweet_url):
def download_and_crop_video(video_url, output_path):
"""
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
Requires ffmpeg installed on the system.
Uses ffmpeg for download and MoviePy for crop.
HLS is preferred because it usually produces a complete muxed file.
"""
temp_input = output_path.replace(".mp4", "_source.mp4")
temp_output = output_path.replace(".mp4", "_cropped.mp4")
@@ -430,10 +433,13 @@ def download_and_crop_video(video_url, output_path):
download_cmd = [
"ffmpeg",
"-y",
"-protocol_whitelist", "file,http,https,tcp,tls,crypto",
"-allowed_extensions", "ALL",
"-i", video_url,
"-c", "copy",
temp_input,
]
download_result = subprocess.run(
download_cmd,
capture_output=True,
@@ -480,10 +486,6 @@ def download_and_crop_video(video_url, output_path):
return None
os.replace(temp_output, output_path)
if os.path.exists(temp_input):
os.remove(temp_input)
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
return output_path
@@ -543,8 +545,8 @@ def sync_feeds(args):
for tweet in reversed(tweets):
tweet_time = arrow.get(tweet.created_on)
if tweet_time <= last_bsky_time:
#if False:
#if tweet_time <= last_bsky_time:
if False:
continue
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
@@ -636,7 +638,6 @@ def main():
load_dotenv()
parser = argparse.ArgumentParser(description="Twitter to Bluesky Sync")
parser.add_argument("--twitter-username", help="Your Twitter login username")
parser.add_argument("--twitter-password", help="Your Twitter login password")
parser.add_argument("--twitter-email", help="Your Twitter email for security challenges")