Added new yml
This commit is contained in:
@@ -181,7 +181,6 @@ def build_video_embed(video_blob, alt_text):
|
||||
|
||||
# --- Playwright Scraping ---
|
||||
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
|
||||
tweets = []
|
||||
state_file = "twitter_browser_state.json"
|
||||
|
||||
@@ -333,19 +332,18 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
|
||||
def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
"""
|
||||
Opens a tweet page and captures the best real video URL.
|
||||
Prefer HLS playlist first because it usually contains the full playable stream
|
||||
with audio + video and proper timing metadata.
|
||||
|
||||
Preference order:
|
||||
1. real video .mp4
|
||||
2. .m3u8 playlist
|
||||
Fallback to direct video MP4 only if no HLS playlist is found.
|
||||
|
||||
Ignores:
|
||||
Ignore:
|
||||
- .m4s fragments
|
||||
- audio-only mp4 URLs
|
||||
- audio-only MP4 URLs
|
||||
"""
|
||||
page = context.new_page()
|
||||
best_video_mp4_url = None
|
||||
best_m3u8_url = None
|
||||
best_video_mp4_url = None
|
||||
|
||||
def is_audio_only_mp4(url, content_type):
|
||||
url_l = url.lower()
|
||||
@@ -358,7 +356,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
)
|
||||
|
||||
def handle_response(response):
|
||||
nonlocal best_video_mp4_url, best_m3u8_url
|
||||
nonlocal best_m3u8_url, best_video_mp4_url
|
||||
try:
|
||||
url = response.url
|
||||
url_l = url.lower()
|
||||
@@ -368,16 +366,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
if ".m4s" in url_l:
|
||||
return
|
||||
|
||||
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
|
||||
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
|
||||
return
|
||||
|
||||
if ".mp4" in url_l or "video/mp4" in content_type_l:
|
||||
if best_video_mp4_url is None:
|
||||
best_video_mp4_url = url
|
||||
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
|
||||
return
|
||||
|
||||
if (
|
||||
".m3u8" in url_l or
|
||||
"application/vnd.apple.mpegurl" in content_type_l or
|
||||
@@ -388,6 +376,16 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
logging.info(f"📺 Found HLS playlist URL: {url}")
|
||||
return
|
||||
|
||||
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
|
||||
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
|
||||
return
|
||||
|
||||
if ".mp4" in url_l or "video/mp4" in content_type_l:
|
||||
if best_video_mp4_url is None:
|
||||
best_video_mp4_url = url
|
||||
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
|
||||
return
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -406,7 +404,10 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return best_video_mp4_url or best_m3u8_url
|
||||
selected_url = best_m3u8_url or best_video_mp4_url
|
||||
if selected_url:
|
||||
logging.info(f"✅ Selected media URL for download: {selected_url}")
|
||||
return selected_url
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
||||
@@ -419,7 +420,9 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
def download_and_crop_video(video_url, output_path):
|
||||
"""
|
||||
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
|
||||
Requires ffmpeg installed on the system.
|
||||
Uses ffmpeg for download and MoviePy for crop.
|
||||
|
||||
HLS is preferred because it usually produces a complete muxed file.
|
||||
"""
|
||||
temp_input = output_path.replace(".mp4", "_source.mp4")
|
||||
temp_output = output_path.replace(".mp4", "_cropped.mp4")
|
||||
@@ -430,10 +433,13 @@ def download_and_crop_video(video_url, output_path):
|
||||
download_cmd = [
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-protocol_whitelist", "file,http,https,tcp,tls,crypto",
|
||||
"-allowed_extensions", "ALL",
|
||||
"-i", video_url,
|
||||
"-c", "copy",
|
||||
temp_input,
|
||||
]
|
||||
|
||||
download_result = subprocess.run(
|
||||
download_cmd,
|
||||
capture_output=True,
|
||||
@@ -480,10 +486,6 @@ def download_and_crop_video(video_url, output_path):
|
||||
return None
|
||||
|
||||
os.replace(temp_output, output_path)
|
||||
|
||||
if os.path.exists(temp_input):
|
||||
os.remove(temp_input)
|
||||
|
||||
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
|
||||
return output_path
|
||||
|
||||
@@ -543,8 +545,8 @@ def sync_feeds(args):
|
||||
for tweet in reversed(tweets):
|
||||
tweet_time = arrow.get(tweet.created_on)
|
||||
|
||||
if tweet_time <= last_bsky_time:
|
||||
#if False:
|
||||
#if tweet_time <= last_bsky_time:
|
||||
if False:
|
||||
continue
|
||||
|
||||
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
||||
@@ -636,7 +638,6 @@ def main():
|
||||
load_dotenv()
|
||||
|
||||
parser = argparse.ArgumentParser(description="Twitter to Bluesky Sync")
|
||||
|
||||
parser.add_argument("--twitter-username", help="Your Twitter login username")
|
||||
parser.add_argument("--twitter-password", help="Your Twitter login password")
|
||||
parser.add_argument("--twitter-email", help="Your Twitter email for security challenges")
|
||||
|
||||
Reference in New Issue
Block a user