Added new yml
This commit is contained in:
@@ -181,7 +181,6 @@ def build_video_embed(video_blob, alt_text):
|
|||||||
|
|
||||||
# --- Playwright Scraping ---
|
# --- Playwright Scraping ---
|
||||||
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||||
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
|
|
||||||
tweets = []
|
tweets = []
|
||||||
state_file = "twitter_browser_state.json"
|
state_file = "twitter_browser_state.json"
|
||||||
|
|
||||||
@@ -333,19 +332,18 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
|||||||
|
|
||||||
def extract_video_url_from_tweet_page(context, tweet_url):
|
def extract_video_url_from_tweet_page(context, tweet_url):
|
||||||
"""
|
"""
|
||||||
Opens a tweet page and captures the best real video URL.
|
Prefer HLS playlist first because it usually contains the full playable stream
|
||||||
|
with audio + video and proper timing metadata.
|
||||||
|
|
||||||
Preference order:
|
Fallback to direct video MP4 only if no HLS playlist is found.
|
||||||
1. real video .mp4
|
|
||||||
2. .m3u8 playlist
|
|
||||||
|
|
||||||
Ignores:
|
Ignore:
|
||||||
- .m4s fragments
|
- .m4s fragments
|
||||||
- audio-only mp4 URLs
|
- audio-only MP4 URLs
|
||||||
"""
|
"""
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
best_video_mp4_url = None
|
|
||||||
best_m3u8_url = None
|
best_m3u8_url = None
|
||||||
|
best_video_mp4_url = None
|
||||||
|
|
||||||
def is_audio_only_mp4(url, content_type):
|
def is_audio_only_mp4(url, content_type):
|
||||||
url_l = url.lower()
|
url_l = url.lower()
|
||||||
@@ -358,7 +356,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def handle_response(response):
|
def handle_response(response):
|
||||||
nonlocal best_video_mp4_url, best_m3u8_url
|
nonlocal best_m3u8_url, best_video_mp4_url
|
||||||
try:
|
try:
|
||||||
url = response.url
|
url = response.url
|
||||||
url_l = url.lower()
|
url_l = url.lower()
|
||||||
@@ -368,16 +366,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
if ".m4s" in url_l:
|
if ".m4s" in url_l:
|
||||||
return
|
return
|
||||||
|
|
||||||
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
|
|
||||||
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
|
|
||||||
return
|
|
||||||
|
|
||||||
if ".mp4" in url_l or "video/mp4" in content_type_l:
|
|
||||||
if best_video_mp4_url is None:
|
|
||||||
best_video_mp4_url = url
|
|
||||||
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
|
|
||||||
return
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
".m3u8" in url_l or
|
".m3u8" in url_l or
|
||||||
"application/vnd.apple.mpegurl" in content_type_l or
|
"application/vnd.apple.mpegurl" in content_type_l or
|
||||||
@@ -388,6 +376,16 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
logging.info(f"📺 Found HLS playlist URL: {url}")
|
logging.info(f"📺 Found HLS playlist URL: {url}")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
|
||||||
|
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if ".mp4" in url_l or "video/mp4" in content_type_l:
|
||||||
|
if best_video_mp4_url is None:
|
||||||
|
best_video_mp4_url = url
|
||||||
|
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
|
||||||
|
return
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -406,7 +404,10 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return best_video_mp4_url or best_m3u8_url
|
selected_url = best_m3u8_url or best_video_mp4_url
|
||||||
|
if selected_url:
|
||||||
|
logging.info(f"✅ Selected media URL for download: {selected_url}")
|
||||||
|
return selected_url
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
||||||
@@ -419,7 +420,9 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
def download_and_crop_video(video_url, output_path):
|
def download_and_crop_video(video_url, output_path):
|
||||||
"""
|
"""
|
||||||
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
|
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
|
||||||
Requires ffmpeg installed on the system.
|
Uses ffmpeg for download and MoviePy for crop.
|
||||||
|
|
||||||
|
HLS is preferred because it usually produces a complete muxed file.
|
||||||
"""
|
"""
|
||||||
temp_input = output_path.replace(".mp4", "_source.mp4")
|
temp_input = output_path.replace(".mp4", "_source.mp4")
|
||||||
temp_output = output_path.replace(".mp4", "_cropped.mp4")
|
temp_output = output_path.replace(".mp4", "_cropped.mp4")
|
||||||
@@ -430,10 +433,13 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
download_cmd = [
|
download_cmd = [
|
||||||
"ffmpeg",
|
"ffmpeg",
|
||||||
"-y",
|
"-y",
|
||||||
|
"-protocol_whitelist", "file,http,https,tcp,tls,crypto",
|
||||||
|
"-allowed_extensions", "ALL",
|
||||||
"-i", video_url,
|
"-i", video_url,
|
||||||
"-c", "copy",
|
"-c", "copy",
|
||||||
temp_input,
|
temp_input,
|
||||||
]
|
]
|
||||||
|
|
||||||
download_result = subprocess.run(
|
download_result = subprocess.run(
|
||||||
download_cmd,
|
download_cmd,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@@ -480,10 +486,6 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
os.replace(temp_output, output_path)
|
os.replace(temp_output, output_path)
|
||||||
|
|
||||||
if os.path.exists(temp_input):
|
|
||||||
os.remove(temp_input)
|
|
||||||
|
|
||||||
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
|
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
|
||||||
return output_path
|
return output_path
|
||||||
|
|
||||||
@@ -543,8 +545,8 @@ def sync_feeds(args):
|
|||||||
for tweet in reversed(tweets):
|
for tweet in reversed(tweets):
|
||||||
tweet_time = arrow.get(tweet.created_on)
|
tweet_time = arrow.get(tweet.created_on)
|
||||||
|
|
||||||
if tweet_time <= last_bsky_time:
|
#if tweet_time <= last_bsky_time:
|
||||||
#if False:
|
if False:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
||||||
@@ -636,7 +638,6 @@ def main():
|
|||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Twitter to Bluesky Sync")
|
parser = argparse.ArgumentParser(description="Twitter to Bluesky Sync")
|
||||||
|
|
||||||
parser.add_argument("--twitter-username", help="Your Twitter login username")
|
parser.add_argument("--twitter-username", help="Your Twitter login username")
|
||||||
parser.add_argument("--twitter-password", help="Your Twitter login password")
|
parser.add_argument("--twitter-password", help="Your Twitter login password")
|
||||||
parser.add_argument("--twitter-email", help="Your Twitter email for security challenges")
|
parser.add_argument("--twitter-email", help="Your Twitter email for security challenges")
|
||||||
|
|||||||
Reference in New Issue
Block a user