Added new yml
This commit is contained in:
@@ -5,6 +5,7 @@ import re
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
import subprocess
|
||||
from dotenv import load_dotenv
|
||||
from atproto import Client, client_utils, models
|
||||
from playwright.sync_api import sync_playwright
|
||||
@@ -167,6 +168,17 @@ def build_dynamic_alt(raw_text):
|
||||
return dynamic_alt
|
||||
|
||||
|
||||
def build_video_embed(video_blob, alt_text):
    """Wrap an uploaded video blob and its alt text in a video embed model.

    Args:
        video_blob: Blob passed through unchanged as the embed's ``video``.
        alt_text: Text passed through unchanged as the embed's ``alt``.

    Returns:
        The ``models.AppBskyEmbedVideo.Main`` instance, or ``None`` when an
        ``AttributeError`` is raised while resolving or building it (the
        error is logged as an atproto version that lacks video embeds).
    """
    try:
        # Resolve the class inside the try: the attribute lookup itself is
        # what fails when the model is not available.
        embed_factory = models.AppBskyEmbedVideo.Main
        embed = embed_factory(video=video_blob, alt=alt_text)
    except AttributeError:
        logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
        embed = None
    return embed
|
||||
|
||||
|
||||
# --- Playwright Scraping ---
|
||||
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
|
||||
@@ -321,23 +333,37 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
|
||||
def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
"""
|
||||
Opens a tweet page and captures the first real MP4 video request.
|
||||
Opens a tweet page and captures the best real video URL.
|
||||
Preference order:
|
||||
1. .mp4
|
||||
2. .m3u8
|
||||
Ignores .m4s fragment files.
|
||||
"""
|
||||
page = context.new_page()
|
||||
found_video_url = None
|
||||
best_mp4_url = None
|
||||
best_m3u8_url = None
|
||||
|
||||
def handle_response(response):
|
||||
nonlocal found_video_url
|
||||
nonlocal best_mp4_url, best_m3u8_url
|
||||
try:
|
||||
url = response.url
|
||||
content_type = response.headers.get("content-type", "")
|
||||
url = response.url.lower()
|
||||
content_type = response.headers.get("content-type", "").lower()
|
||||
|
||||
if found_video_url:
|
||||
if ".m4s" in url:
|
||||
return
|
||||
|
||||
if ".mp4" in url or "video/mp4" in content_type:
|
||||
found_video_url = url
|
||||
logging.info(f"🎥 Found video URL: {url}")
|
||||
if best_mp4_url is None:
|
||||
best_mp4_url = response.url
|
||||
logging.info(f"🎥 Found MP4 video URL: {response.url}")
|
||||
return
|
||||
|
||||
if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
|
||||
if best_m3u8_url is None:
|
||||
best_m3u8_url = response.url
|
||||
logging.info(f"📺 Found HLS playlist URL: {response.url}")
|
||||
return
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -352,11 +378,11 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
if video_player.count() > 0:
|
||||
try:
|
||||
video_player.click(force=True, timeout=3000)
|
||||
time.sleep(3)
|
||||
time.sleep(5)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return found_video_url
|
||||
return best_mp4_url or best_m3u8_url
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
||||
@@ -367,18 +393,35 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
|
||||
# --- Video Processing ---
|
||||
def download_and_crop_video(video_url, output_path):
|
||||
"""Downloads the video and crops it to max 59 seconds."""
|
||||
"""
|
||||
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
|
||||
Requires ffmpeg installed on the system.
|
||||
"""
|
||||
temp_input = output_path.replace(".mp4", "_source.mp4")
|
||||
|
||||
try:
|
||||
response = httpx.get(video_url, timeout=60, follow_redirects=True)
|
||||
if response.status_code != 200:
|
||||
logging.error(f"❌ Failed to download video: {video_url} (status {response.status_code})")
|
||||
logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
|
||||
|
||||
download_cmd = [
|
||||
"ffmpeg",
|
||||
"-y",
|
||||
"-i", video_url,
|
||||
"-c", "copy",
|
||||
temp_input,
|
||||
]
|
||||
download_result = subprocess.run(
|
||||
download_cmd,
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if download_result.returncode != 0:
|
||||
logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
|
||||
return None
|
||||
|
||||
with open(output_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
logging.info(f"✅ Video downloaded: {output_path}")
|
||||
logging.info(f"✅ Video downloaded: {temp_input}")
|
||||
|
||||
video_clip = VideoFileClip(output_path)
|
||||
video_clip = VideoFileClip(temp_input)
|
||||
end_time = min(59, float(video_clip.duration))
|
||||
|
||||
if hasattr(video_clip, "subclipped"):
|
||||
@@ -398,22 +441,17 @@ def download_and_crop_video(video_url, output_path):
|
||||
cropped_clip.close()
|
||||
|
||||
os.replace(temp_output, output_path)
|
||||
|
||||
if os.path.exists(temp_input):
|
||||
os.remove(temp_input)
|
||||
|
||||
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
|
||||
return output_path
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"❌ Error processing video: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def build_video_embed(video_blob, alt_text):
|
||||
try:
|
||||
return models.AppBskyEmbedVideo.Main(
|
||||
video=video_blob,
|
||||
alt=alt_text
|
||||
)
|
||||
except AttributeError:
|
||||
logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
|
||||
if os.path.exists(temp_input):
|
||||
os.remove(temp_input)
|
||||
return None
|
||||
|
||||
|
||||
@@ -504,15 +542,17 @@ def sync_feeds(args):
|
||||
try:
|
||||
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
||||
if not real_video_url:
|
||||
logging.warning(f"⚠️ Could not resolve real video URL for {tweet.tweet_url}")
|
||||
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
|
||||
continue
|
||||
|
||||
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
||||
if not cropped_video_path:
|
||||
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
|
||||
continue
|
||||
|
||||
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
||||
if not video_blob:
|
||||
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
|
||||
continue
|
||||
|
||||
video_embed = build_video_embed(video_blob, dynamic_alt)
|
||||
|
||||
Reference in New Issue
Block a user