Added new yml
This commit is contained in:
@@ -5,6 +5,7 @@ import re
|
|||||||
import httpx
|
import httpx
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from atproto import Client, client_utils, models
|
from atproto import Client, client_utils, models
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
@@ -167,6 +168,17 @@ def build_dynamic_alt(raw_text):
|
|||||||
return dynamic_alt
|
return dynamic_alt
|
||||||
|
|
||||||
|
|
||||||
|
def build_video_embed(video_blob, alt_text):
    """Wrap a video blob and its alt text in a Bluesky video embed.

    Args:
        video_blob: Blob passed through as the embed's ``video`` field.
        alt_text: Text passed through as the embed's ``alt`` field.

    Returns:
        The ``models.AppBskyEmbedVideo.Main`` instance, or ``None`` when an
        ``AttributeError`` is raised while building it (an error is logged).
    """
    try:
        embed = models.AppBskyEmbedVideo.Main(video=video_blob, alt=alt_text)
    except AttributeError:
        # The log message attributes this to an atproto release that lacks
        # the video embed model; the caller receives None instead of a crash.
        logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
        embed = None
    return embed
|
||||||
|
|
||||||
|
|
||||||
# --- Playwright Scraping ---
|
# --- Playwright Scraping ---
|
||||||
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||||
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
|
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
|
||||||
@@ -321,23 +333,37 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
|||||||
|
|
||||||
def extract_video_url_from_tweet_page(context, tweet_url):
|
def extract_video_url_from_tweet_page(context, tweet_url):
|
||||||
"""
|
"""
|
||||||
Opens a tweet page and captures the first real MP4 video request.
|
Opens a tweet page and captures the best real video URL.
|
||||||
|
Preference order:
|
||||||
|
1. .mp4
|
||||||
|
2. .m3u8
|
||||||
|
Ignores .m4s fragment files.
|
||||||
"""
|
"""
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
found_video_url = None
|
best_mp4_url = None
|
||||||
|
best_m3u8_url = None
|
||||||
|
|
||||||
def handle_response(response):
|
def handle_response(response):
|
||||||
nonlocal found_video_url
|
nonlocal best_mp4_url, best_m3u8_url
|
||||||
try:
|
try:
|
||||||
url = response.url
|
url = response.url.lower()
|
||||||
content_type = response.headers.get("content-type", "")
|
content_type = response.headers.get("content-type", "").lower()
|
||||||
|
|
||||||
if found_video_url:
|
if ".m4s" in url:
|
||||||
return
|
return
|
||||||
|
|
||||||
if ".mp4" in url or "video/mp4" in content_type:
|
if ".mp4" in url or "video/mp4" in content_type:
|
||||||
found_video_url = url
|
if best_mp4_url is None:
|
||||||
logging.info(f"🎥 Found video URL: {url}")
|
best_mp4_url = response.url
|
||||||
|
logging.info(f"🎥 Found MP4 video URL: {response.url}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
|
||||||
|
if best_m3u8_url is None:
|
||||||
|
best_m3u8_url = response.url
|
||||||
|
logging.info(f"📺 Found HLS playlist URL: {response.url}")
|
||||||
|
return
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -352,11 +378,11 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
if video_player.count() > 0:
|
if video_player.count() > 0:
|
||||||
try:
|
try:
|
||||||
video_player.click(force=True, timeout=3000)
|
video_player.click(force=True, timeout=3000)
|
||||||
time.sleep(3)
|
time.sleep(5)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return found_video_url
|
return best_mp4_url or best_m3u8_url
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
||||||
@@ -367,18 +393,35 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
|
|
||||||
# --- Video Processing ---
|
# --- Video Processing ---
|
||||||
def download_and_crop_video(video_url, output_path):
|
def download_and_crop_video(video_url, output_path):
|
||||||
"""Downloads the video and crops it to max 59 seconds."""
|
"""
|
||||||
|
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
|
||||||
|
Requires ffmpeg installed on the system.
|
||||||
|
"""
|
||||||
|
temp_input = output_path.replace(".mp4", "_source.mp4")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = httpx.get(video_url, timeout=60, follow_redirects=True)
|
logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
|
||||||
if response.status_code != 200:
|
|
||||||
logging.error(f"❌ Failed to download video: {video_url} (status {response.status_code})")
|
download_cmd = [
|
||||||
|
"ffmpeg",
|
||||||
|
"-y",
|
||||||
|
"-i", video_url,
|
||||||
|
"-c", "copy",
|
||||||
|
temp_input,
|
||||||
|
]
|
||||||
|
download_result = subprocess.run(
|
||||||
|
download_cmd,
|
||||||
|
capture_output=True,
|
||||||
|
text=True
|
||||||
|
)
|
||||||
|
|
||||||
|
if download_result.returncode != 0:
|
||||||
|
logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
with open(output_path, "wb") as f:
|
logging.info(f"✅ Video downloaded: {temp_input}")
|
||||||
f.write(response.content)
|
|
||||||
logging.info(f"✅ Video downloaded: {output_path}")
|
|
||||||
|
|
||||||
video_clip = VideoFileClip(output_path)
|
video_clip = VideoFileClip(temp_input)
|
||||||
end_time = min(59, float(video_clip.duration))
|
end_time = min(59, float(video_clip.duration))
|
||||||
|
|
||||||
if hasattr(video_clip, "subclipped"):
|
if hasattr(video_clip, "subclipped"):
|
||||||
@@ -398,22 +441,17 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
cropped_clip.close()
|
cropped_clip.close()
|
||||||
|
|
||||||
os.replace(temp_output, output_path)
|
os.replace(temp_output, output_path)
|
||||||
|
|
||||||
|
if os.path.exists(temp_input):
|
||||||
|
os.remove(temp_input)
|
||||||
|
|
||||||
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
|
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
|
||||||
return output_path
|
return output_path
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"❌ Error processing video: {e}")
|
logging.error(f"❌ Error processing video: {e}")
|
||||||
return None
|
if os.path.exists(temp_input):
|
||||||
|
os.remove(temp_input)
|
||||||
|
|
||||||
def build_video_embed(video_blob, alt_text):
|
|
||||||
try:
|
|
||||||
return models.AppBskyEmbedVideo.Main(
|
|
||||||
video=video_blob,
|
|
||||||
alt=alt_text
|
|
||||||
)
|
|
||||||
except AttributeError:
|
|
||||||
logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -504,15 +542,17 @@ def sync_feeds(args):
|
|||||||
try:
|
try:
|
||||||
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
||||||
if not real_video_url:
|
if not real_video_url:
|
||||||
logging.warning(f"⚠️ Could not resolve real video URL for {tweet.tweet_url}")
|
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
||||||
if not cropped_video_path:
|
if not cropped_video_path:
|
||||||
|
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
||||||
if not video_blob:
|
if not video_blob:
|
||||||
|
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
video_embed = build_video_embed(video_blob, dynamic_alt)
|
video_embed = build_video_embed(video_blob, dynamic_alt)
|
||||||
|
|||||||
Reference in New Issue
Block a user