Added new yml
This commit is contained in:
@@ -334,34 +334,58 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
|||||||
def extract_video_url_from_tweet_page(context, tweet_url):
|
def extract_video_url_from_tweet_page(context, tweet_url):
|
||||||
"""
|
"""
|
||||||
Opens a tweet page and captures the best real video URL.
|
Opens a tweet page and captures the best real video URL.
|
||||||
|
|
||||||
Preference order:
|
Preference order:
|
||||||
1. .mp4
|
1. real video .mp4
|
||||||
2. .m3u8
|
2. .m3u8 playlist
|
||||||
Ignores .m4s fragment files.
|
|
||||||
|
Ignores:
|
||||||
|
- .m4s fragments
|
||||||
|
- audio-only mp4 URLs
|
||||||
"""
|
"""
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
best_mp4_url = None
|
best_video_mp4_url = None
|
||||||
best_m3u8_url = None
|
best_m3u8_url = None
|
||||||
|
|
||||||
|
def is_audio_only_mp4(url, content_type):
    """
    Report whether a captured response looks like an audio-only MP4.

    The check is purely textual and case-insensitive. A response is
    treated as audio-only when any of the following holds:

    - the URL contains "/aud/" or "/audio/"
    - the URL contains "mp4a"
    - the content type mentions "audio/" and does not mention "video/"

    Args:
        url: Response URL. None is treated as an empty string.
        content_type: Value of the content-type header. None is treated
            as an empty string.

    Returns:
        True when the response matches one of the audio-only markers,
        False otherwise.
    """
    # Normalise missing values so a response without a URL or header
    # cannot raise AttributeError on .lower().
    url_l = (url or "").lower()
    content_type_l = (content_type or "").lower()

    # URL fragments that mark an audio track rather than a video track.
    audio_url_markers = ("/aud/", "/audio/", "mp4a")
    if any(marker in url_l for marker in audio_url_markers):
        return True

    # Fall back to the header: audio-only when it names audio and no video.
    return "audio/" in content_type_l and "video/" not in content_type_l
|
||||||
|
|
||||||
def handle_response(response):
|
def handle_response(response):
|
||||||
nonlocal best_mp4_url, best_m3u8_url
|
nonlocal best_video_mp4_url, best_m3u8_url
|
||||||
try:
|
try:
|
||||||
url = response.url.lower()
|
url = response.url
|
||||||
content_type = response.headers.get("content-type", "").lower()
|
url_l = url.lower()
|
||||||
|
content_type = response.headers.get("content-type", "")
|
||||||
|
content_type_l = content_type.lower()
|
||||||
|
|
||||||
if ".m4s" in url:
|
if ".m4s" in url_l:
|
||||||
return
|
return
|
||||||
|
|
||||||
if ".mp4" in url or "video/mp4" in content_type:
|
if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
|
||||||
if best_mp4_url is None:
|
logging.info(f"🔇 Ignoring audio-only MP4: {url}")
|
||||||
best_mp4_url = response.url
|
|
||||||
logging.info(f"🎥 Found MP4 video URL: {response.url}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
|
if ".mp4" in url_l or "video/mp4" in content_type_l:
|
||||||
|
if best_video_mp4_url is None:
|
||||||
|
best_video_mp4_url = url
|
||||||
|
logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
|
||||||
|
return
|
||||||
|
|
||||||
|
if (
|
||||||
|
".m3u8" in url_l or
|
||||||
|
"application/vnd.apple.mpegurl" in content_type_l or
|
||||||
|
"application/x-mpegurl" in content_type_l
|
||||||
|
):
|
||||||
if best_m3u8_url is None:
|
if best_m3u8_url is None:
|
||||||
best_m3u8_url = response.url
|
best_m3u8_url = url
|
||||||
logging.info(f"📺 Found HLS playlist URL: {response.url}")
|
logging.info(f"📺 Found HLS playlist URL: {url}")
|
||||||
return
|
return
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -382,7 +406,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return best_mp4_url or best_m3u8_url
|
return best_video_mp4_url or best_m3u8_url
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
|
||||||
@@ -398,6 +422,7 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
Requires ffmpeg installed on the system.
|
Requires ffmpeg installed on the system.
|
||||||
"""
|
"""
|
||||||
temp_input = output_path.replace(".mp4", "_source.mp4")
|
temp_input = output_path.replace(".mp4", "_source.mp4")
|
||||||
|
temp_output = output_path.replace(".mp4", "_cropped.mp4")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
|
logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
|
||||||
@@ -419,17 +444,27 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
|
logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if not os.path.exists(temp_input) or os.path.getsize(temp_input) == 0:
|
||||||
|
logging.error("❌ Downloaded video source file is missing or empty.")
|
||||||
|
return None
|
||||||
|
|
||||||
logging.info(f"✅ Video downloaded: {temp_input}")
|
logging.info(f"✅ Video downloaded: {temp_input}")
|
||||||
|
|
||||||
video_clip = VideoFileClip(temp_input)
|
video_clip = VideoFileClip(temp_input)
|
||||||
end_time = min(59, float(video_clip.duration))
|
duration = float(video_clip.duration) if video_clip.duration else 0
|
||||||
|
|
||||||
|
if duration <= 0:
|
||||||
|
video_clip.close()
|
||||||
|
logging.error("❌ Downloaded video has invalid or unknown duration.")
|
||||||
|
return None
|
||||||
|
|
||||||
|
end_time = min(59, duration)
|
||||||
|
|
||||||
if hasattr(video_clip, "subclipped"):
|
if hasattr(video_clip, "subclipped"):
|
||||||
cropped_clip = video_clip.subclipped(0, end_time)
|
cropped_clip = video_clip.subclipped(0, end_time)
|
||||||
else:
|
else:
|
||||||
cropped_clip = video_clip.subclip(0, end_time)
|
cropped_clip = video_clip.subclip(0, end_time)
|
||||||
|
|
||||||
temp_output = output_path.replace(".mp4", "_cropped.mp4")
|
|
||||||
cropped_clip.write_videofile(
|
cropped_clip.write_videofile(
|
||||||
temp_output,
|
temp_output,
|
||||||
codec="libx264",
|
codec="libx264",
|
||||||
@@ -440,6 +475,10 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
video_clip.close()
|
video_clip.close()
|
||||||
cropped_clip.close()
|
cropped_clip.close()
|
||||||
|
|
||||||
|
if not os.path.exists(temp_output) or os.path.getsize(temp_output) == 0:
|
||||||
|
logging.error("❌ Cropped video output is missing or empty.")
|
||||||
|
return None
|
||||||
|
|
||||||
os.replace(temp_output, output_path)
|
os.replace(temp_output, output_path)
|
||||||
|
|
||||||
if os.path.exists(temp_input):
|
if os.path.exists(temp_input):
|
||||||
@@ -450,10 +489,16 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"❌ Error processing video: {e}")
|
logging.error(f"❌ Error processing video: {e}")
|
||||||
if os.path.exists(temp_input):
|
|
||||||
os.remove(temp_input)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
finally:
|
||||||
|
for path in [temp_input, temp_output]:
|
||||||
|
if os.path.exists(path):
|
||||||
|
try:
|
||||||
|
os.remove(path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# --- Main Sync Function ---
|
# --- Main Sync Function ---
|
||||||
def sync_feeds(args):
|
def sync_feeds(args):
|
||||||
@@ -498,7 +543,7 @@ def sync_feeds(args):
|
|||||||
for tweet in reversed(tweets):
|
for tweet in reversed(tweets):
|
||||||
tweet_time = arrow.get(tweet.created_on)
|
tweet_time = arrow.get(tweet.created_on)
|
||||||
|
|
||||||
#if tweet_time <= last_bsky_time
|
#if tweet_time <= last_bsky_time:
|
||||||
if False:
|
if False:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user