Added new yml
This commit is contained in:
@@ -19,15 +19,15 @@ logging.basicConfig(
|
||||
|
||||
# --- Custom Classes to replace Tweety ---
|
||||
class ScrapedMedia:
|
||||
def __init__(self, url):
|
||||
self.type = "photo"
|
||||
def __init__(self, url, media_type="photo"):
|
||||
self.type = media_type # Type can be "photo" or "video"
|
||||
self.media_url_https = url
|
||||
|
||||
class ScrapedTweet:
|
||||
def __init__(self, created_on, text, media_urls):
|
||||
self.created_on = created_on
|
||||
self.text = text
|
||||
self.media = [ScrapedMedia(url) for url in media_urls]
|
||||
self.media = [ScrapedMedia(url, media_type) for url, media_type in media_urls]
|
||||
|
||||
# --- 1. Playwright Scraping Logic ---
|
||||
def take_error_screenshot(page, error_msg):
|
||||
@@ -145,21 +145,27 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
continue # Skip ads or invalid articles
|
||||
created_at = time_el.get_attribute('datetime')
|
||||
|
||||
# Get Text (FIXED: Added .first to avoid strict mode violations on Quote RTs)
|
||||
# Get Text
|
||||
text_locator = article.locator('[data-testid="tweetText"]').first
|
||||
text = text_locator.inner_text() if text_locator.is_visible() else ""
|
||||
|
||||
# Get Images
|
||||
image_urls = []
|
||||
# Get Media URLs
|
||||
media_urls = []
|
||||
photo_locators = article.locator('[data-testid="tweetPhoto"] img').all()
|
||||
for img in photo_locators:
|
||||
src = img.get_attribute('src')
|
||||
if src:
|
||||
# Convert thumbnail URL to high-res URL
|
||||
src = re.sub(r'&name=\w+', '&name=large', src)
|
||||
image_urls.append(src)
|
||||
media_urls.append((src, "photo"))
|
||||
|
||||
tweets.append(ScrapedTweet(created_at, text, image_urls))
|
||||
# Get Video URLs
|
||||
video_locators = article.locator('[data-testid="videoPlayer"]').all()
|
||||
for video in video_locators:
|
||||
video_url = video.get_attribute('src')
|
||||
if video_url:
|
||||
media_urls.append((video_url, "video"))
|
||||
|
||||
tweets.append(ScrapedTweet(created_at, text, media_urls))
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ Failed to parse a specific tweet: {e}")
|
||||
@@ -319,7 +325,6 @@ def sync_feeds(args):
|
||||
tweet_time = arrow.get(tweet.created_on)
|
||||
|
||||
if tweet_time > last_bsky_time: # Only post new tweets
|
||||
#if True: # For testing, post all tweets regardless of time
|
||||
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
||||
|
||||
raw_text = tweet.text.strip()
|
||||
@@ -359,6 +364,11 @@ def sync_feeds(args):
|
||||
if blob:
|
||||
# Inject our dynamic alt text here!
|
||||
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
||||
elif media.type == "video":
|
||||
# Handle video uploads if necessary (this part may vary based on your API capabilities)
|
||||
blob = get_blob_from_url(media.media_url_https, bsky_client)
|
||||
if blob:
|
||||
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
||||
|
||||
# 🌐 Posting with Catalan language tag
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user