Added new yml
This commit is contained in:
@@ -19,15 +19,15 @@ logging.basicConfig(
|
||||
|
||||
# --- Custom Classes to replace Tweety ---
|
||||
class ScrapedMedia:
    """One media attachment scraped from a tweet.

    Exposes the two attributes the posting code reads from each media
    item: ``type`` and ``media_url_https``.

    Args:
        url: Source URL of the media.
        media_type: Kind of media; "photo" (the default) or "video".
    """

    def __init__(self, url, media_type="photo"):
        self.type = media_type  # Type can be "photo" or "video"
        self.media_url_https = url


class ScrapedTweet:
    """A scraped tweet: timestamp, text and its media attachments.

    Args:
        created_on: Value of the tweet's ``datetime`` attribute as scraped.
        text: Tweet text ("" when the tweet has no visible text).
        media_urls: Iterable describing the attachments. Each entry is
            either a ``(url, media_type)`` pair or a bare URL string; a
            bare string is treated as a photo so callers that still pass
            a plain list of image URLs keep working. ``None`` means no
            media.
    """

    def __init__(self, created_on, text, media_urls):
        self.created_on = created_on
        self.text = text
        self.media = [self._to_media(entry) for entry in (media_urls or ())]

    @staticmethod
    def _to_media(entry):
        """Build a ScrapedMedia from a bare URL or a (url, media_type) pair."""
        if isinstance(entry, str):
            # Must be checked before unpacking: a string is itself iterable,
            # so unpacking it would either fail or split it into characters.
            return ScrapedMedia(entry)
        url, media_type = entry
        return ScrapedMedia(url, media_type)
|
||||
|
||||
# --- 1. Playwright Scraping Logic ---
|
||||
def take_error_screenshot(page, error_msg):
|
||||
@@ -132,34 +132,40 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
|
||||
try:
|
||||
page.wait_for_selector('article', timeout=20000)
|
||||
time.sleep(3) # Let DOM settle and images load
|
||||
time.sleep(3) # Let DOM settle and images load
|
||||
|
||||
articles = page.locator('article').all()
|
||||
logging.info(f"📊 Found {len(articles)} tweets on screen. Parsing...")
|
||||
|
||||
for article in articles[:10]: # Check top 10 tweets
|
||||
for article in articles[:10]: # Check top 10 tweets
|
||||
try:
|
||||
# Get Time
|
||||
time_el = article.locator('time').first
|
||||
if not time_el.is_visible():
|
||||
continue # Skip ads or invalid articles
|
||||
continue # Skip ads or invalid articles
|
||||
created_at = time_el.get_attribute('datetime')
|
||||
|
||||
# Get Text (FIXED: Added .first to avoid strict mode violations on Quote RTs)
|
||||
# Get Text
|
||||
text_locator = article.locator('[data-testid="tweetText"]').first
|
||||
text = text_locator.inner_text() if text_locator.is_visible() else ""
|
||||
|
||||
# Get Images
|
||||
image_urls = []
|
||||
# Get Media URLs
|
||||
media_urls = []
|
||||
photo_locators = article.locator('[data-testid="tweetPhoto"] img').all()
|
||||
for img in photo_locators:
|
||||
src = img.get_attribute('src')
|
||||
if src:
|
||||
# Convert thumbnail URL to high-res URL
|
||||
src = re.sub(r'&name=\w+', '&name=large', src)
|
||||
image_urls.append(src)
|
||||
|
||||
tweets.append(ScrapedTweet(created_at, text, image_urls))
|
||||
media_urls.append((src, "photo"))
|
||||
|
||||
# Get Video URLs
|
||||
video_locators = article.locator('[data-testid="videoPlayer"]').all()
|
||||
for video in video_locators:
|
||||
video_url = video.get_attribute('src')
|
||||
if video_url:
|
||||
media_urls.append((video_url, "video"))
|
||||
|
||||
tweets.append(ScrapedTweet(created_at, text, media_urls))
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ Failed to parse a specific tweet: {e}")
|
||||
@@ -226,7 +232,7 @@ def make_rich(content):
|
||||
parts = raw.split('\n')
|
||||
test_part0 = re.sub(r'[…\.]+$', '', parts[0])
|
||||
if is_valid_url(test_part0):
|
||||
return raw # Return original to preserve the paragraph break
|
||||
return raw # Return original to preserve the paragraph break
|
||||
|
||||
# Fallback: assume it's a broken URL and glue it anyway
|
||||
return test_url
|
||||
@@ -247,7 +253,7 @@ def make_rich(content):
|
||||
# Split by space to process words, URLs, and tags
|
||||
words = line.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
if not word: # Handle double spaces gracefully
|
||||
if not word: # Handle double spaces gracefully
|
||||
if i < len(words) - 1:
|
||||
text_builder.text(" ")
|
||||
continue
|
||||
@@ -319,7 +325,6 @@ def sync_feeds(args):
|
||||
tweet_time = arrow.get(tweet.created_on)
|
||||
|
||||
if tweet_time > last_bsky_time: # Only post new tweets
|
||||
#if True: # For testing, post all tweets regardless of time
|
||||
logging.info(f"📝 Found new tweet from {tweet_time}. Posting to Bluesky...")
|
||||
|
||||
raw_text = tweet.text.strip()
|
||||
@@ -359,7 +364,12 @@ def sync_feeds(args):
|
||||
if blob:
|
||||
# Inject our dynamic alt text here!
|
||||
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
||||
|
||||
elif media.type == "video":
|
||||
# Handle video uploads if necessary (this part may vary based on your API capabilities)
|
||||
blob = get_blob_from_url(media.media_url_https, bsky_client)
|
||||
if blob:
|
||||
images.append(models.AppBskyEmbedImages.Image(alt=dynamic_alt, image=blob))
|
||||
|
||||
# 🌐 Posting with Catalan language tag
|
||||
try:
|
||||
if images:
|
||||
|
||||
Reference in New Issue
Block a user