"""Scrape event listings with headless Chrome and post pub events to Discord.

The script loads ``NATIONER_URL`` in a headless Chrome session, extracts the
event blocks found in the rendered DOM, writes them to a debug JSON file and
posts the events classified as pub activities to a Discord webhook. When run
as a script it executes once immediately and then again every day at 00:05
(local time).
"""
import datetime
import json
import os
import re
import time
import traceback

import requests
from bs4 import BeautifulSoup  # noqa: F401  (kept: other code may rely on it)
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options

# CONFIGURATION
NATIONER_URL = "https://www.nationsguiden.se/"  # Example URL, replace with actual

# SECURITY: a webhook URL is a credential. Prefer supplying it through the
# DISCORD_WEBHOOK_URL environment variable; the literal below is kept only as
# a backward-compatible fallback and should be rotated and removed.
DISCORD_WEBHOOK_URL = os.environ.get(
    "DISCORD_WEBHOOK_URL",
    "https://discord.com/api/webhooks/1437888900747104317/TI2RfDGC5dzoi5JGz6UO2aD23teYNwa6pLQOskhaDnsSVe3cr8_rly0L3K0VyIYARgeR",
)

# Where the extracted events are dumped for debugging.
DEBUG_EVENTS_PATH = "/mnt/serverdata/html/crawlernation/events_debug.json"

# Fixed wait that gives the page's JavaScript time to render the events.
PAGE_LOAD_WAIT_SECONDS = 5

# Upper bound for a single webhook request so a stalled connection cannot
# block the scheduler forever.
REQUEST_TIMEOUT_SECONDS = 10

# Maximum length of the "content" field accepted per Discord message.
DISCORD_MAX_MESSAGE_LENGTH = 2000

# An event whose title contains one of these substrings counts as a pub event.
PUB_KEYWORDS = ('pub', 'wermlandskälla', 'orvars krog')

# First HH:MM occurrence in an opening-time string.
_TIME_PATTERN = re.compile(r'(\d{2}):(\d{2})')


def _element_text(block, selector):
    """Return the stripped text of the first *selector* match, or ""."""
    try:
        return block.find_element("css selector", selector).text.strip()
    except WebDriverException:
        # Missing or stale element: treat the field as absent.
        return ""


def _extract_event(block):
    """Build an event dict from one DOM block, or None if it has no title."""
    try:
        title_link = block.find_element("css selector", "h4 a")
        event_title = title_link.text.strip()
    except WebDriverException:
        return None
    if not event_title:
        return None

    try:
        permalink = title_link.get_attribute("href")
    except WebDriverException:
        permalink = ""

    return {
        "event": event_title,
        "nation": _element_text(block, "a.text-primary, p.text-primary"),
        "open_time": _element_text(block, "time"),
        "permalink": permalink,
    }


def _write_debug_file(nationer_data, path=DEBUG_EVENTS_PATH):
    """Dump the extracted events as JSON; failures are reported, not raised."""
    debug_path = os.path.abspath(path)
    try:
        with open(debug_path, "w", encoding="utf-8") as f:
            json.dump(nationer_data, f, ensure_ascii=False, indent=2)
    except OSError as e:
        print(f"Error writing debug file: {e}")
    else:
        print(f"Wrote debug event data to {debug_path}")


def fetch_nationer_open_times(url):
    """Load *url* in headless Chrome and extract the listed events.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts with the keys ``event``, ``nation``, ``open_time`` and
        ``permalink``. Blocks without an event title are skipped.

    Side effects:
        Prints debug output and writes the result to ``DEBUG_EVENTS_PATH``.
    """
    print(f"DEBUG: Current working directory is {os.getcwd()}")

    # Use headless browser to get rendered DOM and extract event data
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')

    nationer_data = []
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        time.sleep(PAGE_LOAD_WAIT_SECONDS)  # Wait for JS to load events

        # Find all event blocks (adjust selector as needed)
        event_blocks = driver.find_elements(
            "css selector", "div.flex.flex-col.justify-evenly"
        )
        print(f"DEBUG: Found {len(event_blocks)} event blocks in DOM.")

        for block in event_blocks:
            event = _extract_event(block)
            if event is None:
                continue
            print(
                f"DEBUG: Event: {event['event']}, "
                f"Organiser: {event['nation']}, Time: {event['open_time']}"
            )
            nationer_data.append(event)
    finally:
        # Always release the browser, even if loading or scraping failed;
        # otherwise every failed run leaves a Chrome process behind.
        driver.quit()

    # Write debug file for extracted events
    _write_debug_file(nationer_data)
    return nationer_data


def _is_pub_event(event):
    """Return True if the title has a pub keyword or the start hour is >= 18."""
    title = event['event'].lower()
    if any(keyword in title for keyword in PUB_KEYWORDS):
        return True
    # Time match: look for start time after 18:00
    time_match = _TIME_PATTERN.search(event.get('open_time') or '')
    return time_match is not None and int(time_match.group(1)) >= 18


def _format_batch(events, title):
    """Render *events* as one quoted Discord message under a dated heading."""
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    parts = [f"## {today}\n**{title}**\n"]
    parts.extend(
        f"> **{item['nation']}**\n"
        f"> **Event:** {item['event']}\n"
        f"> **Tid:** {item['open_time']}\n"
        f"> ————————————————\n"
        for item in events
    )
    return "".join(parts)


def _split_message(content, max_len=DISCORD_MAX_MESSAGE_LENGTH):
    """Split *content* on line boundaries into chunks of at most *max_len*.

    A single line that is itself too long is hard-split, and empty chunks are
    never produced, because Discord rejects both oversized and empty messages.
    """
    batches = []
    batch = ''
    for line in content.split('\n'):
        # A line that cannot fit in any batch is cut into max_len pieces.
        while len(line) + 1 > max_len:
            if batch.strip():
                batches.append(batch)
            batch = ''
            batches.append(line[:max_len])
            line = line[max_len:]
        if len(batch) + len(line) + 1 > max_len:
            if batch.strip():
                batches.append(batch)
            batch = ''
        batch += line + '\n'
    if batch.strip():
        batches.append(batch)
    return batches


def _post_to_discord(content, webhook_url):
    """POST one message; return the HTTP status code, or None on failure."""
    payload = {"content": content}
    try:
        response = requests.post(
            webhook_url, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Covers HTTP errors as well as connection failures and timeouts, so
        # a network hiccup does not take down the scheduler.
        print(f"Discord webhook error: {e}\nPayload: {payload}")
        return None
    return response.status_code


def send_to_discord_webhook(data, webhook_url):
    """Post the pub events contained in *data* to a Discord webhook.

    Args:
        data: List of event dicts as returned by ``fetch_nationer_open_times``.
        webhook_url: Discord webhook URL to post to.

    Returns:
        When *data* is empty, the HTTP status code of the "nothing found"
        message (``None`` if that request failed). Otherwise ``None``.

    Raises:
        ValueError: If *webhook_url* is empty.
    """
    if not webhook_url:
        raise ValueError("DISCORD_WEBHOOK_URL not set")

    if not data:
        return _post_to_discord("Inga öppettider hittades.", webhook_url)

    # Do not send non-pub events to Discord
    pub_events = [event for event in data if _is_pub_event(event)]
    if not pub_events:
        return None

    content = _format_batch(pub_events, "Pub-aktiviteter")
    for batch in _split_message(content):
        _post_to_discord(batch, webhook_url)
    return None


def main():
    """Scrape the configured page once and forward the result to Discord."""
    nationer_data = fetch_nationer_open_times(NATIONER_URL)
    send_to_discord_webhook(nationer_data, DISCORD_WEBHOOK_URL)


def _next_run_after(now):
    """Return the next 00:05 that lies strictly after *now*."""
    next_run = now.replace(hour=0, minute=5, second=0, microsecond=0)
    if now >= next_run:
        next_run += datetime.timedelta(days=1)
    return next_run


if __name__ == "__main__":
    # Run once immediately. A failure here is allowed to propagate so that a
    # broken setup is noticed right away.
    main()
    print("First run complete. Scheduling next runs at 00:05 daily.")
    while True:
        now = datetime.datetime.now()
        next_run = _next_run_after(now)
        sleep_seconds = (next_run - now).total_seconds()
        print(f"Sleeping until next run at {next_run.strftime('%Y-%m-%d %H:%M:%S')} ({int(sleep_seconds)} seconds)")
        time.sleep(sleep_seconds)
        try:
            main()
        except Exception:
            # Top-level boundary of a long-running job: report the failure
            # and keep the schedule alive for the next day.
            traceback.print_exc()