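"""Scrape event listings with headless Chrome and post pub events to Discord.

When run as a script, one scrape-and-post cycle runs immediately and then
again every day at 00:05 local time.
"""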
import requests
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
import os
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
# CONFIGURATION
NATIONER_URL = "https://www.nationsguiden.se/"  # Example URL, replace with actual

# SECURITY: a webhook URL is a credential -- anyone who has it can post to the
# channel. Prefer supplying it through the DISCORD_WEBHOOK_URL environment
# variable. The literal below is kept only as a backward-compatible fallback;
# TODO: rotate this webhook and remove the hard-coded value from source.
DISCORD_WEBHOOK_URL = os.environ.get(
    "DISCORD_WEBHOOK_URL",
    "https://discord.com/api/webhooks/1437888900747104317/TI2RfDGC5dzoi5JGz6UO2aD23teYNwa6pLQOskhaDnsSVe3cr8_rly0L3K0VyIYARgeR",
)
|
|
|
|
|
|
def _read_event_field(block, selector, attribute=None):
    """Read one value from the first element matching ``selector`` in ``block``.

    Returns the element's stripped text, or the value of ``attribute`` when one
    is given. Returns ``""`` if the lookup or the read fails for any reason.
    """
    try:
        element = block.find_element("css selector", selector)
        if attribute is not None:
            return element.get_attribute(attribute)
        return element.text.strip()
    except Exception:
        # Deliberately best-effort: a field that is missing from one event
        # block must not abort the scrape of the remaining blocks.
        return ""


def fetch_nationer_open_times(url):
    """Scrape event entries from ``url`` using headless Chrome.

    The page is loaded in a headless browser, given a fixed 5 seconds to run
    its JavaScript, and then every ``div.flex.flex-col.justify-evenly`` element
    is inspected. Blocks without a title (``h4 a``) are skipped.

    As a side effect the result is also dumped as JSON to
    ``/mnt/serverdata/html/crawlernation/events_debug.json``; a failure to
    write that file is printed and otherwise ignored.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts with the keys ``event``, ``nation``, ``open_time`` and
        ``permalink``, in the order the blocks were found.

    Raises:
        Whatever Selenium raises while starting the browser, loading the page
        or listing the event blocks. The browser is shut down before such an
        error propagates.
    """
    import time  # local import: the module header does not import ``time``

    print(f"DEBUG: Current working directory is {os.getcwd()}")

    # Use headless browser to get rendered DOM and extract event data
    chrome_options = Options()
    for flag in ('--headless', '--disable-gpu', '--no-sandbox'):
        chrome_options.add_argument(flag)

    nationer_data = []
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        time.sleep(5)  # Wait for JS to load events

        # Find all event blocks (adjust selector as needed)
        event_blocks = driver.find_elements("css selector", "div.flex.flex-col.justify-evenly")
        print(f"DEBUG: Found {len(event_blocks)} event blocks in DOM.")

        for block in event_blocks:
            event_title = _read_event_field(block, "h4 a")
            if not event_title:
                # Untitled blocks are never reported, so skip the other lookups.
                continue
            organiser = _read_event_field(block, "a.text-primary, p.text-primary")
            open_time = _read_event_field(block, "time")
            permalink = _read_event_field(block, "h4 a", attribute="href")

            print(f"DEBUG: Event: {event_title}, Organiser: {organiser}, Time: {open_time}")
            nationer_data.append({
                "event": event_title,
                "nation": organiser,
                "open_time": open_time,
                "permalink": permalink,
            })
    finally:
        # Always shut the browser down, even when scraping fails; otherwise
        # every failed run leaves a Chrome process behind.
        driver.quit()

    # Write debug file for extracted events
    debug_path = os.path.abspath("/mnt/serverdata/html/crawlernation/events_debug.json")
    try:
        with open(debug_path, "w", encoding="utf-8") as f:
            json.dump(nationer_data, f, ensure_ascii=False, indent=2)
        print(f"Wrote debug event data to {debug_path}")
    except Exception as e:
        # The dump is a debugging aid only; never let it fail the scrape.
        print(f"Error writing debug file: {e}")

    return nationer_data
|
|
|
|
|
|
# Substrings (lower-case) that mark an event title as a pub event.
_PUB_KEYWORDS = ('pub', 'wermlandskälla', 'orvars krog')
# Longest message body sent in one webhook call.
_DISCORD_MAX_CONTENT_LEN = 2000
# Upper bound for a single webhook request so a stalled connection cannot
# block the caller forever.
_WEBHOOK_TIMEOUT_SECONDS = 10


def _post_to_discord(webhook_url, payload):
    """POST ``payload`` as JSON; return the HTTP status code, or None on failure."""
    try:
        response = requests.post(webhook_url, json=payload, timeout=_WEBHOOK_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException covers HTTP error statuses as well as connection
        # failures and timeouts; all are reported, none are fatal.
        print(f"Discord webhook error: {e}\nPayload: {payload}")
        return None
    return response.status_code


def _is_pub_event(event):
    """Return True if the title contains a pub keyword or the first HH:MM is 18:00 or later."""
    import re  # local import: the module header does not import ``re``

    title = event['event'].lower()
    if any(keyword in title for keyword in _PUB_KEYWORDS):
        return True
    # Time match: look for start time after 18:00
    time_match = re.search(r'(\d{2}):(\d{2})', event.get('open_time', ''))
    return time_match is not None and int(time_match.group(1)) >= 18


def _format_batch(events, title):
    """Render ``events`` as one quoted text block under today's date and ``title``."""
    import datetime  # local import: the module header does not import ``datetime``

    today = datetime.datetime.now().strftime('%Y-%m-%d')
    parts = [f"## {today}\n**{title}**\n"]
    parts.extend(
        f"> **{item['nation']}**\n"
        f"> **Event:** {item['event']}\n"
        f"> **Tid:** {item['open_time']}\n"
        f"> ————————————————\n"
        for item in events
    )
    return "".join(parts)


def _split_into_batches(content, max_len=_DISCORD_MAX_CONTENT_LEN):
    """Split ``content`` on line boundaries into chunks of at most ``max_len`` characters.

    Chunks that would be empty or whitespace-only are dropped, and a single
    line longer than ``max_len`` is hard-wrapped instead of overflowing.
    """
    batches = []
    batch = ''
    for line in content.split('\n'):
        # A line that cannot fit in any batch is cut into max_len-sized pieces.
        while len(line) + 1 > max_len:
            if batch.strip():
                batches.append(batch)
            batch = ''
            piece, line = line[:max_len], line[max_len:]
            if piece.strip():
                batches.append(piece)
        if len(batch) + len(line) + 1 > max_len:
            if batch.strip():
                batches.append(batch)
            batch = ''
        batch += line + '\n'
    if batch.strip():
        batches.append(batch)
    return batches


def send_to_discord_webhook(data, webhook_url):
    """Post the pub events found in ``data`` to a Discord webhook.

    When ``data`` is empty a single "nothing found" message is posted. Otherwise
    the events classified as pub events are formatted and posted in as many
    messages as needed; all other events are not sent. Request failures are
    printed and do not raise.

    Args:
        data: List of event dicts with the keys ``event``, ``nation`` and
            ``open_time``.
        webhook_url: Discord webhook URL to post to.

    Returns:
        The HTTP status code of the "nothing found" message when ``data`` is
        empty and the request succeeded; ``None`` in every other case.

    Raises:
        ValueError: If ``webhook_url`` is empty.
    """
    if not webhook_url:
        raise ValueError("DISCORD_WEBHOOK_URL not set")

    if not data:
        return _post_to_discord(webhook_url, {"content": "Inga öppettider hittades."})

    # Do not send non-pub events to Discord
    pub_events = [event for event in data if _is_pub_event(event)]
    if pub_events:
        content = _format_batch(pub_events, "Pub-aktiviteter")
        for batch in _split_into_batches(content):
            _post_to_discord(webhook_url, {"content": batch})
    return None
|
|
|
|
|
|
def main():
    """Run one cycle: scrape NATIONER_URL and hand the result to the Discord sender."""
    send_to_discord_webhook(
        fetch_nationer_open_times(NATIONER_URL),
        DISCORD_WEBHOOK_URL,
    )
|
|
|
|
|
|
if __name__ == "__main__":
    import datetime
    import time

    # One cycle right away, then one per day at 00:05 local time.
    main()
    print("First run complete. Scheduling next runs at 00:05 daily.")

    one_day = datetime.timedelta(days=1)
    while True:
        current = datetime.datetime.now()
        # Today's 00:05; if that moment has already passed, aim for tomorrow's.
        target = current.replace(hour=0, minute=5, second=0, microsecond=0)
        if target <= current:
            target = target + one_day
        delay = (target - current).total_seconds()
        print(f"Sleeping until next run at {target.strftime('%Y-%m-%d %H:%M:%S')} ({int(delay)} seconds)")
        time.sleep(delay)
        main()
|
|
|