Each yt-dlp call is now an independent task (one search query, one trending fetch, one graph channel fetch). Tasks are shuffled together so we don't fire 10 searches in a row, then enqueued with 30-90s random gaps between them — a full sweep of ~17 tasks completes in roughly 10-25 minutes instead of hammering YouTube with 21 calls back-to-back. Fast signals (community, category clusters) still run synchronously at schedule time since they're pure SQL. Progress is tracked per-user (total/done/running) and exposed on GET /api/discovery/status. The Discovery page polls every 10s while running and shows a progress bar + "Finding channels… X / Y" in the header. The auto-discovery daemon skips scheduling if a manual sweep is already running. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
268 lines
13 KiB
Python
268 lines
13 KiB
Python
import os
|
|
from fastapi import FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
|
from .config import settings
|
|
from .database import init_db, SessionLocal
|
|
from .services import ytdlp as ytdlp_service
|
|
from .routers import auth, channels, videos, search, downloads, discovery, settings as settings_router, stats as stats_router, export as export_router, collections as collections_router, admin as admin_router, playlists as playlists_router
|
|
|
|
# The ASGI application object; routers, middleware and the static file mount
# below are all attached to this single instance.
app = FastAPI(title="YouTube Hub", version="0.1.0")

# NOTE(review): every origin, method and header is allowed while credentials
# are enabled. Confirm this is intended for the deployment; a fixed origin
# list is the usual choice when cookies/auth headers are sent cross-origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _mount_routers():
    """Attach every API router to ``app`` under its ``/api/<name>`` prefix."""
    # (router, URL prefix, OpenAPI tag) in registration order.
    routes = (
        (auth.router, "/api/auth", "auth"),
        (channels.router, "/api/channels", "channels"),
        (videos.router, "/api/videos", "videos"),
        (search.router, "/api/search", "search"),
        (downloads.router, "/api/downloads", "downloads"),
        (discovery.router, "/api/discovery", "discovery"),
        (settings_router.router, "/api/settings", "settings"),
        (stats_router.router, "/api/stats", "stats"),
        (export_router.router, "/api/export", "export"),
        (collections_router.router, "/api/collections", "collections"),
        (admin_router.router, "/api/admin", "admin"),
        (playlists_router.router, "/api/playlists", "playlists"),
    )
    for router, prefix, tag in routes:
        app.include_router(router, prefix=prefix, tags=[tag])


_mount_routers()

# The download directory must exist before StaticFiles is constructed for it,
# so create it first and then expose it under /files.
os.makedirs(settings.download_path, exist_ok=True)
app.mount("/files", StaticFiles(directory=settings.download_path), name="files")
|
|
|
|
|
|
# Schema patches executed, in this order, on every startup.  Each statement is
# run in its own transaction and a failure is rolled back and skipped, so
# statements that no longer apply do not block startup.
_SCHEMA_MIGRATIONS = (
    "ALTER TABLE user_videos ADD COLUMN liked BOOLEAN DEFAULT FALSE",
    "ALTER TABLE user_videos ADD COLUMN liked_at DATETIME",
    "ALTER TABLE downloads ADD COLUMN resolution TEXT",
    "ALTER TABLE user_channels ADD COLUMN auto_download BOOLEAN DEFAULT NULL",
    "ALTER TABLE user_channels ADD COLUMN last_seen_at DATETIME",
    "ALTER TABLE discovery_queue ADD COLUMN preview_json TEXT",
    "ALTER TABLE channels ADD COLUMN subscriber_count INTEGER",
    "ALTER TABLE user_settings ADD COLUMN cookies_browser TEXT DEFAULT ''",
    "ALTER TABLE user_settings ADD COLUMN theater_mode INTEGER DEFAULT 0",
    "ALTER TABLE user_channels ADD COLUMN muted_until DATETIME DEFAULT NULL",
    "ALTER TABLE user_settings ADD COLUMN calm_mode INTEGER DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN hide_subscriber_counts INTEGER DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN autoplay_enabled INTEGER DEFAULT 0",
    "ALTER TABLE user_videos ADD COLUMN rating INTEGER DEFAULT NULL",
    "ALTER TABLE downloads ADD COLUMN pending_delete_at DATETIME DEFAULT NULL",
    "ALTER TABLE user_channels ADD COLUMN notes TEXT DEFAULT ''",
    "ALTER TABLE videos ADD COLUMN chapters TEXT DEFAULT NULL",
    "ALTER TABLE video_bookmarks ADD COLUMN source TEXT DEFAULT 'manual'",
    "ALTER TABLE user_videos ADD COLUMN completion_percent REAL DEFAULT NULL",
    "ALTER TABLE user_videos ADD COLUMN rewatch_count INTEGER DEFAULT 0",
    "ALTER TABLE users ADD COLUMN is_admin INTEGER DEFAULT 0",
    """CREATE TABLE IF NOT EXISTS system_config (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL
    )""",
    "ALTER TABLE user_settings ADD COLUMN cookies_file TEXT DEFAULT ''",
    "ALTER TABLE user_settings ADD COLUMN feed_weight_recency REAL DEFAULT 5.0",
    "ALTER TABLE user_settings ADD COLUMN feed_weight_affinity REAL DEFAULT 5.0",
    "ALTER TABLE user_settings ADD COLUMN feed_weight_channel REAL DEFAULT 5.0",
    "ALTER TABLE user_settings ADD COLUMN use_oauth2 INTEGER DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN sync_interval_hours INTEGER DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN subtitle_langs TEXT DEFAULT ''",
    """CREATE TABLE IF NOT EXISTS playlists (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        youtube_playlist_id TEXT NOT NULL UNIQUE,
        channel_id INTEGER REFERENCES channels(id) ON DELETE CASCADE,
        title TEXT NOT NULL,
        description TEXT,
        thumbnail_url TEXT,
        video_count INTEGER DEFAULT 0,
        video_ids TEXT,
        indexed_at DATETIME,
        crawled_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )""",
    "ALTER TABLE playlists ADD COLUMN video_ids TEXT",
    """CREATE TABLE IF NOT EXISTS channel_popular_videos (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        channel_id INTEGER NOT NULL REFERENCES channels(id) ON DELETE CASCADE,
        video_id INTEGER NOT NULL REFERENCES videos(id) ON DELETE CASCADE,
        rank INTEGER NOT NULL,
        fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(channel_id, video_id)
    )""",
    """CREATE TABLE IF NOT EXISTS search_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
        query TEXT NOT NULL,
        searched_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )""",
    """CREATE TABLE IF NOT EXISTS user_tag_affinity (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
        tag TEXT NOT NULL,
        score REAL DEFAULT 0.0,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(user_id, tag)
    )""",
    """CREATE TABLE IF NOT EXISTS collections (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
        name TEXT NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )""",
    """CREATE TABLE IF NOT EXISTS collection_items (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
        video_id INTEGER NOT NULL REFERENCES videos(id) ON DELETE CASCADE,
        added_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(collection_id, video_id)
    )""",
    """CREATE TABLE IF NOT EXISTS video_bookmarks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
        video_id INTEGER NOT NULL REFERENCES videos(id) ON DELETE CASCADE,
        timestamp_seconds INTEGER NOT NULL,
        note TEXT DEFAULT '',
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )""",
    "ALTER TABLE user_videos ADD COLUMN feed_shown_count INTEGER NOT NULL DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN last_discovery_run DATETIME DEFAULT NULL",
)


@app.on_event("startup")
def on_startup():
    """Prepare the database and launch the background workers.

    Steps, in order:

    1. ``init_db()``, then every statement in ``_SCHEMA_MIGRATIONS`` (each in
       its own transaction; failures are rolled back and skipped).
    2. Rebuild the ``videos_fts`` / ``channels_fts`` indexes and rewrite
       video thumbnail URLs to the ``hqdefault.jpg`` form.
    3. Promote the lowest-id user to admin when no admin exists, and create
       the ``allow_registration`` config row when it is missing.
    4. Push the first ``UserSettings`` row's download/cookie/OAuth options
       into ``ytdlp_service``.
    5. Start the discovery worker, a one-off enrichment thread, and the
       hourly auto-sync and auto-discovery daemon threads.

    Errors in step 1 are skipped; errors in steps 2-4 propagate to the
    caller after the session is closed.  The daemon loops never raise:
    they log the failure and try again on their next pass.
    """
    import logging
    import threading

    from sqlalchemy import text

    logger = logging.getLogger(__name__)

    init_db()
    db = SessionLocal()
    try:
        for statement in _SCHEMA_MIGRATIONS:
            try:
                db.execute(text(statement))
                db.commit()
            except Exception as exc:
                # Best effort by design: a statement that cannot be applied
                # (e.g. the column is already there) must not stop startup.
                db.rollback()
                logger.debug(
                    "Schema statement skipped (%s): %s",
                    type(exc).__name__,
                    statement.split("\n", 1)[0],
                )

        # Rebuild FTS indexes so all existing rows are searchable
        db.execute(text("INSERT INTO videos_fts(videos_fts) VALUES('rebuild')"))
        db.execute(text("INSERT INTO channels_fts(channels_fts) VALUES('rebuild')"))
        # Migrate signed/expiring YouTube thumbnail URLs to stable format
        db.execute(text("""
            UPDATE videos
            SET thumbnail_url = 'https://i.ytimg.com/vi/' || youtube_video_id || '/hqdefault.jpg'
            WHERE thumbnail_url IS NULL
               OR thumbnail_url NOT LIKE 'https://i.ytimg.com/vi/%/hqdefault.jpg'
        """))
        db.commit()

        # When no admin exists yet, promote the first registered user
        from .models import User as UserModel, SystemConfig

        has_admin = db.query(UserModel).filter_by(is_admin=True).first()
        if not has_admin:
            first_user = db.query(UserModel).order_by(UserModel.id).first()
            if first_user:
                first_user.is_admin = True
                db.commit()

        # Seed the allow_registration flag (defaults to "true") if not already set
        if not db.query(SystemConfig).filter_by(key="allow_registration").first():
            db.add(SystemConfig(key="allow_registration", value="true"))
            db.commit()

        # Apply saved yt-dlp options on startup.  These are process-wide, so
        # the first settings row returned by the query is the one that wins.
        from .models import UserSettings

        first_user_settings = db.query(UserSettings).first()
        if first_user_settings:
            ytdlp_service.set_max_concurrent(first_user_settings.max_concurrent_downloads)
            ytdlp_service.set_cookies_browser(first_user_settings.cookies_browser or "")
            ytdlp_service.set_cookies_file(first_user_settings.cookies_file or "")
            ytdlp_service.set_oauth2(bool(getattr(first_user_settings, "use_oauth2", False)))
    finally:
        db.close()

    # Start discovery worker and backfill enrichment
    from .routers.channels import _enrich_missing_task, _index_channels_batch
    from .services.discovery import start_discovery_worker

    start_discovery_worker()
    threading.Thread(target=_enrich_missing_task, args=(50,), daemon=True).start()

    def _auto_sync_daemon():
        """Hourly: re-index followed channels that are older than each user's sync interval."""
        import time
        from datetime import datetime, timedelta

        from sqlalchemy import text as _text

        while True:
            time.sleep(3600)
            try:
                db = SessionLocal()
                try:
                    users_due = db.execute(
                        _text("SELECT user_id, sync_interval_hours FROM user_settings WHERE sync_interval_hours > 0")
                    ).mappings().all()
                    for row in users_due:
                        uid = row["user_id"]
                        cutoff = datetime.utcnow() - timedelta(hours=row["sync_interval_hours"])
                        # Never-crawled channels first, then the stalest ones.
                        ch_ids = [
                            r["id"] for r in db.execute(
                                _text("""
                                    SELECT c.id FROM channels c
                                    JOIN user_channels uc ON c.id = uc.channel_id
                                    WHERE uc.user_id = :uid AND uc.status = 'followed'
                                      AND (c.crawled_at IS NULL OR c.crawled_at < :cutoff)
                                    ORDER BY COALESCE(c.crawled_at, '1970-01-01') ASC
                                """),
                                {"uid": uid, "cutoff": cutoff},
                            ).mappings().all()
                        ]
                        if ch_ids:
                            threading.Thread(
                                target=_index_channels_batch, args=(ch_ids, uid), daemon=True
                            ).start()
                finally:
                    db.close()
            except Exception:
                # Keep the daemon alive, but leave a trace instead of failing silently.
                logger.exception("Auto-sync pass failed; will retry on the next cycle")

    threading.Thread(target=_auto_sync_daemon, daemon=True).start()

    def _auto_discovery_daemon():
        """Hourly: schedule a discovery sweep for users whose last run is over 23 h old."""
        import time as _time
        from datetime import datetime as _dt, timedelta as _td

        from sqlalchemy import text as _text

        from .services.discovery import schedule_discovery, get_discovery_progress

        # Wait 5 minutes after startup before the first check so the app can
        # finish initialising and existing enrichment tasks can settle.
        _time.sleep(300)

        while True:
            try:
                db = SessionLocal()
                try:
                    rows = db.execute(_text("""
                        SELECT u.id AS user_id,
                               COALESCE(us.discovery_regions, 'US,SE') AS discovery_regions,
                               us.last_discovery_run
                        FROM users u
                        LEFT JOIN user_settings us ON u.id = us.user_id
                    """)).mappings().all()

                    for row in rows:
                        uid = row["user_id"]
                        # Isolate failures per user: one bad timestamp or a
                        # scheduling error must not starve every later user.
                        try:
                            last = row["last_discovery_run"]
                            if last is not None and (_dt.utcnow() - _dt.fromisoformat(str(last))) <= _td(hours=23):
                                continue
                            # Skip if a manual sweep is already running
                            prog = get_discovery_progress(uid)
                            if prog and prog.get("running"):
                                continue
                            regions = [r.strip().upper() for r in (row["discovery_regions"] or "US,SE").split(",") if r.strip()]
                            schedule_discovery(uid, regions)
                        except Exception:
                            logger.exception("Auto-discovery scheduling failed for user %s", uid)
                finally:
                    db.close()
            except Exception:
                # Keep the daemon alive, but leave a trace instead of failing silently.
                logger.exception("Auto-discovery pass failed; will retry on the next cycle")

            _time.sleep(3600)  # check every hour, run if >23 h since last run

    threading.Thread(target=_auto_discovery_daemon, daemon=True).start()
|
|
|
|
|
|
@app.get("/api/health")
def health():
    """Liveness probe: always answers with a fixed ``{"status": "ok"}`` body."""
    return dict(status="ok")
|