Fix YouTube hammering, sync rate limiting, and Following load time

Sync throttling:
- sync-all now skips channels crawled within the last 6 hours (prevents
  re-scraping 1266 channels on every button press)
- Channels are queued into a single _index_channels_batch task that syncs
  them one at a time with a 1.5s delay between channels, instead of firing
  1266 background tasks simultaneously
- Startup enrich task reduced from 10 to 3 videos (3 yt-dlp calls on
  each container restart)
- Enrich task adds 2s sleep between metadata fetches; sync-all now enriches
  at most 5 videos per run instead of 20

SQLite stability:
- busy_timeout=5000 makes a connection wait up to 5s for a lock instead of
  failing immediately with SQLITE_BUSY under concurrent load
- synchronous=NORMAL speeds up writes; in WAL mode this cannot corrupt the
  database, though the most recent commits may be rolled back after a power
  loss or OS crash

Following page:
- staleTime: 60s on channels query so cached data is reused immediately
  on revisit; gcTime keeps it in memory for 5 min

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mattias Tall
2026-05-26 16:00:37 +02:00
parent 0d6dd94029
commit 1cd8645957
4 changed files with 23 additions and 6 deletions

View File

@@ -15,6 +15,8 @@ def set_sqlite_pragma(dbapi_conn, _):
cursor = dbapi_conn.cursor() cursor = dbapi_conn.cursor()
cursor.execute("PRAGMA journal_mode=WAL") cursor.execute("PRAGMA journal_mode=WAL")
cursor.execute("PRAGMA foreign_keys=ON") cursor.execute("PRAGMA foreign_keys=ON")
cursor.execute("PRAGMA busy_timeout=5000")
cursor.execute("PRAGMA synchronous=NORMAL")
cursor.close() cursor.close()

View File

@@ -152,7 +152,7 @@ def on_startup():
# Backfill descriptions for videos that don't have them yet (runs in background) # Backfill descriptions for videos that don't have them yet (runs in background)
import threading import threading
from .routers.channels import _enrich_missing_task from .routers.channels import _enrich_missing_task
threading.Thread(target=_enrich_missing_task, args=(10,), daemon=True).start() threading.Thread(target=_enrich_missing_task, args=(3,), daemon=True).start()
@app.get("/api/health") @app.get("/api/health")

View File

@@ -105,6 +105,15 @@ def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
return c return c
def _index_channels_batch(channel_ids: list[int], user_id: int, delay: float = 1.5):
    """Sync the given channels one at a time, pausing between consecutive syncs.

    Args:
        channel_ids: IDs of the channels to sync, processed in the given order.
        user_id: Passed through unchanged to ``_index_channel_task``.
        delay: Seconds to sleep between two consecutive channel syncs. There is
            no sleep before the first channel or after the last one; a value
            of zero or below disables the pause.

    An exception raised while syncing one channel is logged and the batch moves
    on to the next channel, so a single bad channel cannot cancel the rest.
    """
    import logging
    import time

    log = logging.getLogger(__name__)
    for position, channel_id in enumerate(channel_ids):
        # Pause only between syncs; time.sleep() rejects negative values.
        if position > 0 and delay > 0:
            time.sleep(delay)
        try:
            _index_channel_task(channel_id, user_id)
        except Exception:
            # Boundary of a background job: record the failure and keep going.
            log.exception("Channel sync failed for channel_id=%s", channel_id)
def _index_channel_task(channel_id: int, user_id: int): def _index_channel_task(channel_id: int, user_id: int):
from ..database import SessionLocal from ..database import SessionLocal
db = SessionLocal() db = SessionLocal()
@@ -224,7 +233,9 @@ def _enrich_missing_task(limit: int = 20):
"""), """),
{"limit": limit}, {"limit": limit},
).mappings().all() ).mappings().all()
for row in rows: for i, row in enumerate(rows):
if i > 0:
import time; time.sleep(2)
try: try:
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"]) meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
if meta: if meta:
@@ -278,22 +289,24 @@ def sync_all_channels(
db: Session = Depends(get_db), db: Session = Depends(get_db),
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
): ):
# Only sync channels not touched in the last 6 hours to avoid hammering YouTube
channels = db.execute( channels = db.execute(
text(""" text("""
SELECT c.id FROM channels c SELECT c.id FROM channels c
JOIN user_channels uc ON c.id = uc.channel_id JOIN user_channels uc ON c.id = uc.channel_id
WHERE uc.user_id = :uid AND uc.status = 'followed' WHERE uc.user_id = :uid AND uc.status = 'followed'
AND (c.crawled_at IS NULL OR c.crawled_at < datetime('now', '-6 hours'))
ORDER BY COALESCE(c.crawled_at, '1970-01-01') ASC
"""), """),
{"uid": current_user.id}, {"uid": current_user.id},
).mappings().all() ).mappings().all()
for row in channels:
background_tasks.add_task(_index_channel_task, row["id"], current_user.id)
if channels: if channels:
ids = [row["id"] for row in channels]
background_tasks.add_task(_index_channels_batch, ids, current_user.id)
background_tasks.add_task(_discovery_task, current_user.id) background_tasks.add_task(_discovery_task, current_user.id)
background_tasks.add_task(_enrich_missing_task, 20) background_tasks.add_task(_enrich_missing_task, 5)
return {"indexing": len(channels)} return {"indexing": len(channels)}

View File

@@ -579,6 +579,8 @@ export default function Following() {
const { data: channels = [], isLoading: loadingChannels } = useQuery({ const { data: channels = [], isLoading: loadingChannels } = useQuery({
queryKey: ["channels"], queryKey: ["channels"],
queryFn: () => getChannels().then((r) => r.data), queryFn: () => getChannels().then((r) => r.data),
staleTime: 60_000,
gcTime: 5 * 60_000,
}); });
const { data: appSettings } = useQuery({ const { data: appSettings } = useQuery({