Fix YouTube hammering, sync rate limiting, and Following load time

Sync throttling:
- sync-all now skips channels crawled within the last 6 hours (prevents
  re-scraping 1266 channels on every button press)
- Channels are queued into a single _index_channels_batch task that runs
  with 1.5s delay between each yt-dlp call instead of firing 1266
  background tasks simultaneously
- Startup enrich task reduced from 10 to 3 videos (3 yt-dlp calls on
  each container restart)
- Enrich task adds 2s sleep between metadata fetches

SQLite stability:
- busy_timeout=5000 makes a connection wait up to 5s for a lock instead
  of failing immediately with SQLITE_BUSY under concurrent load
- synchronous=NORMAL speeds up writes without corruption risk under WAL
  (a power loss may roll back the most recent commits, but the database
  stays consistent)

Following page:
- staleTime: 60s on channels query so cached data is reused immediately
  on revisit; gcTime keeps it in memory for 5 min

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mattias Tall
2026-05-26 16:00:37 +02:00
parent 0d6dd94029
commit 1cd8645957
4 changed files with 23 additions and 6 deletions

View File

@@ -105,6 +105,15 @@ def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
return c
def _index_channels_batch(channel_ids: list[int], user_id: int, delay: float = 1.5):
    """Sync the given channels one at a time, pausing between consecutive syncs.

    Args:
        channel_ids: IDs of the channels to sync, processed in the order given.
        user_id: Passed through unchanged to each ``_index_channel_task`` call.
        delay: Seconds to sleep before every sync except the first.
    """
    import time

    is_first = True
    for channel_id in channel_ids:
        # No pause before the first channel; only between consecutive syncs.
        if not is_first:
            time.sleep(delay)
        is_first = False
        _index_channel_task(channel_id, user_id)
def _index_channel_task(channel_id: int, user_id: int):
from ..database import SessionLocal
db = SessionLocal()
@@ -224,7 +233,9 @@ def _enrich_missing_task(limit: int = 20):
"""),
{"limit": limit},
).mappings().all()
for row in rows:
for i, row in enumerate(rows):
if i > 0:
import time; time.sleep(2)
try:
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
if meta:
@@ -278,22 +289,24 @@ def sync_all_channels(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
# Only sync channels not touched in the last 6 hours to avoid hammering YouTube
channels = db.execute(
text("""
SELECT c.id FROM channels c
JOIN user_channels uc ON c.id = uc.channel_id
WHERE uc.user_id = :uid AND uc.status = 'followed'
AND (c.crawled_at IS NULL OR c.crawled_at < datetime('now', '-6 hours'))
ORDER BY COALESCE(c.crawled_at, '1970-01-01') ASC
"""),
{"uid": current_user.id},
).mappings().all()
for row in channels:
background_tasks.add_task(_index_channel_task, row["id"], current_user.id)
if channels:
ids = [row["id"] for row in channels]
background_tasks.add_task(_index_channels_batch, ids, current_user.id)
background_tasks.add_task(_discovery_task, current_user.id)
background_tasks.add_task(_enrich_missing_task, 20)
background_tasks.add_task(_enrich_missing_task, 5)
return {"indexing": len(channels)}