Fix YouTube hammering, sync rate limiting, and Following load time
Sync throttling:
- sync-all now skips channels crawled within the last 6 hours (prevents re-scraping all 1266 channels on every button press)
- Channels are queued into a single _index_channels_batch task that runs them sequentially with a 1.5s delay between yt-dlp calls, instead of firing 1266 background tasks simultaneously
- Startup enrich task reduced from 10 to 3 videos (at most 3 yt-dlp calls on each container restart)
- Enrich task adds a 2s sleep between metadata fetches

SQLite stability:
- busy_timeout=5000 makes connections wait up to 5 seconds for a lock instead of failing immediately with SQLITE_BUSY under concurrent load
- synchronous=NORMAL speeds up writes without risking database corruption (safe with WAL; only the most recent commits could be rolled back after a power loss or OS crash)

Following page:
- staleTime: 60s on the channels query so cached data is reused immediately on revisit; gcTime keeps it in memory for 5 min

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,8 @@ def set_sqlite_pragma(dbapi_conn, _):
|
||||
cursor = dbapi_conn.cursor()
|
||||
cursor.execute("PRAGMA journal_mode=WAL")
|
||||
cursor.execute("PRAGMA foreign_keys=ON")
|
||||
cursor.execute("PRAGMA busy_timeout=5000")
|
||||
cursor.execute("PRAGMA synchronous=NORMAL")
|
||||
cursor.close()
|
||||
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ def on_startup():
|
||||
# Backfill descriptions for videos that don't have them yet (runs in background)
|
||||
import threading
|
||||
from .routers.channels import _enrich_missing_task
|
||||
threading.Thread(target=_enrich_missing_task, args=(10,), daemon=True).start()
|
||||
threading.Thread(target=_enrich_missing_task, args=(3,), daemon=True).start()
|
||||
|
||||
|
||||
@app.get("/api/health")
|
||||
|
||||
@@ -105,6 +105,15 @@ def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
|
||||
return c
|
||||
|
||||
|
||||
def _index_channels_batch(channel_ids: list[int], user_id: int, delay: float = 1.5):
    """Sync the given channels one at a time, pausing between consecutive syncs.

    Calls ``_index_channel_task(channel_id, user_id)`` for each id, in the
    order given, from a single worker so the requests are never issued
    concurrently.

    Args:
        channel_ids: Ids of the channels to sync, processed in list order.
        user_id: Passed through unchanged to every ``_index_channel_task`` call.
        delay: Seconds to sleep between two consecutive syncs. There is no
            sleep before the first channel, and a zero or negative value
            disables the pause entirely.
    """
    import logging
    import time

    log = logging.getLogger(__name__)

    for position, channel_id in enumerate(channel_ids):
        # Pause only *between* calls; time.sleep() raises ValueError on a
        # negative argument, so non-positive delays simply skip the pause.
        if position > 0 and delay > 0:
            time.sleep(delay)
        try:
            _index_channel_task(channel_id, user_id)
        except Exception:
            # This loop is the outermost frame of a background job: one
            # failing channel must not prevent the remaining channels in the
            # batch from being synced, so log the traceback and carry on.
            log.exception(
                "Channel sync failed for channel %s; continuing with batch",
                channel_id,
            )
|
||||
|
||||
|
||||
def _index_channel_task(channel_id: int, user_id: int):
|
||||
from ..database import SessionLocal
|
||||
db = SessionLocal()
|
||||
@@ -224,7 +233,9 @@ def _enrich_missing_task(limit: int = 20):
|
||||
"""),
|
||||
{"limit": limit},
|
||||
).mappings().all()
|
||||
for row in rows:
|
||||
for i, row in enumerate(rows):
|
||||
if i > 0:
|
||||
import time; time.sleep(2)
|
||||
try:
|
||||
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
|
||||
if meta:
|
||||
@@ -278,22 +289,24 @@ def sync_all_channels(
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
# Only sync channels not touched in the last 6 hours to avoid hammering YouTube
|
||||
channels = db.execute(
|
||||
text("""
|
||||
SELECT c.id FROM channels c
|
||||
JOIN user_channels uc ON c.id = uc.channel_id
|
||||
WHERE uc.user_id = :uid AND uc.status = 'followed'
|
||||
AND (c.crawled_at IS NULL OR c.crawled_at < datetime('now', '-6 hours'))
|
||||
ORDER BY COALESCE(c.crawled_at, '1970-01-01') ASC
|
||||
"""),
|
||||
{"uid": current_user.id},
|
||||
).mappings().all()
|
||||
|
||||
for row in channels:
|
||||
background_tasks.add_task(_index_channel_task, row["id"], current_user.id)
|
||||
|
||||
if channels:
|
||||
ids = [row["id"] for row in channels]
|
||||
background_tasks.add_task(_index_channels_batch, ids, current_user.id)
|
||||
background_tasks.add_task(_discovery_task, current_user.id)
|
||||
|
||||
background_tasks.add_task(_enrich_missing_task, 20)
|
||||
background_tasks.add_task(_enrich_missing_task, 5)
|
||||
|
||||
return {"indexing": len(channels)}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user