Fix YouTube hammering, sync rate limiting, and Following load time
Sync throttling:
- sync-all now skips channels that were crawled within the last 6 hours (prevents re-scraping all 1266 channels on every button press).
- Channels are queued into a single _index_channels_batch task that runs them sequentially with a 1.5s delay between yt-dlp calls, instead of firing 1266 background tasks simultaneously.
- The startup enrich task is reduced from 10 to 3 videos (i.e. 3 yt-dlp calls on each container restart).
- The enrich task adds a 2s sleep between metadata fetches.

SQLite stability:
- busy_timeout=5000 prevents SQLITE_BUSY errors under concurrent load.
- synchronous=NORMAL speeds up writes without data-loss risk (safe with WAL).

Following page:
- staleTime: 60s on the channels query, so cached data is reused immediately on revisit; gcTime keeps it in memory for 5 min.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,8 @@ def set_sqlite_pragma(dbapi_conn, _):
|
|||||||
cursor = dbapi_conn.cursor()
|
cursor = dbapi_conn.cursor()
|
||||||
cursor.execute("PRAGMA journal_mode=WAL")
|
cursor.execute("PRAGMA journal_mode=WAL")
|
||||||
cursor.execute("PRAGMA foreign_keys=ON")
|
cursor.execute("PRAGMA foreign_keys=ON")
|
||||||
|
cursor.execute("PRAGMA busy_timeout=5000")
|
||||||
|
cursor.execute("PRAGMA synchronous=NORMAL")
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ def on_startup():
|
|||||||
# Backfill descriptions for videos that don't have them yet (runs in background)
|
# Backfill descriptions for videos that don't have them yet (runs in background)
|
||||||
import threading
|
import threading
|
||||||
from .routers.channels import _enrich_missing_task
|
from .routers.channels import _enrich_missing_task
|
||||||
threading.Thread(target=_enrich_missing_task, args=(10,), daemon=True).start()
|
threading.Thread(target=_enrich_missing_task, args=(3,), daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/health")
|
@app.get("/api/health")
|
||||||
|
|||||||
@@ -105,6 +105,15 @@ def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
|
|||||||
return c
|
return c
|
||||||
|
|
||||||
|
|
||||||
|
def _index_channels_batch(channel_ids: list[int], user_id: int, delay: float = 1.5):
    """Run channel syncs sequentially with a polite delay between requests.

    Each id in ``channel_ids`` is handed to ``_index_channel_task`` in order.
    Before every call except the first, the function sleeps for ``delay``
    seconds so consecutive syncs are spaced out rather than fired back to back.

    Args:
        channel_ids: Channel ids to sync, processed in the order given.
        user_id: Forwarded unchanged to ``_index_channel_task`` for every channel.
        delay: Seconds to wait between two consecutive syncs. Zero or negative
            values disable the wait.

    A failure while syncing one channel is logged and the batch moves on to
    the next id, so a single bad channel cannot abandon the rest of the queue.
    """
    import logging
    import time

    log = logging.getLogger(__name__)
    # time.sleep() raises ValueError for negative values; treat them as "no wait".
    pause = max(delay, 0.0)

    for i, cid in enumerate(channel_ids):
        if i > 0 and pause:
            time.sleep(pause)
        try:
            _index_channel_task(cid, user_id)
        except Exception:
            # Top-level boundary of a background job: record the traceback and
            # keep going instead of dropping every remaining channel.
            log.exception("Channel sync failed for channel_id=%s; continuing batch", cid)
|
||||||
|
|
||||||
|
|
||||||
def _index_channel_task(channel_id: int, user_id: int):
|
def _index_channel_task(channel_id: int, user_id: int):
|
||||||
from ..database import SessionLocal
|
from ..database import SessionLocal
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
@@ -224,7 +233,9 @@ def _enrich_missing_task(limit: int = 20):
|
|||||||
"""),
|
"""),
|
||||||
{"limit": limit},
|
{"limit": limit},
|
||||||
).mappings().all()
|
).mappings().all()
|
||||||
for row in rows:
|
for i, row in enumerate(rows):
|
||||||
|
if i > 0:
|
||||||
|
import time; time.sleep(2)
|
||||||
try:
|
try:
|
||||||
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
|
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
|
||||||
if meta:
|
if meta:
|
||||||
@@ -278,22 +289,24 @@ def sync_all_channels(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
):
|
):
|
||||||
|
# Only sync channels not touched in the last 6 hours to avoid hammering YouTube
|
||||||
channels = db.execute(
|
channels = db.execute(
|
||||||
text("""
|
text("""
|
||||||
SELECT c.id FROM channels c
|
SELECT c.id FROM channels c
|
||||||
JOIN user_channels uc ON c.id = uc.channel_id
|
JOIN user_channels uc ON c.id = uc.channel_id
|
||||||
WHERE uc.user_id = :uid AND uc.status = 'followed'
|
WHERE uc.user_id = :uid AND uc.status = 'followed'
|
||||||
|
AND (c.crawled_at IS NULL OR c.crawled_at < datetime('now', '-6 hours'))
|
||||||
|
ORDER BY COALESCE(c.crawled_at, '1970-01-01') ASC
|
||||||
"""),
|
"""),
|
||||||
{"uid": current_user.id},
|
{"uid": current_user.id},
|
||||||
).mappings().all()
|
).mappings().all()
|
||||||
|
|
||||||
for row in channels:
|
|
||||||
background_tasks.add_task(_index_channel_task, row["id"], current_user.id)
|
|
||||||
|
|
||||||
if channels:
|
if channels:
|
||||||
|
ids = [row["id"] for row in channels]
|
||||||
|
background_tasks.add_task(_index_channels_batch, ids, current_user.id)
|
||||||
background_tasks.add_task(_discovery_task, current_user.id)
|
background_tasks.add_task(_discovery_task, current_user.id)
|
||||||
|
|
||||||
background_tasks.add_task(_enrich_missing_task, 20)
|
background_tasks.add_task(_enrich_missing_task, 5)
|
||||||
|
|
||||||
return {"indexing": len(channels)}
|
return {"indexing": len(channels)}
|
||||||
|
|
||||||
|
|||||||
@@ -579,6 +579,8 @@ export default function Following() {
|
|||||||
const { data: channels = [], isLoading: loadingChannels } = useQuery({
|
const { data: channels = [], isLoading: loadingChannels } = useQuery({
|
||||||
queryKey: ["channels"],
|
queryKey: ["channels"],
|
||||||
queryFn: () => getChannels().then((r) => r.data),
|
queryFn: () => getChannels().then((r) => r.data),
|
||||||
|
staleTime: 60_000,
|
||||||
|
gcTime: 5 * 60_000,
|
||||||
});
|
});
|
||||||
|
|
||||||
const { data: appSettings } = useQuery({
|
const { data: appSettings } = useQuery({
|
||||||
|
|||||||
Reference in New Issue
Block a user