Add queue-based gradual discovery with shuffled call ordering and progress UI

Each yt-dlp call is now an independent task (one search query, one trending fetch, or one graph channel fetch). Tasks are shuffled together so we don't fire 10 searches in a row, then enqueued with random 30-90s gaps between them — a full sweep of ~17 tasks completes in roughly 10-25 minutes instead of hammering YouTube with 21 calls back-to-back.

Fast signals (community, category clusters) still run synchronously at schedule time since they're pure SQL.

Progress is tracked per-user (total/done/running) and exposed on GET /api/discovery/status. The Discovery page polls every 10s while a sweep is running and shows a progress bar + "Finding channels… X / Y" in the header.

The auto-discovery daemon skips scheduling if a manual sweep is already running.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-27 02:28:35 +02:00
parent e6faf8e08e
commit a535e9f22a
4 changed files with 367 additions and 44 deletions
--- a/backend/routers/discovery.py
+++ b/backend/routers/discovery.py
@@ -2,7 +2,7 @@ import json
 import random
 from typing import Optional

-from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
+from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel
 from sqlalchemy.orm import Session
 from sqlalchemy import text
@@ -10,7 +10,7 @@ from sqlalchemy import text
 from ..auth_utils import get_current_user
 from ..database import get_db
 from ..models import Channel, DiscoveryQueue, User, UserChannel, UserSettings
-from ..services.discovery import run_full_discovery
+from ..services.discovery import schedule_discovery, get_discovery_progress

 router = APIRouter()

@@ -160,34 +160,14 @@ def dismiss_discovery(

@router.post("/refresh", status_code=202)
 def refresh_discovery(
-    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
 ):
    s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
    regions_str = (s.discovery_regions if s and s.discovery_regions else "US,SE")
    regions = [r.strip().upper() for r in regions_str.split(",") if r.strip()]
-    user_id = current_user.id
-
-    def _run_discovery():
-        from datetime import datetime
-        from ..database import SessionLocal
-        from sqlalchemy import text as _text
-        fresh_db = SessionLocal()
-        try:
-            run_full_discovery(fresh_db, user_id, regions)
-            fresh_db.execute(
-                _text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
-                {"now": datetime.utcnow(), "uid": user_id},
-            )
-            fresh_db.commit()
-        finally:
-            fresh_db.close()
-
-    background_tasks.add_task(_run_discovery)
-    from .channels import _enrich_missing_task
-    background_tasks.add_task(_enrich_missing_task, 20)
-    return {"detail": "Discovery refresh started"}
+    schedule_discovery(current_user.id, regions)
+    return {"detail": "Discovery queued"}


@router.get("/videos", response_model=list[dict])
@@ -263,7 +243,6 @@ def discovery_status(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
 ):
-    from ..models import UserSettings
    s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
    pending = db.execute(
        text("SELECT COUNT(*) AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"),
@@ -272,6 +251,7 @@ def discovery_status(
    return {
        "last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None,
        "pending_count": pending["n"] if pending else 0,
+        "progress": get_discovery_progress(current_user.id),
    }