Add queue-based gradual discovery with shuffled call ordering and progress UI

Each yt-dlp call is now an independent task (one search query, one trending
fetch, or one graph channel fetch). Tasks are shuffled together so we don't
fire 10 searches in a row, and are then enqueued with random 30-90s gaps
between them. A full sweep of ~17 tasks therefore completes in roughly 10-25
minutes instead of hammering YouTube with 21 calls back-to-back.

Fast signals (community, category clusters) still run synchronously at
schedule time since they're pure SQL.

Progress is tracked per-user (total/done/running) and exposed on
GET /api/discovery/status. The Discovery page polls every 10s while
running and shows a progress bar + "Finding channels… X / Y" in the header.
The auto-discovery daemon skips scheduling if a manual sweep is already running.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 02:28:35 +02:00
parent e6faf8e08e
commit a535e9f22a
4 changed files with 367 additions and 44 deletions

View File

@@ -2,7 +2,7 @@ import json
import random
from typing import Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
@@ -10,7 +10,7 @@ from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import Channel, DiscoveryQueue, User, UserChannel, UserSettings
from ..services.discovery import run_full_discovery
from ..services.discovery import schedule_discovery, get_discovery_progress
router = APIRouter()
@@ -160,34 +160,14 @@ def dismiss_discovery(
@router.post("/refresh", status_code=202)
def refresh_discovery(
background_tasks: BackgroundTasks,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
regions_str = (s.discovery_regions if s and s.discovery_regions else "US,SE")
regions = [r.strip().upper() for r in regions_str.split(",") if r.strip()]
user_id = current_user.id
def _run_discovery():
from datetime import datetime
from ..database import SessionLocal
from sqlalchemy import text as _text
fresh_db = SessionLocal()
try:
run_full_discovery(fresh_db, user_id, regions)
fresh_db.execute(
_text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
{"now": datetime.utcnow(), "uid": user_id},
)
fresh_db.commit()
finally:
fresh_db.close()
background_tasks.add_task(_run_discovery)
from .channels import _enrich_missing_task
background_tasks.add_task(_enrich_missing_task, 20)
return {"detail": "Discovery refresh started"}
schedule_discovery(current_user.id, regions)
return {"detail": "Discovery queued"}
@router.get("/videos", response_model=list[dict])
@@ -263,7 +243,6 @@ def discovery_status(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
from ..models import UserSettings
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
pending = db.execute(
text("SELECT COUNT(*) AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"),
@@ -272,6 +251,7 @@ def discovery_status(
return {
"last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None,
"pending_count": pending["n"] if pending else 0,
"progress": get_discovery_progress(current_user.id),
}