Add queue-based gradual discovery with shuffled call ordering and progress UI
Each yt-dlp call is now an independent task (one search query, one trending fetch, or one graph channel fetch). Tasks are shuffled together so we don't fire 10 searches in a row, then enqueued with random 30-90s gaps between them — a full sweep of ~17 tasks completes in roughly 10-25 minutes instead of hammering YouTube with 21 calls back-to-back.

Fast signals (community, category clusters) still run synchronously at schedule time since they're pure SQL.

Progress is tracked per-user (total/done/running) and exposed on GET /api/discovery/status. The Discovery page polls every 10s while a sweep is running and shows a progress bar + "Finding channels… X / Y" in the header. The auto-discovery daemon skips scheduling if a manual sweep is already running.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,7 @@ import json
|
||||
import random
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
@@ -10,7 +10,7 @@ from sqlalchemy import text
|
||||
from ..auth_utils import get_current_user
|
||||
from ..database import get_db
|
||||
from ..models import Channel, DiscoveryQueue, User, UserChannel, UserSettings
|
||||
from ..services.discovery import run_full_discovery
|
||||
from ..services.discovery import schedule_discovery, get_discovery_progress
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -160,34 +160,14 @@ def dismiss_discovery(
|
||||
|
||||
@router.post("/refresh", status_code=202)
|
||||
def refresh_discovery(
|
||||
background_tasks: BackgroundTasks,
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
|
||||
regions_str = (s.discovery_regions if s and s.discovery_regions else "US,SE")
|
||||
regions = [r.strip().upper() for r in regions_str.split(",") if r.strip()]
|
||||
user_id = current_user.id
|
||||
|
||||
def _run_discovery():
|
||||
from datetime import datetime
|
||||
from ..database import SessionLocal
|
||||
from sqlalchemy import text as _text
|
||||
fresh_db = SessionLocal()
|
||||
try:
|
||||
run_full_discovery(fresh_db, user_id, regions)
|
||||
fresh_db.execute(
|
||||
_text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
|
||||
{"now": datetime.utcnow(), "uid": user_id},
|
||||
)
|
||||
fresh_db.commit()
|
||||
finally:
|
||||
fresh_db.close()
|
||||
|
||||
background_tasks.add_task(_run_discovery)
|
||||
from .channels import _enrich_missing_task
|
||||
background_tasks.add_task(_enrich_missing_task, 20)
|
||||
return {"detail": "Discovery refresh started"}
|
||||
schedule_discovery(current_user.id, regions)
|
||||
return {"detail": "Discovery queued"}
|
||||
|
||||
|
||||
@router.get("/videos", response_model=list[dict])
|
||||
@@ -263,7 +243,6 @@ def discovery_status(
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
from ..models import UserSettings
|
||||
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
|
||||
pending = db.execute(
|
||||
text("SELECT COUNT(*) AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"),
|
||||
@@ -272,6 +251,7 @@ def discovery_status(
|
||||
return {
|
||||
"last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None,
|
||||
"pending_count": pending["n"] if pending else 0,
|
||||
"progress": get_discovery_progress(current_user.id),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user