Add queue-based gradual discovery with shuffled call ordering and progress UI
Each yt-dlp call is now an independent task (one search query, one trending fetch, one graph channel fetch). Tasks are shuffled together so we don't fire 10 searches in a row, then enqueued with 30-90s random gaps between them — a full sweep of ~17 tasks completes in roughly 10-25 minutes instead of hammering YouTube with 21 calls back-to-back. Fast signals (community, category clusters) still run synchronously at schedule time since they're pure SQL. Progress is tracked per-user (total/done/running) and exposed on GET /api/discovery/status. The Discovery page polls every 10s while running and shows a progress bar + "Finding channels… X / Y" in the header. The auto-discovery daemon skips scheduling if a manual sweep is already running. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -175,9 +175,11 @@ def on_startup():
|
|||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
# Backfill descriptions for videos that don't have them yet (runs in background)
|
# Start discovery worker and backfill enrichment
|
||||||
import threading
|
import threading
|
||||||
from .routers.channels import _enrich_missing_task, _index_channels_batch
|
from .routers.channels import _enrich_missing_task, _index_channels_batch
|
||||||
|
from .services.discovery import start_discovery_worker
|
||||||
|
start_discovery_worker()
|
||||||
threading.Thread(target=_enrich_missing_task, args=(50,), daemon=True).start()
|
threading.Thread(target=_enrich_missing_task, args=(50,), daemon=True).start()
|
||||||
|
|
||||||
def _auto_sync_daemon():
|
def _auto_sync_daemon():
|
||||||
@@ -222,7 +224,7 @@ def on_startup():
|
|||||||
import time as _time
|
import time as _time
|
||||||
from datetime import datetime as _dt, timedelta as _td
|
from datetime import datetime as _dt, timedelta as _td
|
||||||
from sqlalchemy import text as _text
|
from sqlalchemy import text as _text
|
||||||
from .services.discovery import run_full_discovery
|
from .services.discovery import schedule_discovery, get_discovery_progress
|
||||||
|
|
||||||
# Wait 5 minutes after startup before the first check so the app can
|
# Wait 5 minutes after startup before the first check so the app can
|
||||||
# finish initialising and existing enrichment tasks can settle.
|
# finish initialising and existing enrichment tasks can settle.
|
||||||
@@ -244,13 +246,12 @@ def on_startup():
|
|||||||
last = row["last_discovery_run"]
|
last = row["last_discovery_run"]
|
||||||
if last is None or (_dt.utcnow() - _dt.fromisoformat(str(last))) > _td(hours=23):
|
if last is None or (_dt.utcnow() - _dt.fromisoformat(str(last))) > _td(hours=23):
|
||||||
uid = row["user_id"]
|
uid = row["user_id"]
|
||||||
|
# Skip if a manual sweep is already running
|
||||||
|
prog = get_discovery_progress(uid)
|
||||||
|
if prog and prog.get("running"):
|
||||||
|
continue
|
||||||
regions = [r.strip().upper() for r in (row["discovery_regions"] or "US,SE").split(",") if r.strip()]
|
regions = [r.strip().upper() for r in (row["discovery_regions"] or "US,SE").split(",") if r.strip()]
|
||||||
run_full_discovery(db, uid, regions)
|
schedule_discovery(uid, regions)
|
||||||
db.execute(
|
|
||||||
_text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
|
|
||||||
{"now": _dt.utcnow(), "uid": uid},
|
|
||||||
)
|
|
||||||
db.commit()
|
|
||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import json
|
|||||||
import random
|
import random
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
@@ -10,7 +10,7 @@ from sqlalchemy import text
|
|||||||
from ..auth_utils import get_current_user
|
from ..auth_utils import get_current_user
|
||||||
from ..database import get_db
|
from ..database import get_db
|
||||||
from ..models import Channel, DiscoveryQueue, User, UserChannel, UserSettings
|
from ..models import Channel, DiscoveryQueue, User, UserChannel, UserSettings
|
||||||
from ..services.discovery import run_full_discovery
|
from ..services.discovery import schedule_discovery, get_discovery_progress
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -160,34 +160,14 @@ def dismiss_discovery(
|
|||||||
|
|
||||||
@router.post("/refresh", status_code=202)
|
@router.post("/refresh", status_code=202)
|
||||||
def refresh_discovery(
|
def refresh_discovery(
|
||||||
background_tasks: BackgroundTasks,
|
|
||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
):
|
):
|
||||||
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
|
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
|
||||||
regions_str = (s.discovery_regions if s and s.discovery_regions else "US,SE")
|
regions_str = (s.discovery_regions if s and s.discovery_regions else "US,SE")
|
||||||
regions = [r.strip().upper() for r in regions_str.split(",") if r.strip()]
|
regions = [r.strip().upper() for r in regions_str.split(",") if r.strip()]
|
||||||
user_id = current_user.id
|
schedule_discovery(current_user.id, regions)
|
||||||
|
return {"detail": "Discovery queued"}
|
||||||
def _run_discovery():
|
|
||||||
from datetime import datetime
|
|
||||||
from ..database import SessionLocal
|
|
||||||
from sqlalchemy import text as _text
|
|
||||||
fresh_db = SessionLocal()
|
|
||||||
try:
|
|
||||||
run_full_discovery(fresh_db, user_id, regions)
|
|
||||||
fresh_db.execute(
|
|
||||||
_text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
|
|
||||||
{"now": datetime.utcnow(), "uid": user_id},
|
|
||||||
)
|
|
||||||
fresh_db.commit()
|
|
||||||
finally:
|
|
||||||
fresh_db.close()
|
|
||||||
|
|
||||||
background_tasks.add_task(_run_discovery)
|
|
||||||
from .channels import _enrich_missing_task
|
|
||||||
background_tasks.add_task(_enrich_missing_task, 20)
|
|
||||||
return {"detail": "Discovery refresh started"}
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/videos", response_model=list[dict])
|
@router.get("/videos", response_model=list[dict])
|
||||||
@@ -263,7 +243,6 @@ def discovery_status(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
):
|
):
|
||||||
from ..models import UserSettings
|
|
||||||
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
|
s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
|
||||||
pending = db.execute(
|
pending = db.execute(
|
||||||
text("SELECT COUNT(*) AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"),
|
text("SELECT COUNT(*) AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"),
|
||||||
@@ -272,6 +251,7 @@ def discovery_status(
|
|||||||
return {
|
return {
|
||||||
"last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None,
|
"last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None,
|
||||||
"pending_count": pending["n"] if pending else 0,
|
"pending_count": pending["n"] if pending else 0,
|
||||||
|
"progress": get_discovery_progress(current_user.id),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
"""Discovery engine — search-based crawl, trending, community signal, category clustering."""
|
"""Discovery engine — search-based crawl, trending, community signal, category clustering."""
|
||||||
import json
|
import json
|
||||||
|
import queue as _queue
|
||||||
import random
|
import random
|
||||||
|
import threading as _threading
|
||||||
|
import time as _time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
@@ -8,6 +11,16 @@ from sqlalchemy import text
|
|||||||
from ..models import Channel, UserChannel, DiscoveryQueue, Video
|
from ..models import Channel, UserChannel, DiscoveryQueue, Video
|
||||||
from . import ytdlp
|
from . import ytdlp
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Background task queue — spaces yt-dlp calls 30-90 s apart and shuffles
|
||||||
|
# call types so we don't fire 10 searches in a row.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
_task_queue: _queue.Queue = _queue.Queue()
|
||||||
|
_progress: dict[int, dict] = {} # user_id -> {total, done, running}
|
||||||
|
_progress_lock = _threading.Lock()
|
||||||
|
_worker_started = False
|
||||||
|
_worker_lock = _threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def _fetch_and_index_channel(db: Session, channel: Channel):
|
def _fetch_and_index_channel(db: Session, channel: Channel):
|
||||||
"""Fetch full metadata + recent videos for a discovered channel."""
|
"""Fetch full metadata + recent videos for a discovered channel."""
|
||||||
@@ -680,3 +693,316 @@ def run_full_discovery(db: Session, user_id: int, regions: list[str] | None = No
|
|||||||
# update_watch_signal skipped — tags already included in crawl_by_search
|
# update_watch_signal skipped — tags already included in crawl_by_search
|
||||||
update_trending_signal(db, user_id, regions[:1]) # 1 yt-dlp call (first region only)
|
update_trending_signal(db, user_id, regions[:1]) # 1 yt-dlp call (first region only)
|
||||||
update_graph_signal(db, user_id) # ~6 yt-dlp calls
|
update_graph_signal(db, user_id) # ~6 yt-dlp calls
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Queue-based gradual discovery — each yt-dlp call is its own task, shuffled
|
||||||
|
# so call types are mixed, with 30-90 s gaps between them.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _get_followed_yt_ids(db: Session, user_id: int) -> set[str]:
|
||||||
|
return set(db.execute(
|
||||||
|
text("""
|
||||||
|
SELECT c.youtube_channel_id FROM channels c
|
||||||
|
JOIN user_channels uc ON c.id = uc.channel_id
|
||||||
|
WHERE uc.user_id = :uid AND uc.status = 'followed'
|
||||||
|
"""),
|
||||||
|
{"uid": user_id},
|
||||||
|
).scalars().all())
|
||||||
|
|
||||||
|
|
||||||
|
def _get_neg_tags(db: Session, user_id: int) -> frozenset[str]:
|
||||||
|
return frozenset(db.execute(
|
||||||
|
text("SELECT tag FROM user_tag_affinity WHERE user_id = :uid AND score < -2"),
|
||||||
|
{"uid": user_id},
|
||||||
|
).scalars().all())
|
||||||
|
|
||||||
|
|
||||||
|
def _stamp_last_run(user_id: int):
|
||||||
|
from ..database import SessionLocal
|
||||||
|
from sqlalchemy import text as _text
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db.execute(
|
||||||
|
_text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
|
||||||
|
{"now": datetime.utcnow(), "uid": user_id},
|
||||||
|
)
|
||||||
|
db.commit()
|
||||||
|
except Exception:
|
||||||
|
db.rollback()
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _do_task_search(user_id: int, query: str, source: str, score_multiplier: float):
|
||||||
|
from ..database import SessionLocal
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
followed_yt_ids = _get_followed_yt_ids(db, user_id)
|
||||||
|
neg_tags = _get_neg_tags(db, user_id)
|
||||||
|
_search_and_store(db, user_id, [query], followed_yt_ids, score_multiplier, source, neg_tags)
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _do_task_trending(user_id: int, region: str):
|
||||||
|
from ..database import SessionLocal
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
update_trending_signal(db, user_id, [region])
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_graph_for_channel(db: Session, user_id: int, source_yt_id: str):
|
||||||
|
"""Fetch featured channels for one followed channel and add to discovery queue."""
|
||||||
|
followed_yt_ids = _get_followed_yt_ids(db, user_id)
|
||||||
|
dismissed_ids = set(db.execute(
|
||||||
|
text("SELECT channel_id FROM user_channels WHERE user_id = :uid AND status = 'dismissed'"),
|
||||||
|
{"uid": user_id},
|
||||||
|
).scalars().all())
|
||||||
|
|
||||||
|
try:
|
||||||
|
featured = ytdlp.fetch_featured_channels(source_yt_id)
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
|
||||||
|
needs_indexing: list[int] = []
|
||||||
|
for yt_id in featured:
|
||||||
|
if yt_id in followed_yt_ids:
|
||||||
|
continue
|
||||||
|
channel = db.query(Channel).filter_by(youtube_channel_id=yt_id).first()
|
||||||
|
is_new = channel is None
|
||||||
|
if not channel:
|
||||||
|
channel = Channel(youtube_channel_id=yt_id, name="", description="", thumbnail_url=None)
|
||||||
|
db.add(channel)
|
||||||
|
db.flush()
|
||||||
|
if channel.id in dismissed_ids:
|
||||||
|
continue
|
||||||
|
uc = db.query(UserChannel).filter_by(user_id=user_id, channel_id=channel.id).first()
|
||||||
|
if uc and uc.status in ("followed", "dismissed"):
|
||||||
|
continue
|
||||||
|
_add_to_discovery(db, user_id, channel.id, score=8.0, source="graph")
|
||||||
|
if is_new or not channel.crawled_at:
|
||||||
|
needs_indexing.append(channel.id)
|
||||||
|
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
for channel_id in needs_indexing[:3]:
|
||||||
|
ch = db.query(Channel).filter_by(id=channel_id).first()
|
||||||
|
if ch:
|
||||||
|
_fetch_and_index_channel(db, ch)
|
||||||
|
|
||||||
|
|
||||||
|
def _do_task_graph(user_id: int, source_yt_id: str):
|
||||||
|
from ..database import SessionLocal
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
_fetch_graph_for_channel(db, user_id, source_yt_id)
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _worker_loop():
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
user_id, task = _task_queue.get(timeout=10)
|
||||||
|
except _queue.Empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
task()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
with _progress_lock:
|
||||||
|
p = _progress.get(user_id)
|
||||||
|
if p:
|
||||||
|
p["done"] = min(p["done"] + 1, p["total"])
|
||||||
|
if p["done"] >= p["total"] and p["running"]:
|
||||||
|
p["running"] = False
|
||||||
|
_threading.Thread(target=_stamp_last_run, args=(user_id,), daemon=True).start()
|
||||||
|
|
||||||
|
_task_queue.task_done()
|
||||||
|
|
||||||
|
# Polite gap — only sleep if more tasks are waiting
|
||||||
|
if not _task_queue.empty():
|
||||||
|
_time.sleep(random.uniform(30, 90))
|
||||||
|
|
||||||
|
|
||||||
|
def start_discovery_worker():
|
||||||
|
"""Start the singleton background worker thread (idempotent)."""
|
||||||
|
global _worker_started
|
||||||
|
with _worker_lock:
|
||||||
|
if not _worker_started:
|
||||||
|
_threading.Thread(target=_worker_loop, daemon=True, name="discovery-worker").start()
|
||||||
|
_worker_started = True
|
||||||
|
|
||||||
|
|
||||||
|
def get_discovery_progress(user_id: int) -> dict | None:
|
||||||
|
with _progress_lock:
|
||||||
|
p = _progress.get(user_id)
|
||||||
|
return dict(p) if p is not None else None
|
||||||
|
|
||||||
|
|
||||||
|
def _build_search_task_args(db: Session, user_id: int) -> list[tuple[str, str, float]]:
|
||||||
|
"""Compute all search/liked query strings without executing any yt-dlp calls."""
|
||||||
|
result: list[tuple[str, str, float]] = []
|
||||||
|
|
||||||
|
followed_rows = db.execute(
|
||||||
|
text("""
|
||||||
|
SELECT c.name, c.youtube_channel_id
|
||||||
|
FROM channels c
|
||||||
|
JOIN user_channels uc ON c.id = uc.channel_id
|
||||||
|
WHERE uc.user_id = :user_id AND uc.status = 'followed'
|
||||||
|
"""),
|
||||||
|
{"user_id": user_id},
|
||||||
|
).mappings().all()
|
||||||
|
|
||||||
|
followed_names = [row["name"] for row in followed_rows if row["name"]]
|
||||||
|
|
||||||
|
tag_rows = db.execute(
|
||||||
|
text("""
|
||||||
|
SELECT tags FROM (
|
||||||
|
SELECT v.tags FROM videos v
|
||||||
|
JOIN user_channels uc ON v.channel_id = uc.channel_id
|
||||||
|
WHERE uc.user_id = :user_id AND uc.status = 'followed'
|
||||||
|
AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
|
||||||
|
LIMIT 300
|
||||||
|
)
|
||||||
|
UNION ALL
|
||||||
|
SELECT tags FROM (
|
||||||
|
SELECT v.tags FROM user_videos uv
|
||||||
|
JOIN videos v ON uv.video_id = v.id
|
||||||
|
WHERE uv.user_id = :user_id AND uv.liked = 1
|
||||||
|
AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
|
||||||
|
LIMIT 100
|
||||||
|
)
|
||||||
|
"""),
|
||||||
|
{"user_id": user_id},
|
||||||
|
).mappings().all()
|
||||||
|
|
||||||
|
tag_counts: dict[str, int] = {}
|
||||||
|
liked_tag_counts: dict[str, int] = {}
|
||||||
|
for row in tag_rows:
|
||||||
|
try:
|
||||||
|
for tag in json.loads(row["tags"]):
|
||||||
|
if isinstance(tag, str):
|
||||||
|
t = tag.lower().strip()
|
||||||
|
if 3 <= len(t) <= 40:
|
||||||
|
tag_counts[t] = tag_counts.get(t, 0) + 1
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
cat_rows = db.execute(
|
||||||
|
text("""
|
||||||
|
SELECT v.category, COUNT(*) AS cnt
|
||||||
|
FROM videos v
|
||||||
|
JOIN user_channels uc ON v.channel_id = uc.channel_id
|
||||||
|
WHERE uc.user_id = :user_id AND uc.status = 'followed'
|
||||||
|
AND v.category IS NOT NULL
|
||||||
|
GROUP BY v.category
|
||||||
|
ORDER BY cnt DESC
|
||||||
|
LIMIT 5
|
||||||
|
"""),
|
||||||
|
{"user_id": user_id},
|
||||||
|
).mappings().all()
|
||||||
|
|
||||||
|
top_tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: -x[1])[:5]]
|
||||||
|
top_cats = [r["category"] for r in cat_rows]
|
||||||
|
sampled_names = random.sample(followed_names, min(4, len(followed_names))) if followed_names else []
|
||||||
|
serendipity = [f"best {top_cats[0]} channels"] if top_cats else []
|
||||||
|
search_queries = list(dict.fromkeys(top_tags + sampled_names + serendipity + top_cats[:2]))[:10]
|
||||||
|
for q in search_queries:
|
||||||
|
result.append((q, "search", 5.0))
|
||||||
|
|
||||||
|
# Liked signal queries
|
||||||
|
liked_rows = db.execute(
|
||||||
|
text("""
|
||||||
|
SELECT v.tags FROM user_videos uv
|
||||||
|
JOIN videos v ON uv.video_id = v.id
|
||||||
|
WHERE uv.user_id = :user_id AND uv.liked = 1
|
||||||
|
AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
|
||||||
|
"""),
|
||||||
|
{"user_id": user_id},
|
||||||
|
).mappings().all()
|
||||||
|
|
||||||
|
for row in liked_rows:
|
||||||
|
try:
|
||||||
|
for tag in json.loads(row["tags"]):
|
||||||
|
if isinstance(tag, str):
|
||||||
|
t = tag.lower().strip()
|
||||||
|
if 3 <= len(t) <= 40:
|
||||||
|
liked_tag_counts[t] = liked_tag_counts.get(t, 0) + 2
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
for q in [t for t, _ in sorted(liked_tag_counts.items(), key=lambda x: -x[1])[:4]]:
|
||||||
|
result.append((q, "liked", 10.0))
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_graph_yt_ids(db: Session, user_id: int) -> list[str]:
|
||||||
|
rows = db.execute(
|
||||||
|
text("""
|
||||||
|
SELECT c.youtube_channel_id
|
||||||
|
FROM channels c
|
||||||
|
JOIN user_channels uc ON c.id = uc.channel_id
|
||||||
|
WHERE uc.user_id = :user_id AND uc.status = 'followed'
|
||||||
|
AND c.youtube_channel_id IS NOT NULL
|
||||||
|
"""),
|
||||||
|
{"user_id": user_id},
|
||||||
|
).scalars().all()
|
||||||
|
if not rows:
|
||||||
|
return []
|
||||||
|
return random.sample(list(rows), min(6, len(rows)))
|
||||||
|
|
||||||
|
|
||||||
|
def schedule_discovery(user_id: int, regions: list[str] | None = None):
|
||||||
|
"""Schedule a full discovery sweep, spreading yt-dlp calls 30-90 s apart
|
||||||
|
with call types shuffled so searches, graph fetches, and trending are mixed."""
|
||||||
|
if regions is None:
|
||||||
|
regions = ["US", "SE"]
|
||||||
|
|
||||||
|
from ..database import SessionLocal
|
||||||
|
|
||||||
|
# Fast signals (pure SQL, no yt-dlp) run synchronously right now
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db.execute(
|
||||||
|
text("""
|
||||||
|
DELETE FROM discovery_queue
|
||||||
|
WHERE user_id = :uid AND seen = 0
|
||||||
|
AND created_at <= datetime('now', '-14 days')
|
||||||
|
"""),
|
||||||
|
{"uid": user_id},
|
||||||
|
)
|
||||||
|
db.commit()
|
||||||
|
update_community_signal(db, user_id)
|
||||||
|
update_category_clusters(db, user_id)
|
||||||
|
|
||||||
|
search_args = _build_search_task_args(db, user_id)
|
||||||
|
graph_yt_ids = _sample_graph_yt_ids(db, user_id)
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
# Build one task per yt-dlp call, then shuffle to mix call types
|
||||||
|
tasks: list[tuple[int, object]] = []
|
||||||
|
for query, source, mult in search_args:
|
||||||
|
tasks.append((user_id, lambda q=query, s=source, m=mult: _do_task_search(user_id, q, s, m)))
|
||||||
|
for region in regions[:1]:
|
||||||
|
tasks.append((user_id, lambda r=region: _do_task_trending(user_id, r)))
|
||||||
|
for yt_id in graph_yt_ids:
|
||||||
|
tasks.append((user_id, lambda y=yt_id: _do_task_graph(user_id, y)))
|
||||||
|
|
||||||
|
random.shuffle(tasks)
|
||||||
|
|
||||||
|
with _progress_lock:
|
||||||
|
_progress[user_id] = {"total": len(tasks), "done": 0, "running": bool(tasks)}
|
||||||
|
|
||||||
|
for item in tasks:
|
||||||
|
_task_queue.put(item)
|
||||||
|
|
||||||
|
if not tasks:
|
||||||
|
_stamp_last_run(user_id)
|
||||||
|
|||||||
@@ -217,7 +217,8 @@ export default function DiscoveryPage() {
|
|||||||
const { data: discStatus } = useQuery({
|
const { data: discStatus } = useQuery({
|
||||||
queryKey: ["discovery-status"],
|
queryKey: ["discovery-status"],
|
||||||
queryFn: () => getDiscoveryStatus().then(r => r.data),
|
queryFn: () => getDiscoveryStatus().then(r => r.data),
|
||||||
staleTime: 60_000,
|
staleTime: 10_000,
|
||||||
|
refetchInterval: (query) => query.state.data?.progress?.running ? 10_000 : 60_000,
|
||||||
});
|
});
|
||||||
|
|
||||||
const refreshMut = useMutation({
|
const refreshMut = useMutation({
|
||||||
@@ -247,11 +248,13 @@ export default function DiscoveryPage() {
|
|||||||
<div className="flex flex-col gap-6">
|
<div className="flex flex-col gap-6">
|
||||||
{/* Header */}
|
{/* Header */}
|
||||||
<div className="flex items-center justify-between gap-4">
|
<div className="flex items-center justify-between gap-4">
|
||||||
<div className="min-w-0">
|
<div className="min-w-0 flex-1">
|
||||||
<h1 className="font-display font-bold text-2xl text-zinc-100">Discover</h1>
|
<h1 className="font-display font-bold text-2xl text-zinc-100">Discover</h1>
|
||||||
{discStatus && (
|
{discStatus && (
|
||||||
<p className="text-xs text-zinc-500 mt-0.5">
|
<p className="text-xs text-zinc-500 mt-0.5">
|
||||||
{discStatus.pending_count > 0
|
{discStatus.progress?.running
|
||||||
|
? `Finding channels… ${discStatus.progress.done} / ${discStatus.progress.total}`
|
||||||
|
: discStatus.pending_count > 0
|
||||||
? `${discStatus.pending_count} channel${discStatus.pending_count !== 1 ? "s" : ""} queued`
|
? `${discStatus.pending_count} channel${discStatus.pending_count !== 1 ? "s" : ""} queued`
|
||||||
: "Queue empty"}
|
: "Queue empty"}
|
||||||
{discStatus.last_run
|
{discStatus.last_run
|
||||||
@@ -259,25 +262,38 @@ export default function DiscoveryPage() {
|
|||||||
: " · never refreshed"}
|
: " · never refreshed"}
|
||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
|
{discStatus?.progress?.running && (
|
||||||
|
<div className="mt-1.5 h-1 w-48 rounded-full bg-zinc-800 overflow-hidden">
|
||||||
|
<div
|
||||||
|
className="h-full bg-accent rounded-full transition-all duration-500"
|
||||||
|
style={{ width: `${Math.round((discStatus.progress.done / discStatus.progress.total) * 100)}%` }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<button
|
<button
|
||||||
onClick={() => refreshMut.mutate()}
|
onClick={() => refreshMut.mutate()}
|
||||||
disabled={refreshMut.isPending || refreshMut.isSuccess}
|
disabled={refreshMut.isPending || discStatus?.progress?.running}
|
||||||
className="shrink-0 flex items-center gap-2 text-sm font-medium px-4 py-2 bg-zinc-800 text-zinc-300 rounded-lg hover:bg-zinc-700 transition-colors disabled:opacity-60"
|
className="shrink-0 flex items-center gap-2 text-sm font-medium px-4 py-2 bg-zinc-800 text-zinc-300 rounded-lg hover:bg-zinc-700 transition-colors disabled:opacity-60"
|
||||||
>
|
>
|
||||||
{refreshMut.isPending && (
|
{discStatus?.progress?.running ? (
|
||||||
<svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24">
|
<svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v4a4 4 0 00-4 4H4z" />
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v4a4 4 0 00-4 4H4z" />
|
||||||
</svg>
|
</svg>
|
||||||
)}
|
) : refreshMut.isPending ? (
|
||||||
{refreshMut.isSuccess ? "Queued ✓" : refreshMut.isPending ? "Queueing…" : "Find more"}
|
<svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||||
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v4a4 4 0 00-4 4H4z" />
|
||||||
|
</svg>
|
||||||
|
) : null}
|
||||||
|
{discStatus?.progress?.running ? "Running…" : refreshMut.isSuccess ? "Queued ✓" : refreshMut.isPending ? "Queueing…" : "Find more"}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{refreshMut.isSuccess && (
|
{(refreshMut.isSuccess || discStatus?.progress?.running) && (
|
||||||
<div className="px-4 py-3 rounded-xl bg-zinc-800/60 border border-zinc-700/50 text-sm text-zinc-400">
|
<div className="px-4 py-3 rounded-xl bg-zinc-800/60 border border-zinc-700/50 text-sm text-zinc-400">
|
||||||
Discovery is running in the background — it searches YouTube using your tags and interests and takes a few minutes. New channels will appear when it finishes. It also runs automatically every day.
|
Discovery is running in the background — searches and channel fetches are spaced out over ~20 minutes to avoid hitting limits. New channels appear as each batch completes. Runs automatically every day.
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user