Auto-schedule daily discovery + fix Find More UX + expand query diversity

Auto-discovery daemon:
- Runs every hour and triggers a full discovery for any user whose last run was more than 23 hours ago. The first check happens 5 minutes after startup.
- Tracks the run time in user_settings.last_discovery_run (new column).
- Manual Find More also stamps last_discovery_run.

Discovery status endpoint (GET /api/discovery/status):
- Returns pending_count (unseen queue size) and the last_run timestamp.
- Shown in the Discover page header so users can see the queue state at a glance.

Find More UX fix:
- Was: kick off a background task, wait 8 seconds, refetch (the task takes minutes).
- Now: the button shows "Queued ✓" on success, with an explanatory banner telling the user that discovery takes a few minutes and also runs automatically every day.

Query diversity:
- Added "best [category] channels" serendipity queries to crawl_by_search.
- Raised the limit from 25 to 30 queries per run.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-27 01:58:39 +02:00
parent 4d255647a1
commit 12f54ac5b0
5 changed files with 107 additions and 14 deletions
--- a/backend/main.py
+++ b/backend/main.py
@@ -131,6 +131,7 @@ def on_startup():
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
        )""",
        "ALTER TABLE user_videos ADD COLUMN feed_shown_count INTEGER NOT NULL DEFAULT 0",
+        "ALTER TABLE user_settings ADD COLUMN last_discovery_run DATETIME DEFAULT NULL",
    ]:
        try:
            db.execute(text(col_sql))
@@ -217,6 +218,48 @@ def on_startup():

    threading.Thread(target=_auto_sync_daemon, daemon=True).start()

+    def _auto_discovery_daemon():
+        import time as _time
+        from datetime import datetime as _dt, timedelta as _td
+        from sqlalchemy import text as _text
+        from .services.discovery import run_full_discovery
+
+        # Wait 5 minutes after startup before the first check so the app can
+        # finish initialising and existing enrichment tasks can settle.
+        _time.sleep(300)
+
+        while True:
+            try:
+                db = SessionLocal()
+                try:
+                    rows = db.execute(_text("""
+                        SELECT u.id AS user_id,
+                               COALESCE(us.discovery_regions, 'US,SE') AS discovery_regions,
+                               us.last_discovery_run
+                        FROM users u
+                        LEFT JOIN user_settings us ON u.id = us.user_id
+                    """)).mappings().all()
+
+                    for row in rows:
+                        last = row["last_discovery_run"]
+                        if last is None or (_dt.utcnow() - _dt.fromisoformat(str(last))) > _td(hours=23):
+                            uid = row["user_id"]
+                            regions = [r.strip().upper() for r in (row["discovery_regions"] or "US,SE").split(",") if r.strip()]
+                            run_full_discovery(db, uid, regions)
+                            db.execute(
+                                _text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
+                                {"now": _dt.utcnow(), "uid": uid},
+                            )
+                            db.commit()
+                finally:
+                    db.close()
+            except Exception:
+                pass
+
+            _time.sleep(3600)  # check every hour, run if >23 h since last run
+
+    threading.Thread(target=_auto_discovery_daemon, daemon=True).start()
+

@app.get("/api/health")
 def health():
--- a/backend/routers/discovery.py
+++ b/backend/routers/discovery.py
@@ -170,10 +170,17 @@ def refresh_discovery(
    user_id = current_user.id

    def _run_discovery():
+        """Run full discovery on a fresh DB session, then stamp the run time.
+
+        Closure: ``user_id`` and ``regions`` are captured from the enclosing
+        scope. ``last_discovery_run`` is written only if
+        ``run_full_discovery`` returns without raising; the session is closed
+        either way.
+        """
+        from datetime import datetime
        from ..database import SessionLocal
+        from sqlalchemy import text as _text
+        # A dedicated session is opened here instead of reusing one from the
+        # enclosing scope — presumably because this runs after the request
+        # has finished; the scheduling call is not visible here, TODO confirm.
        fresh_db = SessionLocal()
        try:
            run_full_discovery(fresh_db, user_id, regions)
+            # NOTE(review): this UPDATE changes nothing when the user has no
+            # user_settings row — confirm a row always exists by this point.
+            fresh_db.execute(
+                _text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"),
+                {"now": datetime.utcnow(), "uid": user_id},
+            )
+            fresh_db.commit()
        finally:
            fresh_db.close()

@@ -251,6 +258,23 @@ def dismiss_discovery_video(
    db.commit()


+@router.get("/status")
+def discovery_status(
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user),
+):
+    from ..models import UserSettings
+    s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
+    pending = db.execute(
+        text("SELECT COUNT(*) AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"),
+        {"uid": current_user.id},
+    ).mappings().first()
+    return {
+        "last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None,
+        "pending_count": pending["n"] if pending else 0,
+    }
+
+
@router.get("/community", response_model=list[dict])
 def community_shelf(
    db: Session = Depends(get_db),
--- a/backend/services/discovery.py
+++ b/backend/services/discovery.py
@@ -264,8 +264,12 @@ def crawl_by_search(db: Session, user_id: int):
    if followed_names:
        sampled_names = random.sample(followed_names, min(15, len(followed_names)))

-    # Combine: tags (most signal) + channel names (broad reach) + categories (fallback)
-    queries = list(dict.fromkeys(top_tags + sampled_names + top_cats))[:25]
+    # Serendipity queries: "best [category] channels" — surfaces curated list videos
+    # which then get their channel indexed; broadens discovery beyond direct tag matches.
+    serendipity = [f"best {cat} channels" for cat in top_cats[:3]]
+
+    # Combine: tags (most signal) + channel names (broad reach) + serendipity + categories
+    queries = list(dict.fromkeys(top_tags + sampled_names + serendipity + top_cats))[:30]
    if not queries:
        return