From 12f54ac5b016de316e00d6d07ae7e8b46ec06852 Mon Sep 17 00:00:00 2001 From: Mattias Thall Date: Wed, 27 May 2026 01:58:39 +0200 Subject: [PATCH] Auto-schedule daily discovery + fix Find More UX + expand query diversity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-discovery daemon: - Runs every hour, triggers full discovery for any user whose last run was >23 hours ago. First check is 5 minutes after startup. - Tracks run time in user_settings.last_discovery_run (new column). - Manual Find More also stamps last_discovery_run. Discovery status endpoint (GET /api/discovery/status): - Returns pending_count (unseen queue size) and last_run timestamp. - Shown in the Discover page header so users know queue state at a glance. Find More UX fix: - Was: kick background task, wait 8 seconds, refetch (task takes minutes). - Now: button shows "Queued ✓" on success with an explanatory banner telling the user it takes a few minutes and also runs daily automatically. Query diversity: - Added "best [category] channels" serendipity queries to crawl_by_search. - Limit raised from 25 to 30 queries per run. 
Co-Authored-By: Claude Sonnet 4.6 --- backend/main.py | 43 ++++++++++++++++++++++++++++++ backend/routers/discovery.py | 24 +++++++++++++++++ backend/services/discovery.py | 8 ++++-- frontend/src/api/index.js | 1 + frontend/src/pages/Discovery.jsx | 45 +++++++++++++++++++++++--------- 5 files changed, 107 insertions(+), 14 deletions(-) diff --git a/backend/main.py b/backend/main.py index f558fdc..7befbf5 100644 --- a/backend/main.py +++ b/backend/main.py @@ -131,6 +131,7 @@ def on_startup(): created_at DATETIME DEFAULT CURRENT_TIMESTAMP )""", "ALTER TABLE user_videos ADD COLUMN feed_shown_count INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE user_settings ADD COLUMN last_discovery_run DATETIME DEFAULT NULL", ]: try: db.execute(text(col_sql)) @@ -217,6 +218,48 @@ def on_startup(): threading.Thread(target=_auto_sync_daemon, daemon=True).start() + def _auto_discovery_daemon(): + import time as _time + from datetime import datetime as _dt, timedelta as _td + from sqlalchemy import text as _text + from .services.discovery import run_full_discovery + + # Wait 5 minutes after startup before the first check so the app can + # finish initialising and existing enrichment tasks can settle. 
+ _time.sleep(300) + + while True: + try: + db = SessionLocal() + try: + rows = db.execute(_text(""" + SELECT u.id AS user_id, + COALESCE(us.discovery_regions, 'US,SE') AS discovery_regions, + us.last_discovery_run + FROM users u + LEFT JOIN user_settings us ON u.id = us.user_id + """)).mappings().all() + + for row in rows: + last = row["last_discovery_run"] + if last is None or (_dt.utcnow() - _dt.fromisoformat(str(last))) > _td(hours=23): + uid = row["user_id"] + regions = [r.strip().upper() for r in (row["discovery_regions"] or "US,SE").split(",") if r.strip()] + run_full_discovery(db, uid, regions) + db.execute( + _text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"), + {"now": _dt.utcnow(), "uid": uid}, + ) + db.commit() + finally: + db.close() + except Exception: + pass + + _time.sleep(3600) # check every hour, run if >23 h since last run + + threading.Thread(target=_auto_discovery_daemon, daemon=True).start() + @app.get("/api/health") def health(): diff --git a/backend/routers/discovery.py b/backend/routers/discovery.py index 741653c..434efae 100644 --- a/backend/routers/discovery.py +++ b/backend/routers/discovery.py @@ -170,10 +170,17 @@ def refresh_discovery( user_id = current_user.id def _run_discovery(): + from datetime import datetime from ..database import SessionLocal + from sqlalchemy import text as _text fresh_db = SessionLocal() try: run_full_discovery(fresh_db, user_id, regions) + fresh_db.execute( + _text("UPDATE user_settings SET last_discovery_run = :now WHERE user_id = :uid"), + {"now": datetime.utcnow(), "uid": user_id}, + ) + fresh_db.commit() finally: fresh_db.close() @@ -251,6 +258,23 @@ def dismiss_discovery_video( db.commit() +@router.get("/status") +def discovery_status( + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user), +): + from ..models import UserSettings + s = db.query(UserSettings).filter_by(user_id=current_user.id).first() + pending = db.execute( + text("SELECT COUNT(*) 
AS n FROM discovery_queue WHERE user_id = :uid AND seen = 0"), + {"uid": current_user.id}, + ).mappings().first() + return { + "last_run": s.last_discovery_run.isoformat() if s and s.last_discovery_run else None, + "pending_count": pending["n"] if pending else 0, + } + + @router.get("/community", response_model=list[dict]) def community_shelf( db: Session = Depends(get_db), diff --git a/backend/services/discovery.py b/backend/services/discovery.py index 172317f..ded0bdc 100644 --- a/backend/services/discovery.py +++ b/backend/services/discovery.py @@ -264,8 +264,12 @@ def crawl_by_search(db: Session, user_id: int): if followed_names: sampled_names = random.sample(followed_names, min(15, len(followed_names))) - # Combine: tags (most signal) + channel names (broad reach) + categories (fallback) - queries = list(dict.fromkeys(top_tags + sampled_names + top_cats))[:25] + # Serendipity queries: "best [category] channels" — surfaces curated list videos + # which then get their channel indexed; broadens discovery beyond direct tag matches. 
+ serendipity = [f"best {cat} channels" for cat in top_cats[:3]] + + # Combine: tags (most signal) + channel names (broad reach) + serendipity + categories + queries = list(dict.fromkeys(top_tags + sampled_names + serendipity + top_cats))[:30] if not queries: return diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js index 329f88a..8516e51 100644 --- a/frontend/src/api/index.js +++ b/frontend/src/api/index.js @@ -140,6 +140,7 @@ export const followDiscovery = (channelId) => export const dismissDiscovery = (channelId) => api.post(`/discovery/${channelId}/dismiss`); export const refreshDiscovery = () => api.post("/discovery/refresh"); +export const getDiscoveryStatus = () => api.get("/discovery/status"); export const getCommunityShelf = () => api.get("/discovery/community"); // Stats diff --git a/frontend/src/pages/Discovery.jsx b/frontend/src/pages/Discovery.jsx index 42341cd..9e0c7be 100644 --- a/frontend/src/pages/Discovery.jsx +++ b/frontend/src/pages/Discovery.jsx @@ -4,6 +4,7 @@ import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query"; import { getDiscovery, getDiscoveryVideos, followDiscovery, dismissDiscovery, dismissDiscoveryVideo, refreshDiscovery, + getDiscoveryStatus, } from "../api"; import VideoCard from "../components/VideoCard"; import { scrollToTop } from "../utils/scroll"; @@ -213,12 +214,20 @@ export default function DiscoveryPage() { placeholderData: (prev) => prev, }); + const { data: discStatus } = useQuery({ + queryKey: ["discovery-status"], + queryFn: () => getDiscoveryStatus().then(r => r.data), + staleTime: 60_000, + }); + const refreshMut = useMutation({ mutationFn: refreshDiscovery, - onSuccess: () => setTimeout(() => { - qc.invalidateQueries({ queryKey: ["discovery"] }); - qc.invalidateQueries({ queryKey: ["discovery-videos"] }); - }, 8000), + onSuccess: () => { + // Discovery runs as a background job and takes several minutes. 
+ // Invalidate the status query once so the header refetches; this handler + // sets up no polling and no longer refetches the channel or video lists. + qc.invalidateQueries({ queryKey: ["discovery-status"] }); + }, }); const handleDismissVideo = (video) => { @@ -237,12 +246,24 @@ export default function DiscoveryPage() { return (
{/* Header */} -
-

Discover

+
+
+

Discover

+ {discStatus && ( +

+ {discStatus.pending_count > 0 + ? `${discStatus.pending_count} channel${discStatus.pending_count !== 1 ? "s" : ""} queued` + : "Queue empty"} + {discStatus.last_run + ? ` · last refreshed ${new Date(discStatus.last_run + "Z").toLocaleDateString(undefined, { month: "short", day: "numeric", hour: "2-digit", minute: "2-digit" })}` + : " · never refreshed"} +

+ )} +
- {refreshMut.isSuccess && !refreshMut.isPending && ( -
- Searching YouTube for new channels — results will appear in a few seconds. + {refreshMut.isSuccess && ( +
+ Discovery is running in the background — it searches YouTube using your tags and interests and takes a few minutes. New channels will appear when it finishes. It also runs automatically every day.
)}