Popular tab: rank by real view_count, drop broken ?sort=p URL

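yt-dlp's own test suite skips its channel-sort test with the note 'Query for sorting no longer works', i.e. YouTube no longer honours the ?sort=p query. New approach: each enrichment run fetches view_count for up to 200 already-indexed videos (8 worker threads, videos missing a count first), and the Popular tab then sorts by view_count DESC WHERE view_count IS NOT NULL. The ranking is accurate for any channel once enrichment has run; videos that do not have a view_count yet are left out of the Popular tab until a later run fills them in. The frontend refetch wait is raised to 60s to cover a full run of ~200 fetches executed 8 at a time. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>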
2026-05-26 23:02:03 +02:00
parent 3e699d61b6
commit ff4d8e4ab4
2 changed files with 55 additions and 149 deletions
--- a/backend/routers/channels.py
+++ b/backend/routers/channels.py
@@ -618,47 +618,28 @@ def get_channel_videos(
        q_clause = "AND (v.title LIKE :q OR v.description LIKE :q)"
        params["q"] = f"%{q.strip()}%"

-    if sort == "popular":
-        try:
-            rows = db.execute(
-                text(f"""
-                    SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
-                           v.duration_seconds, v.published_at, v.view_count,
-                           COALESCE(uv.downloaded, 0) AS is_downloaded,
-                           COALESCE(uv.watched, 0) AS is_watched
-                    FROM channel_popular_videos cpv
-                    JOIN videos v ON cpv.video_id = v.id
-                    LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
-                    WHERE cpv.channel_id = :channel_id {q_clause}
-                    ORDER BY cpv.rank ASC
-                    LIMIT :limit OFFSET :offset
-                """),
-                params,
-            ).mappings().all()
-        except Exception:
-            rows = []
-        return [VideoOut(**dict(r)) for r in rows]
-    else:
-        order = {
-            "newest":   "v.published_at DESC NULLS LAST",
-            "oldest":   "v.published_at ASC NULLS LAST",
-            "title":    "v.title ASC",
-            "unwatched":"COALESCE(uv.watched, 0) ASC, v.published_at DESC NULLS LAST",
-        }.get(sort, "v.published_at DESC NULLS LAST")
-        rows = db.execute(
-            text(f"""
-                SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
-                       v.duration_seconds, v.published_at, v.view_count,
-                       COALESCE(uv.downloaded, 0) AS is_downloaded,
-                       COALESCE(uv.watched, 0) AS is_watched
-                FROM videos v
-                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
-                WHERE v.channel_id = :channel_id {q_clause}
-                ORDER BY {order}
-                LIMIT :limit OFFSET :offset
-            """),
-            params,
-        ).mappings().all()
+    order = {
+        "newest":   "v.published_at DESC NULLS LAST",
+        "oldest":   "v.published_at ASC NULLS LAST",
+        "title":    "v.title ASC",
+        "unwatched":"COALESCE(uv.watched, 0) ASC, v.published_at DESC NULLS LAST",
+        "popular":  "v.view_count DESC NULLS LAST",
+    }.get(sort, "v.published_at DESC NULLS LAST")
+    view_count_clause = "AND v.view_count IS NOT NULL" if sort == "popular" else ""
+    rows = db.execute(
+        text(f"""
+            SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
+                   v.duration_seconds, v.published_at, v.view_count,
+                   COALESCE(uv.downloaded, 0) AS is_downloaded,
+                   COALESCE(uv.watched, 0) AS is_watched
+            FROM videos v
+            LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
+            WHERE v.channel_id = :channel_id {view_count_clause} {q_clause}
+            ORDER BY {order}
+            LIMIT :limit OFFSET :offset
+        """),
+        params,
+    ).mappings().all()
    return [VideoOut(**dict(r)) for r in rows]


@@ -676,119 +657,44 @@ def fetch_popular_videos(


 def _fetch_popular_task(channel_id: int, youtube_channel_id: str):
-    """Fetch popular videos in two phases.
+    """Enrich indexed videos with view_count so Popular tab can rank them.

-    Phase 1 (fast): flat-playlist to get IDs + basic info, write to DB
-    immediately so the Popular tab populates within seconds.
-    Phase 2 (background thread): enrich each video with view_count and
-    published_at via individual fetches — runs while the user is already
-    browsing.
+    YouTube's ?sort=p is broken in yt-dlp (their own tests mark it skipped).
+    Instead we fetch real view counts for all indexed videos via individual
+    page requests and sort by view_count DESC locally.
+    Prioritises videos missing view_count; caps at 200 per run.
    """
-    import threading
-    from ..database import SessionLocal
-
-    if youtube_channel_id.startswith("@"):
-        url = f"https://www.youtube.com/{youtube_channel_id}/videos?sort=p"
-    else:
-        url = f"https://www.youtube.com/channel/{youtube_channel_id}/videos?sort=p"
-
-    stdout, _, _ = ytdlp._run([
-        "yt-dlp", url,
-        "--dump-json", "--flat-playlist",
-        "--playlist-end", "30",
-        "--quiet",
-        *ytdlp._cookie_args(),
-    ], timeout=60)
-
-    entries = []
-    for line in stdout.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            info = json.loads(line)
-            yt_id = info.get("id")
-            if yt_id:
-                entries.append({"id": yt_id, "title": info.get("title", ""), "duration": info.get("duration")})
-        except json.JSONDecodeError:
-            continue
-
-    if not entries:
-        return
-
-    # Phase 1: store with basic info and write popular ranks immediately
-    db = SessionLocal()
-    try:
-        # Ensure table exists regardless of whether startup migration ran
-        db.execute(text("""
-            CREATE TABLE IF NOT EXISTS channel_popular_videos (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                channel_id INTEGER NOT NULL REFERENCES channels(id) ON DELETE CASCADE,
-                video_id INTEGER NOT NULL REFERENCES videos(id) ON DELETE CASCADE,
-                rank INTEGER NOT NULL,
-                fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
-                UNIQUE(channel_id, video_id)
-            )
-        """))
-        db.commit()
-
-        channel = db.query(Channel).filter_by(id=channel_id).first()
-        if not channel:
-            return
-
-        try:
-            db.execute(text("DELETE FROM channel_popular_videos WHERE channel_id = :cid"), {"cid": channel_id})
-            db.commit()
-        except Exception:
-            db.rollback()
-
-        for rank, entry in enumerate(entries, start=1):
-            yt_id = entry["id"]
-            try:
-                existing = db.query(Video).filter_by(youtube_video_id=yt_id).first()
-                if existing:
-                    video_id = existing.id
-                else:
-                    v = Video(
-                        youtube_video_id=yt_id,
-                        channel_id=channel.id,
-                        title=entry["title"],
-                        thumbnail_url=ytdlp._stable_thumbnail(yt_id),
-                        duration_seconds=entry["duration"],
-                        tags="[]",
-                    )
-                    db.add(v)
-                    db.flush()
-                    video_id = v.id
-                db.execute(
-                    text("""
-                        INSERT INTO channel_popular_videos (channel_id, video_id, rank)
-                        VALUES (:cid, :vid, :rank)
-                        ON CONFLICT(channel_id, video_id) DO UPDATE SET rank = :rank
-                    """),
-                    {"cid": channel_id, "vid": video_id, "rank": rank},
-                )
-                db.commit()
-            except Exception:
-                db.rollback()
-    except Exception as e:
-        print(f"[popular] task error: {e}", flush=True)
-        db.rollback()
-    finally:
-        db.close()
-
-    # Phase 2: enrich view_count + dates in a daemon thread (non-blocking)
-    video_ids = [e["id"] for e in entries]
-    threading.Thread(target=_enrich_popular_videos, args=(video_ids,), daemon=True).start()
-
-
-def _enrich_popular_videos(video_ids: list):
    from ..database import SessionLocal
    from concurrent.futures import ThreadPoolExecutor, as_completed

+    db = SessionLocal()
+    try:
+        # Videos without view_count first, then those with stale counts
+        rows = db.execute(
+            text("""
+                SELECT youtube_video_id FROM videos
+                WHERE channel_id = :cid
+                ORDER BY (view_count IS NULL) DESC, published_at DESC NULLS LAST
+                LIMIT 200
+            """),
+            {"cid": channel_id},
+        ).mappings().all()
+        video_ids = [r["youtube_video_id"] for r in rows]
+    finally:
+        db.close()
+
+    if not video_ids:
+        return
+
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = {pool.submit(ytdlp.fetch_video_metadata, vid): vid for vid in video_ids}
-        results = {futures[f]: f.result() for f in as_completed(futures) if not f.exception()}
+        results = {}
+        for future in as_completed(futures):
+            vid = futures[future]
+            try:
+                results[vid] = future.result()
+            except Exception:
+                pass

    db = SessionLocal()
    try: