Parallelize discovery searches and add graph signal

Run search queries concurrently (5 workers) instead of sequentially, which cuts crawl time dramatically.

Add a graph signal: fetch featured channels from each followed channel's /channels tab in parallel (4 workers). This surfaces creator-curated recommendations as a high-signal, diverse pool that search alone can't reach.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 21:59:23 +02:00
parent 62c2c73906
commit 871f668525
2 changed files with 116 additions and 5 deletions
--- a/backend/services/ytdlp.py
+++ b/backend/services/ytdlp.py
@@ -350,6 +350,39 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30) -> dict | None
    return {"channel": channel_info, "videos": videos}


+def fetch_featured_channels(channel_id: str) -> list[str]:
+    """Fetch channel IDs from the /channels tab of a YouTube channel.
+
+    The /channels tab lists channels the creator explicitly recommends — a very
+    high-signal source for discovery. Returns UC... channel IDs.
+    """
+    if channel_id.startswith("@"):
+        url = f"https://www.youtube.com/{channel_id}/channels"
+    else:
+        url = f"https://www.youtube.com/channel/{channel_id}/channels"
+    stdout, _, code = _run([
+        "yt-dlp", url,
+        "--dump-json",
+        "--flat-playlist",
+        "--quiet",
+        *_cookie_args(),
+    ], timeout=30)
+
+    channel_ids: list[str] = []
+    for line in stdout.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            info = json.loads(line)
+            ch_id = info.get("channel_id") or info.get("id")
+            if ch_id and ch_id.startswith("UC"):
+                channel_ids.append(ch_id)
+        except json.JSONDecodeError:
+            continue
+    return channel_ids
+
+
 def fetch_channel_links(channel_id: str) -> list[str]:
    """Extract linked channel IDs from a channel's about/description."""
    if channel_id.startswith("@"):