Parallelize discovery searches and add graph signal
Run search queries concurrently (5 workers) instead of sequentially, which cuts crawl time dramatically.

Add a graph signal: fetch the featured channels from each followed channel's /channels tab in parallel (4 workers). This surfaces creator-curated recommendations as a high-signal, diverse pool that search alone can't reach.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -350,6 +350,39 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30) -> dict | None
|
||||
return {"channel": channel_info, "videos": videos}
|
||||
|
||||
|
||||
def fetch_featured_channels(channel_id: str) -> list[str]:
    """Fetch channel IDs from the /channels tab of a YouTube channel.

    The /channels tab lists channels the creator explicitly recommends — a very
    high-signal source for discovery.

    Args:
        channel_id: Either an ``@handle`` or a raw channel ID. Handles are
            addressed as ``youtube.com/@handle/channels``; anything else as
            ``youtube.com/channel/<id>/channels``.

    Returns:
        The distinct ``UC...`` channel IDs found in yt-dlp's output, in
        first-seen order. Empty when yt-dlp produced no usable lines.
    """
    if channel_id.startswith("@"):
        url = f"https://www.youtube.com/{channel_id}/channels"
    else:
        url = f"https://www.youtube.com/channel/{channel_id}/channels"

    # Best-effort fetch: stderr and the exit status are deliberately ignored;
    # whatever parses out of stdout is what gets returned.
    stdout, _stderr, _code = _run([
        "yt-dlp", url,
        "--dump-json",
        "--flat-playlist",
        "--quiet",
        *_cookie_args(),
    ], timeout=30)

    # A dict keeps insertion order, so it serves as an ordered set here.
    seen: dict[str, None] = {}
    for raw_line in (stdout or "").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            info = json.loads(line)
        except json.JSONDecodeError:
            # Skip anything on stdout that is not a JSON document.
            continue
        if not isinstance(info, dict):
            # Valid JSON but not an object (e.g. `null`): nothing to read.
            continue
        ch_id = info.get("channel_id") or info.get("id")
        if isinstance(ch_id, str) and ch_id.startswith("UC"):
            seen.setdefault(ch_id, None)
    return list(seen)
|
||||
|
||||
|
||||
def fetch_channel_links(channel_id: str) -> list[str]:
|
||||
"""Extract linked channel IDs from a channel's about/description."""
|
||||
if channel_id.startswith("@"):
|
||||
|
||||
Reference in New Issue
Block a user