diff --git a/backend/routers/channels.py b/backend/routers/channels.py index 375f78b..07fad91 100644 --- a/backend/routers/channels.py +++ b/backend/routers/channels.py @@ -80,11 +80,7 @@ def _get_channel_or_404(db: Session, channel_id: int) -> Channel: def _index_channels_batch(channel_ids: list[int], user_id: int, delay: float = 1.5): - """Run channel syncs sequentially with a polite delay between requests.""" - import time - for i, cid in enumerate(channel_ids): - if i > 0: - time.sleep(delay) + for cid in channel_ids: _index_channel_task(cid, user_id) @@ -208,9 +204,7 @@ def _enrich_missing_task(limit: int = 20): """), {"limit": limit}, ).mappings().all() - for i, row in enumerate(rows): - if i > 0: - time.sleep(2.0) + for row in rows: try: meta = ytdlp.fetch_video_metadata(row["youtube_video_id"]) if meta: @@ -824,9 +818,7 @@ def _fetch_popular_task(channel_id: int, youtube_channel_id: str, channel_name: _tasks[task_id]["done"] = 0 try: - for i, yt_id in enumerate(video_ids): - if i > 0: - time.sleep(2.0) + for yt_id in video_ids: try: meta = ytdlp.fetch_video_metadata(yt_id) if meta: diff --git a/backend/services/ytdlp.py b/backend/services/ytdlp.py index 197fde0..100ebc7 100644 --- a/backend/services/ytdlp.py +++ b/backend/services/ytdlp.py @@ -1,8 +1,10 @@ """Subprocess wrapper for yt-dlp.""" import json +import random import re import subprocess import threading +import time import urllib.request import xml.etree.ElementTree as ET from datetime import datetime, timezone @@ -17,6 +19,24 @@ def _run(args: list[str], timeout: int = 60) -> tuple[str, str, int]: return result.stdout, result.stderr, result.returncode +# Global rate limiter for all metadata fetches — prevents concurrent tasks from +# hammering YouTube and invalidating cookies. +_meta_lock = threading.Lock() +_meta_last_call: float = 0.0 +_META_MIN_GAP = 5.0 # seconds between any two metadata requests + + +def _meta_run(args: list[str], timeout: int = 60) -> tuple[str, str, int]: + global _meta_last_call + with _meta_lock: + now = time.monotonic() + wait = _META_MIN_GAP - (now - _meta_last_call) + if wait > 0: + time.sleep(wait + random.uniform(0.5, 2.5)) + _meta_last_call = time.monotonic() + return _run(args, timeout=timeout) + + def _parse_date(date_str: str | None) -> datetime | None: if not date_str: return None @@ -233,13 +253,13 @@ def fetch_video_metadata(video_id: str) -> dict | None: "yt-dlp", url, "--dump-json", "--no-download", "--no-playlist", ] - stdout, stderr, code = _run([*base_cmd, *cookie_args], timeout=30) + stdout, stderr, code = _meta_run([*base_cmd, *cookie_args], timeout=30) if code != 0: print(f"[fetch_meta] FAILED code={code} stderr={stderr[:500]!r}", flush=True) # Retry without auth args — broken cookie config shouldn't block public videos if cookie_args: print(f"[fetch_meta] retrying without cookie args", flush=True) - stdout, stderr, code = _run(base_cmd, timeout=30) + stdout, stderr, code = _meta_run(base_cmd, timeout=30) if code != 0: print(f"[fetch_meta] retry also FAILED code={code}", flush=True) @@ -310,7 +330,7 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: i end = (start_video - 1 + max_videos) if start_video > 1 else max_videos args += ["--playlist-end", str(end)] - stdout, _, code = _run(args, timeout=60) + stdout, _, code = _meta_run(args, timeout=60) if not stdout.strip(): return None @@ -359,7 +379,7 @@ def fetch_channel_playlists(channel_id: str, max_results: int = 100) -> list[dic url = f"https://www.youtube.com/{channel_id}/playlists" else: url = f"https://www.youtube.com/channel/{channel_id}/playlists" - stdout, _, code = _run([ + stdout, _, code = _meta_run([ "yt-dlp", url, "--dump-json", "--flat-playlist", "--playlist-end", str(max_results), @@ -552,9 +572,9 @@ def fetch_available_subs(video_id: str) -> dict: url = f"https://www.youtube.com/watch?v={video_id}" base_cmd = ["yt-dlp", url, "--dump-json", "--no-download", "--no-playlist"] cookie_args = _cookie_args() - stdout, _, code = _run([*base_cmd, *cookie_args], timeout=30) + stdout, _, code = _meta_run([*base_cmd, *cookie_args], timeout=30) if code != 0 and cookie_args: - stdout, _, code = _run(base_cmd, timeout=30) + stdout, _, code = _meta_run(base_cmd, timeout=30) for line in stdout.splitlines(): line = line.strip()