From c26fc3483c2f9457b08523ef1cbd487f50246e83 Mon Sep 17 00:00:00 2001 From: Mattias Thall Date: Wed, 27 May 2026 00:34:59 +0200 Subject: [PATCH] Rate limit only background batch fetches, not user requests fetch_video_metadata and fetch_channel_metadata now take polite=True for background tasks (enforces 5s+ gap via global lock) while user-facing calls (watch page, follow channel, download) use polite=False and run immediately. Co-Authored-By: Claude Sonnet 4.6 --- backend/routers/channels.py | 10 +++++----- backend/services/ytdlp.py | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/backend/routers/channels.py b/backend/routers/channels.py index 07fad91..65d0de1 100644 --- a/backend/routers/channels.py +++ b/backend/routers/channels.py @@ -92,7 +92,7 @@ def _index_channel_task(channel_id: int, user_id: int, max_videos: int = 30): if not channel: return - result = ytdlp.fetch_channel_metadata(channel.youtube_channel_id, max_videos=max_videos) + result = ytdlp.fetch_channel_metadata(channel.youtube_channel_id, max_videos=max_videos, polite=True) if not result: return @@ -206,7 +206,7 @@ def _enrich_missing_task(limit: int = 20): ).mappings().all() for row in rows: try: - meta = ytdlp.fetch_video_metadata(row["youtube_video_id"]) + meta = ytdlp.fetch_video_metadata(row["youtube_video_id"], polite=True) if meta: vid = db.query(Video).filter_by(id=row["id"]).first() if vid: @@ -820,7 +820,7 @@ def _fetch_popular_task(channel_id: int, youtube_channel_id: str, channel_name: try: for yt_id in video_ids: try: - meta = ytdlp.fetch_video_metadata(yt_id) + meta = ytdlp.fetch_video_metadata(yt_id, polite=True) if meta: db = SessionLocal() try: @@ -866,7 +866,7 @@ def _search_channel_task(channel_id: int, youtube_channel_id: str, q: str, user_ db = SessionLocal() try: url = f"https://www.youtube.com/channel/{youtube_channel_id}/search?query={quote(q)}" - result = ytdlp.fetch_channel_metadata(youtube_channel_id, max_videos=100) + result = ytdlp.fetch_channel_metadata(youtube_channel_id, max_videos=100, polite=True) if not result: return # Filter results by query match (yt-dlp fetches all; we filter titles locally) @@ -1050,7 +1050,7 @@ def _index_channel_explore_task(channel_id: int, user_id: int, start_video: int, channel = db.query(Channel).filter_by(id=channel_id).first() if not channel: return - result = ytdlp.fetch_channel_metadata(channel.youtube_channel_id, max_videos=count, start_video=start_video) + result = ytdlp.fetch_channel_metadata(channel.youtube_channel_id, max_videos=count, start_video=start_video, polite=True) if not result: return for vdata in result.get("videos", []): diff --git a/backend/services/ytdlp.py b/backend/services/ytdlp.py index 100ebc7..b131959 100644 --- a/backend/services/ytdlp.py +++ b/backend/services/ytdlp.py @@ -244,8 +244,12 @@ def _best_thumbnail(thumbnails: list | None) -> str | None: return best[0].get("url") if best else None -def fetch_video_metadata(video_id: str) -> dict | None: - """Fetch metadata for a single video by YouTube ID.""" +def fetch_video_metadata(video_id: str, polite: bool = False) -> dict | None: + """Fetch metadata for a single video by YouTube ID. + + polite=True applies the global rate limiter (for background batch tasks). + polite=False (default) runs immediately for user-facing requests. + """ url = f"https://www.youtube.com/watch?v={video_id}" cookie_args = _cookie_args() print(f"[fetch_meta] video={video_id} cookie_args={cookie_args!r}", flush=True) @@ -253,13 +257,13 @@ def fetch_video_metadata(video_id: str) -> dict | None: "yt-dlp", url, "--dump-json", "--no-download", "--no-playlist", ] - stdout, stderr, code = _meta_run([*base_cmd, *cookie_args], timeout=30) + runner = _meta_run if polite else _run + stdout, stderr, code = runner([*base_cmd, *cookie_args], timeout=30) if code != 0: print(f"[fetch_meta] FAILED code={code} stderr={stderr[:500]!r}", flush=True) - # Retry without auth args — broken cookie config shouldn't block public videos if cookie_args: print(f"[fetch_meta] retrying without cookie args", flush=True) - stdout, stderr, code = _meta_run(base_cmd, timeout=30) + stdout, stderr, code = runner(base_cmd, timeout=30) if code != 0: print(f"[fetch_meta] retry also FAILED code={code}", flush=True) @@ -307,7 +311,7 @@ def _rss_dates(uc_channel_id: str) -> dict[str, datetime]: return {} -def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: int = 1) -> dict | None: +def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: int = 1, polite: bool = False) -> dict | None: """Fetch channel info + recent videos. Uses --dump-single-json --flat-playlist for speed, then enriches video dates @@ -330,7 +334,8 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: i end = (start_video - 1 + max_videos) if start_video > 1 else max_videos args += ["--playlist-end", str(end)] - stdout, _, code = _meta_run(args, timeout=60) + runner = _meta_run if polite else _run + stdout, _, code = runner(args, timeout=60) if not stdout.strip(): return None