Add global yt-dlp metadata rate limiter (5s + jitter between calls)

All fetch_video_metadata / fetch_channel_metadata / fetch_channel_playlists
/ fetch_available_subs calls now go through _meta_run, which enforces a
minimum 5s gap between metadata calls across all concurrent tasks (plus
0.5-2.5s of random jitter whenever a call has to wait for that gap).
The per-task sleep loops were removed since the global lock serializes everything.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 00:31:14 +02:00
parent 15e6b94cbf
commit c180c293b0
2 changed files with 29 additions and 17 deletions

View File

@@ -1,8 +1,10 @@
"""Subprocess wrapper for yt-dlp."""
import json
import random
import re
import subprocess
import threading
import time
import urllib.request
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
@@ -17,6 +19,24 @@ def _run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
return result.stdout, result.stderr, result.returncode
# Global rate limiter for all metadata fetches — prevents concurrent tasks from
# hammering YouTube and invalidating cookies.
# _meta_run acquires this lock before it reads the last-call timestamp below.
_meta_lock = threading.Lock()
# time.monotonic() reading stored by _meta_run just before it invokes _run;
# stays at 0.0 until the first metadata request has gone through.
_meta_last_call: float = 0.0
_META_MIN_GAP = 5.0 # seconds between any two metadata requests
# Run a yt-dlp metadata command via _run, after waiting out the global
# minimum gap between metadata requests.
#
# Accepts the same `args` / `timeout` as _run and returns _run's result
# unchanged; call sites unpack it as (stdout, stderr, return code).
#
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the final _run call executes while _meta_lock is still held or
# after it has been released — confirm against the real file.
def _meta_run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
global _meta_last_call
with _meta_lock:
now = time.monotonic()
# Portion of _META_MIN_GAP that has not yet elapsed since the last recorded call.
wait = _META_MIN_GAP - (now - _meta_last_call)
if wait > 0:
# Sleep out the remainder plus 0.5-2.5s of random jitter; no jitter is
# added when the gap has already elapsed.
time.sleep(wait + random.uniform(0.5, 2.5))
# Timestamp is taken before _run is invoked, so the gap is measured from
# the start of the previous command, not from its completion.
_meta_last_call = time.monotonic()
return _run(args, timeout=timeout)
def _parse_date(date_str: str | None) -> datetime | None:
if not date_str:
return None
@@ -233,13 +253,13 @@ def fetch_video_metadata(video_id: str) -> dict | None:
"yt-dlp", url,
"--dump-json", "--no-download", "--no-playlist",
]
stdout, stderr, code = _run([*base_cmd, *cookie_args], timeout=30)
stdout, stderr, code = _meta_run([*base_cmd, *cookie_args], timeout=30)
if code != 0:
print(f"[fetch_meta] FAILED code={code} stderr={stderr[:500]!r}", flush=True)
# Retry without auth args — broken cookie config shouldn't block public videos
if cookie_args:
print(f"[fetch_meta] retrying without cookie args", flush=True)
stdout, stderr, code = _run(base_cmd, timeout=30)
stdout, stderr, code = _meta_run(base_cmd, timeout=30)
if code != 0:
print(f"[fetch_meta] retry also FAILED code={code}", flush=True)
@@ -310,7 +330,7 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: i
end = (start_video - 1 + max_videos) if start_video > 1 else max_videos
args += ["--playlist-end", str(end)]
stdout, _, code = _run(args, timeout=60)
stdout, _, code = _meta_run(args, timeout=60)
if not stdout.strip():
return None
@@ -359,7 +379,7 @@ def fetch_channel_playlists(channel_id: str, max_results: int = 100) -> list[dic
url = f"https://www.youtube.com/{channel_id}/playlists"
else:
url = f"https://www.youtube.com/channel/{channel_id}/playlists"
stdout, _, code = _run([
stdout, _, code = _meta_run([
"yt-dlp", url,
"--dump-json", "--flat-playlist",
"--playlist-end", str(max_results),
@@ -552,9 +572,9 @@ def fetch_available_subs(video_id: str) -> dict:
url = f"https://www.youtube.com/watch?v={video_id}"
base_cmd = ["yt-dlp", url, "--dump-json", "--no-download", "--no-playlist"]
cookie_args = _cookie_args()
stdout, _, code = _run([*base_cmd, *cookie_args], timeout=30)
stdout, _, code = _meta_run([*base_cmd, *cookie_args], timeout=30)
if code != 0 and cookie_args:
stdout, _, code = _run(base_cmd, timeout=30)
stdout, _, code = _meta_run(base_cmd, timeout=30)
for line in stdout.splitlines():
line = line.strip()