Add global yt-dlp metadata rate limiter (5s + jitter between calls)
All fetch_video_metadata / fetch_channel_metadata / fetch_channel_playlists / fetch_available_subs calls now go through _meta_run, which enforces a minimum 5s gap (plus 0.5-2.5s of random jitter whenever a wait is needed) across all concurrent tasks. Per-task sleep loops were removed, since the global lock serializes everything. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
"""Subprocess wrapper for yt-dlp."""
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from datetime import datetime, timezone
|
||||
@@ -17,6 +19,24 @@ def _run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
|
||||
return result.stdout, result.stderr, result.returncode
|
||||
|
||||
|
||||
# Global rate limiter for all metadata fetches — prevents concurrent tasks from
# hammering YouTube and invalidating cookies.
_meta_lock = threading.Lock()
# time.monotonic() reading taken when the most recent metadata request finished.
# Starts at -inf because the reference point of time.monotonic() is undefined,
# so the very first request must never be made to wait.
_meta_last_call: float = float("-inf")
_META_MIN_GAP = 5.0  # seconds between any two metadata requests


def _meta_run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
    """Run a metadata command via ``_run`` behind the global rate limiter.

    The lock is held for the sleep and for the command itself, so
    metadata requests from concurrent callers execute one at a time. If less
    than ``_META_MIN_GAP`` seconds have passed since the previous request
    finished, the caller sleeps for the remainder plus 0.5-2.5 s of random
    jitter before running.

    Args:
        args: Command line forwarded unchanged to ``_run``.
        timeout: Timeout in seconds, forwarded to ``_run``.

    Returns:
        Whatever ``_run`` returns: ``(stdout, stderr, returncode)``.
    """
    global _meta_last_call
    with _meta_lock:
        since_last = time.monotonic() - _meta_last_call
        remaining = _META_MIN_GAP - since_last
        if remaining > 0:
            time.sleep(remaining + random.uniform(0.5, 2.5))
        try:
            return _run(args, timeout=timeout)
        finally:
            # Stamp on completion (even if _run raises) so the gap is measured
            # from the end of this request; stamping before the call would let
            # the next request start immediately after a long-running one.
            _meta_last_call = time.monotonic()
|
||||
|
||||
|
||||
def _parse_date(date_str: str | None) -> datetime | None:
|
||||
if not date_str:
|
||||
return None
|
||||
@@ -233,13 +253,13 @@ def fetch_video_metadata(video_id: str) -> dict | None:
|
||||
"yt-dlp", url,
|
||||
"--dump-json", "--no-download", "--no-playlist",
|
||||
]
|
||||
stdout, stderr, code = _run([*base_cmd, *cookie_args], timeout=30)
|
||||
stdout, stderr, code = _meta_run([*base_cmd, *cookie_args], timeout=30)
|
||||
if code != 0:
|
||||
print(f"[fetch_meta] FAILED code={code} stderr={stderr[:500]!r}", flush=True)
|
||||
# Retry without auth args — broken cookie config shouldn't block public videos
|
||||
if cookie_args:
|
||||
print(f"[fetch_meta] retrying without cookie args", flush=True)
|
||||
stdout, stderr, code = _run(base_cmd, timeout=30)
|
||||
stdout, stderr, code = _meta_run(base_cmd, timeout=30)
|
||||
if code != 0:
|
||||
print(f"[fetch_meta] retry also FAILED code={code}", flush=True)
|
||||
|
||||
@@ -310,7 +330,7 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: i
|
||||
end = (start_video - 1 + max_videos) if start_video > 1 else max_videos
|
||||
args += ["--playlist-end", str(end)]
|
||||
|
||||
stdout, _, code = _run(args, timeout=60)
|
||||
stdout, _, code = _meta_run(args, timeout=60)
|
||||
if not stdout.strip():
|
||||
return None
|
||||
|
||||
@@ -359,7 +379,7 @@ def fetch_channel_playlists(channel_id: str, max_results: int = 100) -> list[dic
|
||||
url = f"https://www.youtube.com/{channel_id}/playlists"
|
||||
else:
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/playlists"
|
||||
stdout, _, code = _run([
|
||||
stdout, _, code = _meta_run([
|
||||
"yt-dlp", url,
|
||||
"--dump-json", "--flat-playlist",
|
||||
"--playlist-end", str(max_results),
|
||||
@@ -552,9 +572,9 @@ def fetch_available_subs(video_id: str) -> dict:
|
||||
url = f"https://www.youtube.com/watch?v={video_id}"
|
||||
base_cmd = ["yt-dlp", url, "--dump-json", "--no-download", "--no-playlist"]
|
||||
cookie_args = _cookie_args()
|
||||
stdout, _, code = _run([*base_cmd, *cookie_args], timeout=30)
|
||||
stdout, _, code = _meta_run([*base_cmd, *cookie_args], timeout=30)
|
||||
if code != 0 and cookie_args:
|
||||
stdout, _, code = _run(base_cmd, timeout=30)
|
||||
stdout, _, code = _meta_run(base_cmd, timeout=30)
|
||||
|
||||
for line in stdout.splitlines():
|
||||
line = line.strip()
|
||||
|
||||
Reference in New Issue
Block a user