Add global yt-dlp metadata rate limiter (5s + jitter between calls)
All fetch_video_metadata / fetch_channel_metadata / fetch_channel_playlists / fetch_available_subs calls now go through _meta_run, which enforces a minimum 5s gap (plus 0.5-2.5s of random jitter whenever a call has to wait) between the starts of metadata requests across all concurrent tasks. The per-task sleep loops are removed because the global limiter already spaces the calls out.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -80,11 +80,7 @@ def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
|
|||||||
|
|
||||||
|
|
||||||
def _index_channels_batch(channel_ids: list[int], user_id: int, delay: float = 1.5):
    """Run channel syncs sequentially, one ``_index_channel_task`` per channel.

    Args:
        channel_ids: Channels to sync, processed in the order given.
        user_id: Passed through unchanged to every ``_index_channel_task`` call.
        delay: Unused. This function no longer sleeps between channels; the
            parameter is kept so callers that still pass it do not break.
    """
    # NOTE(review): pacing is presumably provided by the yt-dlp wrapper's
    # global metadata limiter now — confirm _index_channel_task only fetches
    # through it.
    for cid in channel_ids:
        _index_channel_task(cid, user_id)
|
||||||
|
|
||||||
|
|
||||||
@@ -208,9 +204,7 @@ def _enrich_missing_task(limit: int = 20):
|
|||||||
"""),
|
"""),
|
||||||
{"limit": limit},
|
{"limit": limit},
|
||||||
).mappings().all()
|
).mappings().all()
|
||||||
for i, row in enumerate(rows):
|
for row in rows:
|
||||||
if i > 0:
|
|
||||||
time.sleep(2.0)
|
|
||||||
try:
|
try:
|
||||||
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
|
meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
|
||||||
if meta:
|
if meta:
|
||||||
@@ -824,9 +818,7 @@ def _fetch_popular_task(channel_id: int, youtube_channel_id: str, channel_name:
|
|||||||
_tasks[task_id]["done"] = 0
|
_tasks[task_id]["done"] = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for i, yt_id in enumerate(video_ids):
|
for yt_id in video_ids:
|
||||||
if i > 0:
|
|
||||||
time.sleep(2.0)
|
|
||||||
try:
|
try:
|
||||||
meta = ytdlp.fetch_video_metadata(yt_id)
|
meta = ytdlp.fetch_video_metadata(yt_id)
|
||||||
if meta:
|
if meta:
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
"""Subprocess wrapper for yt-dlp."""
|
"""Subprocess wrapper for yt-dlp."""
|
||||||
import json
|
import json
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@@ -17,6 +19,24 @@ def _run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
|
|||||||
return result.stdout, result.stderr, result.returncode
|
return result.stdout, result.stderr, result.returncode
|
||||||
|
|
||||||
|
|
||||||
|
# Global rate limiter for all metadata fetches — prevents concurrent tasks from
# hammering YouTube and invalidating cookies.
_meta_lock = threading.Lock()
# time.monotonic() reading taken when the most recent metadata request was
# released. Starts at -inf instead of 0.0: the reference point of
# time.monotonic() is undefined, so 0.0 could look like "a request was made a
# moment ago" on a host whose monotonic clock is still below _META_MIN_GAP.
_meta_last_call: float = float("-inf")
_META_MIN_GAP = 5.0  # seconds between any two metadata requests


def _meta_run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
    """Run a yt-dlp metadata command, spaced out from other metadata commands.

    All callers share one lock and one timestamp. A call that arrives less
    than ``_META_MIN_GAP`` seconds after the previous one was released sleeps
    for the remainder plus 0.5-2.5s of random jitter. The sleep happens while
    the lock is held, so other callers queue up behind it.

    Args:
        args: Full command line, passed unchanged to ``_run``.
        timeout: Timeout in seconds, passed unchanged to ``_run``.

    Returns:
        Whatever ``_run`` returns: ``(stdout, stderr, returncode)``.
    """
    global _meta_last_call
    with _meta_lock:
        elapsed = time.monotonic() - _meta_last_call
        wait = _META_MIN_GAP - elapsed
        if wait > 0:
            # Jitter is only added when we actually have to wait.
            time.sleep(wait + random.uniform(0.5, 2.5))
        # Stamp the release time before launching, so the gap is measured
        # between request starts rather than between completions.
        _meta_last_call = time.monotonic()
    # NOTE(review): the command is launched after the lock is released, so a
    # slow request does not block the next one beyond the minimum gap —
    # confirm this matches the intended nesting.
    return _run(args, timeout=timeout)
|
||||||
|
|
||||||
|
|
||||||
def _parse_date(date_str: str | None) -> datetime | None:
|
def _parse_date(date_str: str | None) -> datetime | None:
|
||||||
if not date_str:
|
if not date_str:
|
||||||
return None
|
return None
|
||||||
@@ -233,13 +253,13 @@ def fetch_video_metadata(video_id: str) -> dict | None:
|
|||||||
"yt-dlp", url,
|
"yt-dlp", url,
|
||||||
"--dump-json", "--no-download", "--no-playlist",
|
"--dump-json", "--no-download", "--no-playlist",
|
||||||
]
|
]
|
||||||
stdout, stderr, code = _run([*base_cmd, *cookie_args], timeout=30)
|
stdout, stderr, code = _meta_run([*base_cmd, *cookie_args], timeout=30)
|
||||||
if code != 0:
|
if code != 0:
|
||||||
print(f"[fetch_meta] FAILED code={code} stderr={stderr[:500]!r}", flush=True)
|
print(f"[fetch_meta] FAILED code={code} stderr={stderr[:500]!r}", flush=True)
|
||||||
# Retry without auth args — broken cookie config shouldn't block public videos
|
# Retry without auth args — broken cookie config shouldn't block public videos
|
||||||
if cookie_args:
|
if cookie_args:
|
||||||
print(f"[fetch_meta] retrying without cookie args", flush=True)
|
print(f"[fetch_meta] retrying without cookie args", flush=True)
|
||||||
stdout, stderr, code = _run(base_cmd, timeout=30)
|
stdout, stderr, code = _meta_run(base_cmd, timeout=30)
|
||||||
if code != 0:
|
if code != 0:
|
||||||
print(f"[fetch_meta] retry also FAILED code={code}", flush=True)
|
print(f"[fetch_meta] retry also FAILED code={code}", flush=True)
|
||||||
|
|
||||||
@@ -310,7 +330,7 @@ def fetch_channel_metadata(channel_id: str, max_videos: int = 30, start_video: i
|
|||||||
end = (start_video - 1 + max_videos) if start_video > 1 else max_videos
|
end = (start_video - 1 + max_videos) if start_video > 1 else max_videos
|
||||||
args += ["--playlist-end", str(end)]
|
args += ["--playlist-end", str(end)]
|
||||||
|
|
||||||
stdout, _, code = _run(args, timeout=60)
|
stdout, _, code = _meta_run(args, timeout=60)
|
||||||
if not stdout.strip():
|
if not stdout.strip():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -359,7 +379,7 @@ def fetch_channel_playlists(channel_id: str, max_results: int = 100) -> list[dic
|
|||||||
url = f"https://www.youtube.com/{channel_id}/playlists"
|
url = f"https://www.youtube.com/{channel_id}/playlists"
|
||||||
else:
|
else:
|
||||||
url = f"https://www.youtube.com/channel/{channel_id}/playlists"
|
url = f"https://www.youtube.com/channel/{channel_id}/playlists"
|
||||||
stdout, _, code = _run([
|
stdout, _, code = _meta_run([
|
||||||
"yt-dlp", url,
|
"yt-dlp", url,
|
||||||
"--dump-json", "--flat-playlist",
|
"--dump-json", "--flat-playlist",
|
||||||
"--playlist-end", str(max_results),
|
"--playlist-end", str(max_results),
|
||||||
@@ -552,9 +572,9 @@ def fetch_available_subs(video_id: str) -> dict:
|
|||||||
url = f"https://www.youtube.com/watch?v={video_id}"
|
url = f"https://www.youtube.com/watch?v={video_id}"
|
||||||
base_cmd = ["yt-dlp", url, "--dump-json", "--no-download", "--no-playlist"]
|
base_cmd = ["yt-dlp", url, "--dump-json", "--no-download", "--no-playlist"]
|
||||||
cookie_args = _cookie_args()
|
cookie_args = _cookie_args()
|
||||||
stdout, _, code = _run([*base_cmd, *cookie_args], timeout=30)
|
stdout, _, code = _meta_run([*base_cmd, *cookie_args], timeout=30)
|
||||||
if code != 0 and cookie_args:
|
if code != 0 and cookie_args:
|
||||||
stdout, _, code = _run(base_cmd, timeout=30)
|
stdout, _, code = _meta_run(base_cmd, timeout=30)
|
||||||
|
|
||||||
for line in stdout.splitlines():
|
for line in stdout.splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user