Fix cookie invalidation: give each yt-dlp process a private cookie file copy

Downloads run for minutes via Popen while metadata calls continue in parallel.
Both processes read from AND write back to the same --cookies file, causing
concurrent writes that corrupt the session cookie state.

Fix: _make_private_cookie_copy() intercepts --cookies <file> in any arg list
and swaps it for a NamedTemporaryFile copy. Each yt-dlp process gets its own
snapshot; write-backs go to the throwaway copy and are discarded on cleanup.

- _run() uses this for all subprocess.run calls (metadata, subtitles, comments)
- start_download() uses it for the long-lived Popen download process
- _meta_run() benefits automatically since it calls _run()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 01:48:29 +02:00
parent 592194f2ca
commit 4d255647a1

View File

@@ -1,8 +1,11 @@
"""Subprocess wrapper for yt-dlp.""" """Subprocess wrapper for yt-dlp."""
import json import json
import os
import random import random
import re import re
import shutil
import subprocess import subprocess
import tempfile
import threading import threading
import time import time
import urllib.request import urllib.request
@@ -14,9 +17,36 @@ from typing import Any
from ..config import settings from ..config import settings
def _make_private_cookie_copy(args: list[str]) -> tuple[list[str], str | None]:
"""Replace --cookies <file> with a private temp copy so concurrent yt-dlp
processes never write to the same cookie jar simultaneously."""
for i, arg in enumerate(args):
if arg == "--cookies" and i + 1 < len(args):
source = args[i + 1]
if Path(source).exists():
try:
tmp = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
tmp.close()
shutil.copy2(source, tmp.name)
modified = list(args)
modified[i + 1] = tmp.name
return modified, tmp.name
except Exception:
break
return list(args), None
def _run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
    """Run *args* to completion and return ``(stdout, stderr, returncode)``.

    The argument list is first passed through ``_make_private_cookie_copy``;
    if that produced a temporary cookie file, the file is removed once the
    subprocess has finished, whether it succeeded, failed or timed out.
    """
    cmd, private_cookies = _make_private_cookie_copy(args)
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    finally:
        if private_cookies:
            try:
                os.unlink(private_cookies)
            except Exception:
                pass
    return completed.stdout, completed.stderr, completed.returncode
# Global rate limiter for all metadata fetches — prevents concurrent tasks from # Global rate limiter for all metadata fetches — prevents concurrent tasks from
@@ -857,49 +887,62 @@ def start_download(
with _SEMAPHORE: with _SEMAPHORE:
cookie_args = _cookie_args() cookie_args = _cookie_args()
print(f"[ytdlp] cookie_args={cookie_args!r}", flush=True) print(f"[ytdlp] cookie_args={cookie_args!r}", flush=True)
process = subprocess.Popen( cmd = [
[ "yt-dlp", url,
"yt-dlp", url, "-f", fmt,
"-f", fmt, "--merge-output-format", "mp4",
"--merge-output-format", "mp4", "--no-part", "--no-mtime",
"--no-part", "--no-mtime", "-o", output_template,
"-o", output_template, "--newline", "--progress", "--no-colors",
"--newline", "--progress", "--no-colors", *subtitle_args,
*subtitle_args, *cookie_args,
*cookie_args, ]
], # Private cookie copy — download runs for minutes; without this,
stdout=subprocess.PIPE, # concurrent metadata calls would write-back to the same cookie file
stderr=subprocess.STDOUT, # and corrupt the session.
text=True, cmd, tmp_cookie_path = _make_private_cookie_copy(cmd)
) try:
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
file_path = None try:
stream_index = 0 file_path = None
output_lines: list[str] = [] stream_index = 0
for line in process.stdout: output_lines: list[str] = []
line = line.strip() for line in process.stdout:
output_lines.append(line) line = line.strip()
if re.search(r"\[download\] Destination:", line): output_lines.append(line)
stream_index += 1 if re.search(r"\[download\] Destination:", line):
m = re.search(r"\[download\]\s+([\d.]+)%", line) stream_index += 1
if m: m = re.search(r"\[download\]\s+([\d.]+)%", line)
pct = float(m.group(1)) if m:
scaled = pct * 0.85 if stream_index <= 1 else 85.0 + pct * 0.10 pct = float(m.group(1))
on_progress(download_id, min(scaled, 95.0)) scaled = pct * 0.85 if stream_index <= 1 else 85.0 + pct * 0.10
m2 = re.search(r"\[(?:download|Merger)\] Destination: (.+)", line) on_progress(download_id, min(scaled, 95.0))
if m2: m2 = re.search(r"\[(?:download|Merger)\] Destination: (.+)", line)
file_path = m2.group(1).strip() if m2:
file_path = m2.group(1).strip()
process.wait() process.wait()
if process.returncode == 0: if process.returncode == 0:
_strip_vtt_cue_settings(video_id) _strip_vtt_cue_settings(video_id)
resolution = detect_resolution(file_path) if file_path else None resolution = detect_resolution(file_path) if file_path else None
on_complete(download_id, file_path, resolution) on_complete(download_id, file_path, resolution)
else: else:
tail = "\n".join(output_lines[-20:]) if output_lines else "(no output)" tail = "\n".join(output_lines[-20:]) if output_lines else "(no output)"
import logging import logging
logging.getLogger(__name__).error("yt-dlp failed (code %d):\n%s", process.returncode, tail) logging.getLogger(__name__).error("yt-dlp failed (code %d):\n%s", process.returncode, tail)
on_error(download_id, f"yt-dlp exited with code {process.returncode}:\n{tail}") on_error(download_id, f"yt-dlp exited with code {process.returncode}:\n{tail}")
finally:
if tmp_cookie_path:
try:
os.unlink(tmp_cookie_path)
except Exception:
pass
thread = threading.Thread(target=_run_download, daemon=True) thread = threading.Thread(target=_run_download, daemon=True)
thread.start() thread.start()