Fix hidden yt-dlp calls on video page causing cookie invalidation

Two background yt-dlp processes were firing every time a video page loaded:

1. importChapters (called unconditionally via useEffect on mount) was calling
   _upsert_video_from_yt with polite=False when chapters=NULL. That path has
   no rate limiter and no download-pause check, and it runs concurrently with
   active downloads.
   Fix: return [] immediately when chapters=NULL and let the normal enrichment
   pipeline (already polite=True) fill the chapters in.

2. get_video_by_yt_id schedules a background _enrich task whenever description
   or chapters is NULL. The frontend polls every 3 s while description is
   null, so dozens of enrichment tasks would pile up for the same video.
   Fix: deduplicate with a lock-guarded _enriching set keyed by the YouTube
   video ID — only one background fetch per video at a time; the set entry
   is cleared when the task finishes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 02:43:45 +02:00
parent 146a044e69
commit 1179b53f2e

View File

@@ -1,5 +1,6 @@
import os
import random
import threading
from datetime import datetime
from typing import Optional
@@ -17,6 +18,11 @@ from ..services.scoring import get_surprise_videos, get_discovery_injection
router = APIRouter()
# YouTube video IDs that currently have a background enrichment task scheduled
# or running. get_video_by_yt_id adds an ID before scheduling _enrich, and
# _enrich discards it once the fetch is done, so repeated polls from the
# frontend don't spawn duplicate yt-dlp calls for the same video.
# NOTE: this is in-memory, per-process state — the dedup guard does not extend
# across multiple worker processes.
_enriching: set[str] = set()
# Guards _enriching so the membership check and add in get_video_by_yt_id are
# one atomic step; the discard in _enrich takes the same lock.
_enriching_lock = threading.Lock()
def _update_affinity(db: Session, user_id: int, video: Video, delta: float):
"""Adjust tag/category affinity scores for a video. delta > 0 = positive signal."""
@@ -700,18 +706,12 @@ def import_chapters(
import json as _json
video = db.query(Video).filter(Video.id == video_id).first()
if not video:
if not video or video.chapters is None:
# chapters=NULL means enrichment hasn't run yet; the background fetch
# triggered by get_video_by_yt_id will fill this in. Don't call yt-dlp
# here — it runs polite=False and races with active downloads.
return []
# chapters=NULL means never fetched; fetch now and cache the result (even if empty)
if video.chapters is None:
_upsert_video_from_yt(db, video.youtube_video_id)
db.refresh(video)
# Mark as checked even if no chapters found, so we don't re-fetch next time
if video.chapters is None:
video.chapters = "[]"
db.commit()
chapters = _json.loads(video.chapters or "[]")
# Skip if trivial (single chapter) or already imported
if len(chapters) < 2:
@@ -935,7 +935,15 @@ def get_video_by_yt_id(
# Video unknown — must block to get at least a title before we can render anything
_upsert_video_from_yt(db, youtube_video_id)
elif existing.description is None or existing.chapters is None:
# Video known but missing enrichment — fetch in background, return immediately
# Video known but missing enrichment — schedule one background fetch.
# The frontend polls every 3 s while description is null; without the
# dedup guard each poll would spawn its own yt-dlp process.
with _enriching_lock:
already = youtube_video_id in _enriching
if not already:
_enriching.add(youtube_video_id)
if not already:
from ..database import SessionLocal
def _enrich(yt_id: str):
bg_db = SessionLocal()
@@ -943,6 +951,8 @@ def get_video_by_yt_id(
_upsert_video_from_yt(bg_db, yt_id, polite=True)
finally:
bg_db.close()
with _enriching_lock:
_enriching.discard(yt_id)
background_tasks.add_task(_enrich, youtube_video_id)
row = db.execute(