From 1179b53f2e41c32ee2a65b7b3f318421e3a4465a Mon Sep 17 00:00:00 2001 From: Mattias Thall Date: Wed, 27 May 2026 02:43:45 +0200 Subject: [PATCH] Fix hidden yt-dlp calls on video page causing cookie invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two background yt-dlp processes were firing every time a video page loaded: 1. importChapters (called unconditionally via useEffect on mount) was calling _upsert_video_from_yt with polite=False when chapters=NULL — no rate limiter, no download-pause check, runs concurrently with active downloads. Fix: return [] immediately when chapters=NULL and let the normal enrichment pipeline (already polite=True) fill them in. 2. get_video_by_yt_id schedules a background _enrich task whenever description or chapters are NULL. The frontend polls every 3 s while description is null, so dozens of enrichment tasks would pile up for the same video. Fix: deduplicate with _enriching set — only one background fetch per video_id at a time; the set entry is cleared when the task finishes. Co-Authored-By: Claude Sonnet 4.6 --- backend/routers/videos.py | 48 +++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/backend/routers/videos.py b/backend/routers/videos.py index 7f08145..d881ae5 100644 --- a/backend/routers/videos.py +++ b/backend/routers/videos.py @@ -1,5 +1,6 @@ import os import random +import threading from datetime import datetime from typing import Optional @@ -17,6 +18,11 @@ from ..services.scoring import get_surprise_videos, get_discovery_injection router = APIRouter() +# Tracks which video IDs currently have a background enrichment running, +# so repeated polls from the frontend don't spawn duplicate yt-dlp calls. +_enriching: set[str] = set() +_enriching_lock = threading.Lock() + def _update_affinity(db: Session, user_id: int, video: Video, delta: float): """Adjust tag/category affinity scores for a video. delta > 0 = positive signal.""" @@ -700,18 +706,12 @@ def import_chapters( import json as _json video = db.query(Video).filter(Video.id == video_id).first() - if not video: + if not video or video.chapters is None: + # chapters=NULL means enrichment hasn't run yet; the background fetch + # triggered by get_video_by_yt_id will fill this in. Don't call yt-dlp + # here — it runs polite=False and races with active downloads. return [] - # chapters=NULL means never fetched; fetch now and cache the result (even if empty) - if video.chapters is None: - _upsert_video_from_yt(db, video.youtube_video_id) - db.refresh(video) - # Mark as checked even if no chapters found, so we don't re-fetch next time - if video.chapters is None: - video.chapters = "[]" - db.commit() - chapters = _json.loads(video.chapters or "[]") # Skip if trivial (single chapter) or already imported if len(chapters) < 2: @@ -935,15 +935,25 @@ def get_video_by_yt_id( # Video unknown — must block to get at least a title before we can render anything _upsert_video_from_yt(db, youtube_video_id) elif existing.description is None or existing.chapters is None: - # Video known but missing enrichment — fetch in background, return immediately - from ..database import SessionLocal - def _enrich(yt_id: str): - bg_db = SessionLocal() - try: - _upsert_video_from_yt(bg_db, yt_id, polite=True) - finally: - bg_db.close() - background_tasks.add_task(_enrich, youtube_video_id) + # Video known but missing enrichment — schedule one background fetch. + # The frontend polls every 3 s while description is null; without the + # dedup guard each poll would spawn its own yt-dlp process. + with _enriching_lock: + already = youtube_video_id in _enriching + if not already: + _enriching.add(youtube_video_id) + + if not already: + from ..database import SessionLocal + def _enrich(yt_id: str): + bg_db = SessionLocal() + try: + _upsert_video_from_yt(bg_db, yt_id, polite=True) + finally: + bg_db.close() + with _enriching_lock: + _enriching.discard(yt_id) + background_tasks.add_task(_enrich, youtube_video_id) row = db.execute( text(_VIDEO_SELECT + "WHERE v.youtube_video_id = :yt_id"),