Fix hidden yt-dlp calls on video page causing cookie invalidation
Two background yt-dlp processes were firing every time a video page loaded:

1. importChapters (called unconditionally via useEffect on mount) was calling _upsert_video_from_yt with polite=False when chapters=NULL — no rate limiter, no download-pause check, and it ran concurrently with active downloads. Fix: return [] immediately when chapters=NULL and let the normal enrichment pipeline (already polite=True) fill them in.

2. get_video_by_yt_id schedules a background _enrich task whenever description or chapters are NULL. The frontend polls every 3 s while description is null, so dozens of enrichment tasks would pile up for the same video. Fix: deduplicate with the _enriching set — only one background fetch per youtube_video_id at a time; the set entry is cleared when the task finishes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
@@ -17,6 +18,11 @@ from ..services.scoring import get_surprise_videos, get_discovery_injection
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Tracks which video IDs currently have a background enrichment running,
|
||||
# so repeated polls from the frontend don't spawn duplicate yt-dlp calls.
|
||||
_enriching: set[str] = set()
|
||||
_enriching_lock = threading.Lock()
|
||||
|
||||
|
||||
def _update_affinity(db: Session, user_id: int, video: Video, delta: float):
|
||||
"""Adjust tag/category affinity scores for a video. delta > 0 = positive signal."""
|
||||
@@ -700,18 +706,12 @@ def import_chapters(
|
||||
import json as _json
|
||||
|
||||
video = db.query(Video).filter(Video.id == video_id).first()
|
||||
if not video:
|
||||
if not video or video.chapters is None:
|
||||
# chapters=NULL means enrichment hasn't run yet; the background fetch
|
||||
# triggered by get_video_by_yt_id will fill this in. Don't call yt-dlp
|
||||
# here — it runs polite=False and races with active downloads.
|
||||
return []
|
||||
|
||||
# chapters=NULL means never fetched; fetch now and cache the result (even if empty)
|
||||
if video.chapters is None:
|
||||
_upsert_video_from_yt(db, video.youtube_video_id)
|
||||
db.refresh(video)
|
||||
# Mark as checked even if no chapters found, so we don't re-fetch next time
|
||||
if video.chapters is None:
|
||||
video.chapters = "[]"
|
||||
db.commit()
|
||||
|
||||
chapters = _json.loads(video.chapters or "[]")
|
||||
# Skip if trivial (single chapter) or already imported
|
||||
if len(chapters) < 2:
|
||||
@@ -935,7 +935,15 @@ def get_video_by_yt_id(
|
||||
# Video unknown — must block to get at least a title before we can render anything
|
||||
_upsert_video_from_yt(db, youtube_video_id)
|
||||
elif existing.description is None or existing.chapters is None:
|
||||
# Video known but missing enrichment — fetch in background, return immediately
|
||||
# Video known but missing enrichment — schedule one background fetch.
|
||||
# The frontend polls every 3 s while description is null; without the
|
||||
# dedup guard each poll would spawn its own yt-dlp process.
|
||||
with _enriching_lock:
|
||||
already = youtube_video_id in _enriching
|
||||
if not already:
|
||||
_enriching.add(youtube_video_id)
|
||||
|
||||
if not already:
|
||||
from ..database import SessionLocal
|
||||
def _enrich(yt_id: str):
|
||||
bg_db = SessionLocal()
|
||||
@@ -943,6 +951,8 @@ def get_video_by_yt_id(
|
||||
_upsert_video_from_yt(bg_db, yt_id, polite=True)
|
||||
finally:
|
||||
bg_db.close()
|
||||
with _enriching_lock:
|
||||
_enriching.discard(yt_id)
|
||||
background_tasks.add_task(_enrich, youtube_video_id)
|
||||
|
||||
row = db.execute(
|
||||
|
||||
Reference in New Issue
Block a user