Fix hidden yt-dlp calls on video page causing cookie invalidation
Two background yt-dlp processes were firing every time a video page loaded:

1. importChapters (called unconditionally via useEffect on mount) was calling _upsert_video_from_yt with polite=False when chapters=NULL — no rate limiter, no download-pause check, and it ran concurrently with active downloads. Fix: return [] immediately when chapters=NULL and let the normal enrichment pipeline (already polite=True) fill them in.

2. get_video_by_yt_id schedules a background _enrich task whenever description or chapters are NULL. The frontend polls every 3 s while description is null, so dozens of enrichment tasks would pile up for the same video. Fix: deduplicate with an _enriching set — only one background fetch per youtube_video_id at a time; the set entry is cleared when the task finishes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import threading
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@@ -17,6 +18,11 @@ from ..services.scoring import get_surprise_videos, get_discovery_injection
|
|||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
# Tracks which video IDs currently have a background enrichment running,
|
||||||
|
# so repeated polls from the frontend don't spawn duplicate yt-dlp calls.
|
||||||
|
_enriching: set[str] = set()
|
||||||
|
_enriching_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def _update_affinity(db: Session, user_id: int, video: Video, delta: float):
|
def _update_affinity(db: Session, user_id: int, video: Video, delta: float):
|
||||||
"""Adjust tag/category affinity scores for a video. delta > 0 = positive signal."""
|
"""Adjust tag/category affinity scores for a video. delta > 0 = positive signal."""
|
||||||
@@ -700,18 +706,12 @@ def import_chapters(
|
|||||||
import json as _json
|
import json as _json
|
||||||
|
|
||||||
video = db.query(Video).filter(Video.id == video_id).first()
|
video = db.query(Video).filter(Video.id == video_id).first()
|
||||||
if not video:
|
if not video or video.chapters is None:
|
||||||
|
# chapters=NULL means enrichment hasn't run yet; the background fetch
|
||||||
|
# triggered by get_video_by_yt_id will fill this in. Don't call yt-dlp
|
||||||
|
# here — it runs polite=False and races with active downloads.
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# chapters=NULL means never fetched; fetch now and cache the result (even if empty)
|
|
||||||
if video.chapters is None:
|
|
||||||
_upsert_video_from_yt(db, video.youtube_video_id)
|
|
||||||
db.refresh(video)
|
|
||||||
# Mark as checked even if no chapters found, so we don't re-fetch next time
|
|
||||||
if video.chapters is None:
|
|
||||||
video.chapters = "[]"
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
chapters = _json.loads(video.chapters or "[]")
|
chapters = _json.loads(video.chapters or "[]")
|
||||||
# Skip if trivial (single chapter) or already imported
|
# Skip if trivial (single chapter) or already imported
|
||||||
if len(chapters) < 2:
|
if len(chapters) < 2:
|
||||||
@@ -935,15 +935,25 @@ def get_video_by_yt_id(
|
|||||||
# Video unknown — must block to get at least a title before we can render anything
|
# Video unknown — must block to get at least a title before we can render anything
|
||||||
_upsert_video_from_yt(db, youtube_video_id)
|
_upsert_video_from_yt(db, youtube_video_id)
|
||||||
elif existing.description is None or existing.chapters is None:
|
elif existing.description is None or existing.chapters is None:
|
||||||
# Video known but missing enrichment — fetch in background, return immediately
|
# Video known but missing enrichment — schedule one background fetch.
|
||||||
from ..database import SessionLocal
|
# The frontend polls every 3 s while description is null; without the
|
||||||
def _enrich(yt_id: str):
|
# dedup guard, each poll would spawn its own yt-dlp process.
|
||||||
bg_db = SessionLocal()
|
with _enriching_lock:
|
||||||
try:
|
already = youtube_video_id in _enriching
|
||||||
_upsert_video_from_yt(bg_db, yt_id, polite=True)
|
if not already:
|
||||||
finally:
|
_enriching.add(youtube_video_id)
|
||||||
bg_db.close()
|
|
||||||
background_tasks.add_task(_enrich, youtube_video_id)
|
if not already:
|
||||||
|
from ..database import SessionLocal
|
||||||
|
def _enrich(yt_id: str):
|
||||||
|
bg_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
_upsert_video_from_yt(bg_db, yt_id, polite=True)
|
||||||
|
finally:
|
||||||
|
bg_db.close()
|
||||||
|
with _enriching_lock:
|
||||||
|
_enriching.discard(yt_id)
|
||||||
|
background_tasks.add_task(_enrich, youtube_video_id)
|
||||||
|
|
||||||
row = db.execute(
|
row = db.execute(
|
||||||
text(_VIDEO_SELECT + "WHERE v.youtube_video_id = :yt_id"),
|
text(_VIDEO_SELECT + "WHERE v.youtube_video_id = :yt_id"),
|
||||||
|
|||||||
Reference in New Issue
Block a user