diff --git a/backend/routers/videos.py b/backend/routers/videos.py
index 7f08145..d881ae5 100644
--- a/backend/routers/videos.py
+++ b/backend/routers/videos.py
@@ -1,5 +1,6 @@
 import os
 import random
+import threading
 from datetime import datetime
 from typing import Optional
 
@@ -17,6 +18,11 @@ from ..services.scoring import get_surprise_videos, get_discovery_injection
 
 router = APIRouter()
 
+# Tracks which video IDs currently have a background enrichment running,
+# so repeated polls from the frontend don't spawn duplicate yt-dlp calls.
+_enriching: set[str] = set()
+_enriching_lock = threading.Lock()
+
 
 def _update_affinity(db: Session, user_id: int, video: Video, delta: float):
     """Adjust tag/category affinity scores for a video. delta > 0 = positive signal."""
@@ -700,18 +706,12 @@ def import_chapters(
     import json as _json
 
     video = db.query(Video).filter(Video.id == video_id).first()
-    if not video:
+    if not video or video.chapters is None:
+        # chapters=NULL means enrichment hasn't run yet; the background fetch
+        # triggered by get_video_by_yt_id will fill this in. Don't call yt-dlp
+        # here — it runs polite=False and races with active downloads.
         return []
 
-    # chapters=NULL means never fetched; fetch now and cache the result (even if empty)
-    if video.chapters is None:
-        _upsert_video_from_yt(db, video.youtube_video_id)
-        db.refresh(video)
-        # Mark as checked even if no chapters found, so we don't re-fetch next time
-        if video.chapters is None:
-            video.chapters = "[]"
-            db.commit()
-
     chapters = _json.loads(video.chapters or "[]")
     # Skip if trivial (single chapter) or already imported
     if len(chapters) < 2:
@@ -935,15 +935,32 @@ def get_video_by_yt_id(
         # Video unknown — must block to get at least a title before we can render anything
         _upsert_video_from_yt(db, youtube_video_id)
     elif existing.description is None or existing.chapters is None:
-        # Video known but missing enrichment — fetch in background, return immediately
-        from ..database import SessionLocal
-        def _enrich(yt_id: str):
-            bg_db = SessionLocal()
-            try:
-                _upsert_video_from_yt(bg_db, yt_id, polite=True)
-            finally:
-                bg_db.close()
-        background_tasks.add_task(_enrich, youtube_video_id)
+        # Video known but missing enrichment — schedule one background fetch.
+        # The frontend polls every 3 s while description is null; without the
+        # dedup guard each poll would spawn its own yt-dlp process.
+        with _enriching_lock:
+            already = youtube_video_id in _enriching
+            if not already:
+                _enriching.add(youtube_video_id)
+
+        if not already:
+            from ..database import SessionLocal
+            def _enrich(yt_id: str):
+                # Always clear the in-flight marker, even when SessionLocal()
+                # or close() raises; otherwise the ID stays in _enriching and
+                # this video is never enriched again until the process restarts.
+                try:
+                    bg_db = SessionLocal()
+                    try:
+                        _upsert_video_from_yt(bg_db, yt_id, polite=True)
+                    finally:
+                        bg_db.close()
+                finally:
+                    with _enriching_lock:
+                        _enriching.discard(yt_id)
+            # NOTE(review): if this handler raises after add_task, the task may
+            # never run and the marker would leak — confirm and guard if so.
+            background_tasks.add_task(_enrich, youtube_video_id)
 
     row = db.execute(
         text(_VIDEO_SELECT + "WHERE v.youtube_video_id = :yt_id"),