Fix popular fetch and improve date/view_count coverage
Popular fetch now uses a two-phase approach: a fast flat-playlist pass to get video IDs in popularity order, followed by a parallel full-metadata fetch (8 workers) to get the real view_count and published_at for each video. Previously, flat-playlist mode returned timestamp/view_count as null.

The enrich task now also backfills published_at and view_count (not just description). Startup limit 3→50, enrichment sleep 2s→0.5s.

Raise all thread pool sizes to match the 8-core machine:
- Discovery search: 5→8 workers
- Graph signal: 4→8 workers
- Popular fetch: 5→8 workers
- Download semaphore: default 3→6, cap 10→16

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -112,7 +112,7 @@ def _search_and_store(
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as pool:
|
||||
with ThreadPoolExecutor(max_workers=8) as pool:
|
||||
futures = {pool.submit(_do_search, q): q for q in queries}
|
||||
for fut in as_completed(futures):
|
||||
for video in fut.result():
|
||||
@@ -620,7 +620,7 @@ def update_graph_signal(db: Session, user_id: int):
|
||||
return []
|
||||
|
||||
featured_map: dict[str, list[str]] = {}
|
||||
with ThreadPoolExecutor(max_workers=4) as pool:
|
||||
with ThreadPoolExecutor(max_workers=8) as pool:
|
||||
futures = {pool.submit(_fetch, row["youtube_channel_id"]): row for row in sample}
|
||||
for fut in as_completed(futures):
|
||||
row = futures[fut]
|
||||
|
||||
@@ -665,7 +665,7 @@ def predicted_file_path(video_id: str) -> Path:
|
||||
return Path(settings.download_path) / f"{video_id}.mp4"
|
||||
|
||||
|
||||
_SEMAPHORE = threading.Semaphore(3)
|
||||
_SEMAPHORE = threading.Semaphore(6)
|
||||
_semaphore_lock = threading.Lock()
|
||||
_cookies_browser: str = ""
|
||||
_cookies_file: str = ""
|
||||
@@ -682,7 +682,7 @@ _oauth2_state_lock = threading.Lock()
|
||||
def set_max_concurrent(n: int) -> None:
|
||||
global _SEMAPHORE
|
||||
with _semaphore_lock:
|
||||
_SEMAPHORE = threading.Semaphore(max(1, min(n, 10)))
|
||||
_SEMAPHORE = threading.Semaphore(max(1, min(n, 16)))
|
||||
|
||||
|
||||
def set_cookies_browser(browser: str) -> None:
|
||||
|
||||
Reference in New Issue
Block a user