diff --git a/backend/routers/videos.py b/backend/routers/videos.py index fbf2f6a..7f08145 100644 --- a/backend/routers/videos.py +++ b/backend/routers/videos.py @@ -298,7 +298,14 @@ def home_feed( # mode == "ranked" (default) import random as _random - candidate_limit = limit * 4 # wider pool for tier sampling + + # Pull a large candidate pool per page. Each page draws from a NON-overlapping + # slice of the scored list so pagination actually moves through new material. + # candidate_limit >> limit so tier-sampling has real variety to choose from. + candidate_limit = min(limit * 15, 600) + page_num = offset // limit if limit > 0 else 0 + sql_offset = page_num * candidate_limit # non-overlapping pages + rows = db.execute( text(f""" WITH channel_stats AS ( @@ -361,24 +368,53 @@ def home_feed( {duration_clause} ) SELECT * FROM scored - WHERE rn <= 5 + WHERE rn <= 15 ORDER BY score DESC - LIMIT :candidate_limit OFFSET :offset + LIMIT :candidate_limit OFFSET :sql_offset """), - {"user_id": current_user.id, "candidate_limit": candidate_limit, "offset": offset, + {"user_id": current_user.id, "candidate_limit": candidate_limit, "sql_offset": sql_offset, "hide_watched": 1 if hide_watched else 0, "w_recency": w_recency, "w_affinity": w_affinity, "w_channel": w_channel}, ).mappings().all() - # Tier-based sampling with score perturbation so the feed varies each load + # Tier-based sampling: scores span -365..+100+ so ±N jitter is useless. + # Instead split the ranked pool into thirds and randomly sample from each, + # so every reshuffle genuinely picks a different mix of top/mid/wildcard videos. candidates = [dict(r) for r in rows] - for c in candidates: - c["_ps"] = c["score"] + _random.uniform(-12, 12) - candidates.sort(key=lambda x: x["_ps"], reverse=True) - top = candidates[:limit] + n = len(candidates) - # Track impressions for page 0 (first visit) — penalises videos shown but ignored - if offset == 0 and top: + if n <= limit: + _random.shuffle(candidates) + top = candidates + else: + split1 = max(n * 2 // 5, limit) # top 40 % + split2 = max(n * 4 // 5, split1 + 1) # next 40 % + t1 = candidates[:split1] + t2 = candidates[split1:split2] + t3 = candidates[split2:] + + # 60 % from t1, 30 % from t2, 10 % wildcards from t3 + n1 = limit * 6 // 10 + n2 = limit * 3 // 10 + n3 = limit - n1 - n2 + + picked = ( + _random.sample(t1, min(n1, len(t1))) + + (_random.sample(t2, min(n2, len(t2))) if t2 else []) + + (_random.sample(t3, min(n3, len(t3))) if t3 else []) + ) + # Fill any shortfall when a tier was smaller than requested + if len(picked) < limit: + already = {id(x) for x in picked} + rest = [x for x in candidates if id(x) not in already] + if rest: + picked += _random.sample(rest, min(limit - len(picked), len(rest))) + + _random.shuffle(picked) + top = picked[:limit] + + # Track impressions — penalises videos shown but not clicked on repeat visits + if page_num == 0 and top: for item in top: if not item["watched"]: db.execute(text(""" @@ -390,7 +426,7 @@ def home_feed( db.commit() followed = [ - VideoDetail(**{k: v for k, v in item.items() if k not in ("watched", "score", "rn", "_ps")}, + VideoDetail(**{k: v for k, v in item.items() if k not in ("watched", "score", "rn")}, is_watched=bool(item["watched"])) for item in top ]