Overhaul For You feed ranking and freshness

Ranking improvements:
- Wider candidate pool (4x limit) with ±12pt score perturbation so
  same-score videos shuffle differently each load
- Recent channel engagement signal: channels watched in the past 30 days
  get a +4pts boost per recent watch (added to the channel term, so it is
  scaled by the channel weight)
- Bail penalty: -25pts for videos started (more than 30s watched) but
  abandoned before 20% completion
- Impression penalty: -3pts per prior feed appearance (capped at 10
  appearances, i.e. -30pts max), so repeatedly-skipped videos sink naturally
- Per-channel row cap (rn) raised from 3 to 5 for more candidates;
  Python-side sampling then picks the top `limit` results

Feed UX:
- Reshuffle button now available on For You (ranked) mode, not just Explore
- shuffleKey now always included in query key (not just random mode)
- Ranked mode staleTime reduced from 10min to 90s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 01:14:10 +02:00
parent c11e1fdaf7
commit bbf7cc939b
3 changed files with 48 additions and 13 deletions

View File

@@ -130,6 +130,7 @@ def on_startup():
note TEXT DEFAULT '',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)""",
"ALTER TABLE user_videos ADD COLUMN feed_shown_count INTEGER NOT NULL DEFAULT 0",
]:
try:
db.execute(text(col_sql))

View File

@@ -297,6 +297,8 @@ def home_feed(
]
# mode == "ranked" (default)
import random as _random
candidate_limit = limit * 4 # wider pool for tier sampling
rows = db.execute(
text(f"""
WITH channel_stats AS (
@@ -305,7 +307,8 @@ def home_feed(
COUNT(CASE WHEN uv.watched = 1 THEN 1 END) AS watched_count,
COUNT(CASE WHEN uv.liked = 1 THEN 1 END) AS liked_count,
SUM(CASE WHEN uv.rating IS NOT NULL THEN uv.rating ELSE 0 END) AS rating_sum,
AVG(CASE WHEN uv.completion_percent IS NOT NULL THEN uv.completion_percent END) AS avg_completion_pct
AVG(CASE WHEN uv.completion_percent IS NOT NULL THEN uv.completion_percent END) AS avg_completion_pct,
COUNT(CASE WHEN uv.watched = 1 AND uv.last_watched_at > datetime('now', '-30 days') THEN 1 END) AS recent_watches
FROM videos v
LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
GROUP BY v.channel_id
@@ -326,7 +329,9 @@ def home_feed(
(SQRT(CAST(COALESCE(cs.watched_count, 0) AS REAL)) * 5.0
+ COALESCE(cs.liked_count, 0) * 10.0
+ COALESCE(cs.rating_sum, 0) * 8.0
+ COALESCE(cs.avg_completion_pct, 50.0) * 0.08) * :w_channel
+ COALESCE(cs.avg_completion_pct, 50.0) * 0.08
+ COALESCE(cs.recent_watches, 0) * 4.0
) * :w_channel
+ MAX(COALESCE(julianday(v.published_at) - julianday('now'), -90), -365) * :w_recency
+ COALESCE((
SELECT COALESCE(SUM(uta.score), 0)
@@ -336,6 +341,10 @@ def home_feed(
OR instr(LOWER(COALESCE(v.tags, '')), '"' || uta.tag || '"') > 0)
LIMIT 5
), 0) * :w_affinity
- CASE WHEN COALESCE(uv.completion_percent, 100) < 20
AND COALESCE(uv.watch_progress_seconds, 0) > 30
THEN 25 ELSE 0 END
- 3 * MIN(COALESCE(uv.feed_shown_count, 0), 10)
AS score,
ROW_NUMBER() OVER (
PARTITION BY v.channel_id
@@ -352,17 +361,38 @@ def home_feed(
{duration_clause}
)
SELECT * FROM scored
WHERE rn <= 3
ORDER BY score DESC, RANDOM()
LIMIT :limit OFFSET :offset
WHERE rn <= 5
ORDER BY score DESC
LIMIT :candidate_limit OFFSET :offset
"""),
{"user_id": current_user.id, "limit": limit, "offset": offset, "hide_watched": 1 if hide_watched else 0,
{"user_id": current_user.id, "candidate_limit": candidate_limit, "offset": offset,
"hide_watched": 1 if hide_watched else 0,
"w_recency": w_recency, "w_affinity": w_affinity, "w_channel": w_channel},
).mappings().all()
# Tier-based sampling with score perturbation so the feed varies each load
candidates = [dict(r) for r in rows]
for c in candidates:
c["_ps"] = c["score"] + _random.uniform(-12, 12)
candidates.sort(key=lambda x: x["_ps"], reverse=True)
top = candidates[:limit]
# Track impressions for page 0 (first visit) — penalises videos shown but ignored
if offset == 0 and top:
for item in top:
if not item["watched"]:
db.execute(text("""
INSERT INTO user_videos (user_id, video_id, feed_shown_count)
VALUES (:uid, :vid, 1)
ON CONFLICT (user_id, video_id)
DO UPDATE SET feed_shown_count = feed_shown_count + 1
"""), {"uid": current_user.id, "vid": item["id"]})
db.commit()
followed = [
VideoDetail(**{k: v for k, v in dict(r).items() if k not in ("watched", "score", "rn")},
is_watched=bool(r["watched"]))
for r in rows
VideoDetail(**{k: v for k, v in item.items() if k not in ("watched", "score", "rn", "_ps")},
is_watched=bool(item["watched"]))
for item in top
]
# Inject discovery cards on every page: 1 every 5 followed cards.