Compare commits

...

2 Commits

Author SHA1 Message Date
Mattias Tall
c00d5c7595 Optimise Following page: 4 aggregated queries, no correlated subqueries
- Rewrite list_channels to run exactly 4 SQL queries regardless of channel
  count: channel rows, aggregated video stats (GROUP BY), new-video counts,
  and latest video (derived-table JOIN replaces per-row correlated subquery)
- Remove dead _CHANNEL_STATS_SELECT (orphaned after the rewrite)
- Fix upload_frequency_days: use pre-computed date_span_days from vstats
  instead of a broken per-channel db.execute() call
- Restrict new_counts query to id_csv so it uses idx_videos_channel_indexed
- markChannelsSeen: optimistic setQueryData instead of invalidateQueries,
  eliminating a full channel-list re-fetch on every Following page visit
- DownloadIndicator idle poll: 10s → 30s (no need to hit DB when idle)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 16:18:33 +02:00
Mattias Tall
1405acfaed Revert channel stats to correlated subqueries (CTE had a param binding bug)
The CTE approach returned 0 rows — likely a SQLite/SQLAlchemy interaction
with :user_id appearing in multiple CTEs. This commit reverts to the original
correlated-subquery form, which is proven correct.

The 4 indexes added in the previous commit still apply and will make
the per-channel subqueries faster once the DB is indexed on startup.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 16:10:24 +02:00
3 changed files with 120 additions and 54 deletions

View File

@@ -66,54 +66,6 @@ class VideoOut(BaseModel):
model_config = {"from_attributes": True}
_CHANNEL_STATS_SELECT = """
WITH followed AS (
SELECT channel_id, last_seen_at
FROM user_channels
WHERE user_id = :user_id AND status = 'followed'
),
vinfo AS (
SELECT
v.channel_id,
COUNT(*) AS video_count,
MIN(v.published_at) AS oldest_published,
MAX(v.published_at) AS last_published_at,
SUM(CASE WHEN COALESCE(uv.watched, 0) = 0 THEN 1 ELSE 0 END) AS unwatched_count,
SUM(CASE WHEN uv.watched = 1 THEN 1 ELSE 0 END) AS watched_count,
SUM(CASE WHEN uv.downloaded = 1 THEN 1 ELSE 0 END) AS downloaded_count
FROM videos v
JOIN followed f ON f.channel_id = v.channel_id
LEFT JOIN user_videos uv ON uv.video_id = v.id AND uv.user_id = :user_id
GROUP BY v.channel_id
),
nc AS (
SELECT v.channel_id, COUNT(*) AS new_count
FROM videos v
JOIN followed f ON f.channel_id = v.channel_id
WHERE f.last_seen_at IS NULL OR v.indexed_at > f.last_seen_at
GROUP BY v.channel_id
)
SELECT
c.*, uc.status, uc.auto_download, uc.muted_until, uc.notes,
COALESCE(vi.video_count, 0) AS video_count,
vi.last_published_at,
COALESCE(vi.unwatched_count, 0) AS unwatched_count,
COALESCE(vi.watched_count, 0) AS watched_count,
COALESCE(vi.downloaded_count, 0) AS downloaded_count,
COALESCE(nc.new_count, 0) AS new_count,
CASE WHEN COALESCE(vi.video_count, 0) < 2 THEN NULL
ELSE (julianday(vi.last_published_at) - julianday(vi.oldest_published))
/ (vi.video_count - 1.0)
END AS upload_frequency_days,
(SELECT v2.youtube_video_id FROM videos v2
WHERE v2.channel_id = c.id ORDER BY v2.published_at DESC LIMIT 1) AS latest_video_id,
(SELECT v2.title FROM videos v2
WHERE v2.channel_id = c.id ORDER BY v2.published_at DESC LIMIT 1) AS latest_video_title
FROM channels c
JOIN user_channels uc ON c.id = uc.channel_id
WHERE uc.user_id = :user_id AND uc.status = 'followed'
"""
def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
c = db.query(Channel).filter(Channel.id == channel_id).first()
@@ -345,11 +297,122 @@ def list_channels(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
rows = db.execute(
text(_CHANNEL_STATS_SELECT + "ORDER BY last_published_at DESC"),
{"user_id": current_user.id},
uid = current_user.id
# Step 1 — channel rows + user_channel metadata (fast, no video stats)
ch_rows = db.execute(
text("""
SELECT c.id, c.youtube_channel_id, c.name, c.description,
c.thumbnail_url, c.banner_url, c.subscriber_count, c.crawled_at,
uc.status, uc.auto_download, uc.muted_until, uc.notes, uc.last_seen_at
FROM channels c
JOIN user_channels uc ON c.id = uc.channel_id
WHERE uc.user_id = :uid AND uc.status = 'followed'
"""),
{"uid": uid},
).mappings().all()
return [ChannelOut(**dict(r)) for r in rows]
if not ch_rows:
return []
id_csv = ",".join(str(r["id"]) for r in ch_rows)
last_seen = {r["id"]: r["last_seen_at"] for r in ch_rows}
# Step 2 — aggregated video stats for all channels in one query
vstats = {
r["channel_id"]: r
for r in db.execute(
text(f"""
SELECT v.channel_id,
COUNT(*) AS video_count,
MAX(v.published_at) AS last_published_at,
julianday(MAX(v.published_at)) - julianday(MIN(v.published_at)) AS date_span_days,
SUM(CASE WHEN COALESCE(uv.watched, 0) = 0 THEN 1 ELSE 0 END) AS unwatched_count,
SUM(CASE WHEN uv.watched = 1 THEN 1 ELSE 0 END) AS watched_count,
SUM(CASE WHEN uv.downloaded = 1 THEN 1 ELSE 0 END) AS downloaded_count
FROM videos v
LEFT JOIN user_videos uv ON uv.video_id = v.id AND uv.user_id = :uid
WHERE v.channel_id IN ({id_csv})
GROUP BY v.channel_id
"""),
{"uid": uid},
).mappings().all()
}
# Step 3 — new-video count per channel (videos indexed after last_seen_at)
new_counts = {
r["channel_id"]: r["new_count"]
for r in db.execute(
text(f"""
SELECT v.channel_id, COUNT(*) AS new_count
FROM videos v
JOIN user_channels uc
ON uc.channel_id = v.channel_id
AND uc.user_id = :uid
WHERE v.channel_id IN ({id_csv})
AND (uc.last_seen_at IS NULL OR v.indexed_at > uc.last_seen_at)
GROUP BY v.channel_id
"""),
{"uid": uid},
).mappings().all()
}
# Step 4 — latest video id + title per channel (derived-table join, no correlated subquery)
latest = {
r["channel_id"]: r
for r in db.execute(
text(f"""
SELECT v.channel_id,
v.youtube_video_id AS latest_video_id,
v.title AS latest_video_title
FROM videos v
JOIN (
SELECT channel_id, MAX(published_at) AS max_pub
FROM videos
WHERE channel_id IN ({id_csv})
GROUP BY channel_id
) m ON v.channel_id = m.channel_id AND v.published_at = m.max_pub
GROUP BY v.channel_id
"""),
).mappings().all()
}
# Merge and build response
result = []
for r in ch_rows:
cid = r["id"]
vs = vstats.get(cid) or {}
vc = vs.get("video_count") or 0
newest = vs.get("last_published_at")
span = vs.get("date_span_days")
freq = (span / (vc - 1.0)) if (vc >= 2 and span is not None) else None
result.append(ChannelOut(
id=cid,
youtube_channel_id=r["youtube_channel_id"],
name=r["name"],
description=r["description"],
thumbnail_url=r["thumbnail_url"],
banner_url=r.get("banner_url"),
subscriber_count=r.get("subscriber_count"),
crawled_at=r.get("crawled_at"),
status=r["status"],
auto_download=r.get("auto_download"),
muted_until=r.get("muted_until"),
notes=r.get("notes") or "",
video_count=vc,
last_published_at=newest,
unwatched_count=vs.get("unwatched_count") or 0,
watched_count=vs.get("watched_count") or 0,
downloaded_count=vs.get("downloaded_count") or 0,
new_count=new_counts.get(cid, 0),
latest_video_id=latest.get(cid, {}).get("latest_video_id"),
latest_video_title=latest.get(cid, {}).get("latest_video_title"),
upload_frequency_days=freq,
))
result.sort(key=lambda c: c.last_published_at or datetime.min, reverse=True)
return result
# ── Channel Groups (must be before /{channel_id} to avoid route shadowing) ───

View File

@@ -80,7 +80,7 @@ function DownloadIndicator() {
const active = (query.state.data ?? []).some(
(d) => d.status === "pending" || d.status === "downloading"
);
return active ? 1500 : 10_000;
return active ? 1500 : 30_000;
},
});

View File

@@ -612,7 +612,10 @@ export default function Following() {
useEffect(() => {
if (channels.length > 0) {
markChannelsSeen().then(() => {
qc.invalidateQueries({ queryKey: ["channels"] });
// Zero out new_count optimistically — avoids a full re-fetch just to clear badges
qc.setQueryData(["channels"], (old) =>
old ? old.map((c) => ({ ...c, new_count: 0 })) : old
);
});
}
}, []); // eslint-disable-line react-hooks/exhaustive-deps