Initial commit — YT Hub

Self-hosted personal YouTube management app.
FastAPI + SQLite backend, React + Vite + Tailwind frontend.
Dockerfiles and compose included for Portainer deployment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
inputnoise
2026-05-25 20:09:04 +02:00
commit 1827dd6c4e
63 changed files with 14480 additions and 0 deletions

14
backend/Dockerfile Normal file
View File

@@ -0,0 +1,14 @@
# Backend image: Python 3.12 plus ffmpeg, running the FastAPI app under uvicorn on port 8000.
FROM python:3.12-slim
WORKDIR /app
# Install ffmpeg and drop the apt package lists in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
# Copy only the requirements file first so the pip layer stays cached until it changes.
# The backend/ source paths mean the build context must be the directory that contains backend/.
COPY backend/requirements.txt ./backend/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt
COPY backend/ ./backend/
EXPOSE 8000
# The app is imported as the package module backend.main, relative to WORKDIR /app.
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]

0
backend/__init__.py Normal file
View File

49
backend/auth_utils.py Normal file
View File

@@ -0,0 +1,49 @@
from datetime import datetime, timedelta
from typing import Optional
import bcrypt
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
from sqlalchemy.orm import Session
from .config import settings
from .database import get_db
from .models import User
# Bearer-token extractor used as a FastAPI dependency; tokenUrl names the login endpoint.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
def verify_password(plain: str, hashed: str) -> bool:
    """Return True when *plain* matches the bcrypt hash *hashed*.

    Args:
        plain: Password as typed by the user.
        hashed: Stored bcrypt hash, as produced by ``hash_password``.

    Returns:
        True on a match; False on a mismatch or when bcrypt rejects the input.
    """
    try:
        return bcrypt.checkpw(plain.encode(), hashed.encode())
    except ValueError:
        # bcrypt raises ValueError for a malformed stored hash and, from
        # bcrypt 5.0, for passwords longer than 72 bytes. Either way the
        # credentials are not valid, so report a mismatch instead of
        # letting the error surface as a 500 from the login route.
        return False
def hash_password(plain: str) -> str:
    """Hash *plain* with bcrypt under a freshly generated salt and return the hash as text."""
    secret = plain.encode()
    salt = bcrypt.gensalt()
    digest = bcrypt.hashpw(secret, salt)
    return digest.decode()
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Encode *data* as a signed JWT carrying an ``exp`` claim.

    Args:
        data: Claims to embed; the mapping itself is not modified.
        expires_delta: Token lifetime. When omitted (or falsy) the configured
            ``access_token_expire_minutes`` is used.

    Returns:
        The encoded token string.
    """
    lifetime = expires_delta or timedelta(minutes=settings.access_token_expire_minutes)
    claims = {**data, "exp": datetime.utcnow() + lifetime}
    return jwt.encode(claims, settings.secret_key, algorithm=settings.algorithm)
def get_current_user(token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)) -> User:
    """Resolve the bearer token of the current request to a ``User`` row.

    Args:
        token: Raw JWT extracted from the Authorization header.
        db: Request-scoped database session.

    Returns:
        The user whose id is stored in the token's ``sub`` claim.

    Raises:
        HTTPException: 401 when the token cannot be decoded, has no usable
            ``sub`` claim, or refers to a user that no longer exists.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(token, settings.secret_key, algorithms=[settings.algorithm])
        # A missing claim (KeyError), a null claim (TypeError) or a non-numeric
        # claim (ValueError) is a bad credential, not a server error.
        user_id = int(payload["sub"])
    except (JWTError, KeyError, TypeError, ValueError):
        raise credentials_exception
    user = db.query(User).filter(User.id == user_id).first()
    if user is None:
        raise credentials_exception
    return user

16
backend/config.py Normal file
View File

@@ -0,0 +1,16 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
# Application settings loaded by pydantic-settings. Values come from the environment and an
# optional ".env" file; keys that match no field are ignored (extra="ignore").
class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
    database_url: str = "sqlite:///./app.db"
    download_path: str = "./downloads"  # created at startup and served under /files by main.py
    # NOTE(review): placeholder default. JWTs are signed with this value, so a deployment that
    # does not override it issues tokens anyone can forge.
    secret_key: str = "changeme-use-a-real-secret-in-production"
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 60 * 24 * 7 # 1 week
    jellyfin_url: str = ""
    allow_registration: bool = True  # seeds the system_config "allow_registration" row at startup


# Module-level singleton imported by the rest of the backend.
settings = Settings()

90
backend/database.py Normal file
View File

@@ -0,0 +1,90 @@
from sqlalchemy import create_engine, event, text
from sqlalchemy.orm import sessionmaker, DeclarativeBase
from .config import settings
# Engine for the configured database URL. check_same_thread=False is passed through to the
# DBAPI connect call so connections may be used from threads other than their creator.
# NOTE(review): that argument is sqlite3-specific — confirm no other backend is expected.
engine = create_engine(
    settings.database_url,
    connect_args={"check_same_thread": False},
    echo=False,
)
@event.listens_for(engine, "connect")
def set_sqlite_pragma(dbapi_conn, _):
    """Switch every new DBAPI connection to WAL journaling and enable foreign keys."""
    pragma_cursor = dbapi_conn.cursor()
    for statement in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        pragma_cursor.execute(statement)
    pragma_cursor.close()
# Session factory bound to the engine; callers commit explicitly (no autocommit, no autoflush).
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


class Base(DeclarativeBase):
    """Declarative base class that all ORM models in models.py inherit from."""
    pass
# Script run by init_db() after the ORM tables exist. It creates two FTS5 external-content
# indexes (videos: title/description, channels: name/description) and the AFTER INSERT /
# DELETE / UPDATE triggers that mirror each change of the content table into its index.
# Every statement uses IF NOT EXISTS, so the script can be re-run on each startup.
FTS_SETUP_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS videos_fts USING fts5(
title, description, content=videos, content_rowid=id
);
CREATE VIRTUAL TABLE IF NOT EXISTS channels_fts USING fts5(
name, description, content=channels, content_rowid=id
);
CREATE TRIGGER IF NOT EXISTS videos_ai AFTER INSERT ON videos BEGIN
INSERT INTO videos_fts(rowid, title, description)
VALUES (new.id, new.title, COALESCE(new.description, ''));
END;
CREATE TRIGGER IF NOT EXISTS videos_ad AFTER DELETE ON videos BEGIN
INSERT INTO videos_fts(videos_fts, rowid, title, description)
VALUES ('delete', old.id, old.title, COALESCE(old.description, ''));
END;
CREATE TRIGGER IF NOT EXISTS videos_au AFTER UPDATE ON videos BEGIN
INSERT INTO videos_fts(videos_fts, rowid, title, description)
VALUES ('delete', old.id, old.title, COALESCE(old.description, ''));
INSERT INTO videos_fts(rowid, title, description)
VALUES (new.id, new.title, COALESCE(new.description, ''));
END;
CREATE TRIGGER IF NOT EXISTS channels_ai AFTER INSERT ON channels BEGIN
INSERT INTO channels_fts(rowid, name, description)
VALUES (new.id, new.name, COALESCE(new.description, ''));
END;
CREATE TRIGGER IF NOT EXISTS channels_ad AFTER DELETE ON channels BEGIN
INSERT INTO channels_fts(channels_fts, rowid, name, description)
VALUES ('delete', old.id, old.name, COALESCE(old.description, ''));
END;
CREATE TRIGGER IF NOT EXISTS channels_au AFTER UPDATE ON channels BEGIN
INSERT INTO channels_fts(channels_fts, rowid, name, description)
VALUES ('delete', old.id, old.name, COALESCE(old.description, ''));
INSERT INTO channels_fts(rowid, name, description)
VALUES (new.id, new.name, COALESCE(new.description, ''));
END;
"""
def init_db():
    """Create all ORM tables, then install the FTS5 tables and their sync triggers."""
    # Imported only for its side effect of registering the models on Base.metadata.
    from . import models  # noqa: F401

    Base.metadata.create_all(bind=engine)

    # executescript handles multi-statement SQL including trigger BEGIN...END blocks
    dbapi_connection = engine.raw_connection()
    try:
        dbapi_connection.executescript(FTS_SETUP_SQL)
    finally:
        dbapi_connection.close()
def get_db():
    """Yield a fresh session and close it once the consumer is done with it."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()

159
backend/main.py Normal file
View File

@@ -0,0 +1,159 @@
import os
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from .config import settings
from .database import init_db, SessionLocal
from .services import ytdlp as ytdlp_service
from .routers import auth, channels, videos, search, downloads, discovery, settings as settings_router, stats as stats_router, export as export_router, collections as collections_router, admin as admin_router
# FastAPI application object; every router below is mounted under an /api/<area> prefix.
app = FastAPI(title="YouTube Hub", version="0.1.0")
# NOTE(review): wildcard origins together with allow_credentials=True is very permissive —
# confirm whether the frontend needs credentialed cross-origin requests at all.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.include_router(auth.router, prefix="/api/auth", tags=["auth"])
app.include_router(channels.router, prefix="/api/channels", tags=["channels"])
app.include_router(videos.router, prefix="/api/videos", tags=["videos"])
app.include_router(search.router, prefix="/api/search", tags=["search"])
app.include_router(downloads.router, prefix="/api/downloads", tags=["downloads"])
app.include_router(discovery.router, prefix="/api/discovery", tags=["discovery"])
app.include_router(settings_router.router, prefix="/api/settings", tags=["settings"])
app.include_router(stats_router.router, prefix="/api/stats", tags=["stats"])
app.include_router(export_router.router, prefix="/api/export", tags=["export"])
app.include_router(collections_router.router, prefix="/api/collections", tags=["collections"])
app.include_router(admin_router.router, prefix="/api/admin", tags=["admin"])
# Create the download directory at import time, then serve its contents under /files.
# NOTE(review): the static mount carries no auth dependency — confirm that is intended.
os.makedirs(settings.download_path, exist_ok=True)
app.mount("/files", StaticFiles(directory=settings.download_path), name="files")
# Schema patches applied on every startup, in order. Each statement is meant to be harmless
# when already applied: CREATE TABLE uses IF NOT EXISTS, and an ALTER TABLE ... ADD COLUMN
# for an existing column fails with "duplicate column name", which is tolerated.
_STARTUP_MIGRATIONS = (
    "ALTER TABLE user_videos ADD COLUMN liked BOOLEAN DEFAULT FALSE",
    "ALTER TABLE user_videos ADD COLUMN liked_at DATETIME",
    "ALTER TABLE downloads ADD COLUMN resolution TEXT",
    "ALTER TABLE user_channels ADD COLUMN auto_download BOOLEAN DEFAULT NULL",
    "ALTER TABLE user_channels ADD COLUMN last_seen_at DATETIME",
    "ALTER TABLE discovery_queue ADD COLUMN preview_json TEXT",
    "ALTER TABLE channels ADD COLUMN subscriber_count INTEGER",
    "ALTER TABLE user_settings ADD COLUMN cookies_browser TEXT DEFAULT ''",
    "ALTER TABLE user_settings ADD COLUMN theater_mode INTEGER DEFAULT 0",
    "ALTER TABLE user_channels ADD COLUMN muted_until DATETIME DEFAULT NULL",
    "ALTER TABLE user_settings ADD COLUMN calm_mode INTEGER DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN hide_subscriber_counts INTEGER DEFAULT 0",
    "ALTER TABLE user_settings ADD COLUMN autoplay_enabled INTEGER DEFAULT 0",
    "ALTER TABLE user_videos ADD COLUMN rating INTEGER DEFAULT NULL",
    "ALTER TABLE downloads ADD COLUMN pending_delete_at DATETIME DEFAULT NULL",
    "ALTER TABLE user_channels ADD COLUMN notes TEXT DEFAULT ''",
    "ALTER TABLE videos ADD COLUMN chapters TEXT DEFAULT NULL",
    "ALTER TABLE video_bookmarks ADD COLUMN source TEXT DEFAULT 'manual'",
    "ALTER TABLE user_videos ADD COLUMN completion_percent REAL DEFAULT NULL",
    "ALTER TABLE user_videos ADD COLUMN rewatch_count INTEGER DEFAULT 0",
    "ALTER TABLE users ADD COLUMN is_admin INTEGER DEFAULT 0",
    """CREATE TABLE IF NOT EXISTS system_config (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
)""",
    "ALTER TABLE user_settings ADD COLUMN feed_weight_recency REAL DEFAULT 5.0",
    "ALTER TABLE user_settings ADD COLUMN feed_weight_affinity REAL DEFAULT 5.0",
    "ALTER TABLE user_settings ADD COLUMN feed_weight_channel REAL DEFAULT 5.0",
    """CREATE TABLE IF NOT EXISTS search_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
query TEXT NOT NULL,
searched_at DATETIME DEFAULT CURRENT_TIMESTAMP
)""",
    """CREATE TABLE IF NOT EXISTS user_tag_affinity (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
tag TEXT NOT NULL,
score REAL DEFAULT 0.0,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(user_id, tag)
)""",
    """CREATE TABLE IF NOT EXISTS collections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
name TEXT NOT NULL,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)""",
    """CREATE TABLE IF NOT EXISTS collection_items (
id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
video_id INTEGER NOT NULL REFERENCES videos(id) ON DELETE CASCADE,
added_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(collection_id, video_id)
)""",
    """CREATE TABLE IF NOT EXISTS video_bookmarks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
video_id INTEGER NOT NULL REFERENCES videos(id) ON DELETE CASCADE,
timestamp_seconds INTEGER NOT NULL,
note TEXT DEFAULT '',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)""",
)


def _apply_startup_migrations(db):
    """Run each startup migration in its own transaction; never raises on a failed statement."""
    import logging
    from sqlalchemy import text

    log = logging.getLogger(__name__)
    for statement in _STARTUP_MIGRATIONS:
        try:
            db.execute(text(statement))
            db.commit()
        except Exception as exc:
            db.rollback()
            # An already-present column is the normal outcome on every start after the
            # first; anything else is still tolerated but must not pass unnoticed.
            if "duplicate column name" in str(exc).lower():
                continue
            log.warning(
                "Startup migration failed: %s (%s)",
                statement.split("\n", 1)[0],
                exc,
            )


@app.on_event("startup")
def on_startup():
    """Prepare the database and runtime state when the application starts.

    Steps, in order: create tables and FTS objects, apply the incremental
    migrations, rebuild both FTS indexes, normalise thumbnail URLs, make sure
    an admin exists, seed ``allow_registration``, push the first stored user
    settings into the yt-dlp service, and start the description backfill in
    a daemon thread.
    """
    from sqlalchemy import text

    init_db()
    db = SessionLocal()
    try:
        _apply_startup_migrations(db)

        # Rebuild FTS indexes so all existing rows are searchable
        db.execute(text("INSERT INTO videos_fts(videos_fts) VALUES('rebuild')"))
        db.execute(text("INSERT INTO channels_fts(channels_fts) VALUES('rebuild')"))
        # Migrate signed/expiring YouTube thumbnail URLs to stable format
        db.execute(text("""
UPDATE videos
SET thumbnail_url = 'https://i.ytimg.com/vi/' || youtube_video_id || '/hqdefault.jpg'
WHERE thumbnail_url IS NULL
OR thumbnail_url NOT LIKE 'https://i.ytimg.com/vi/%/hqdefault.jpg'
"""))
        db.commit()

        # If no admin exists yet, promote the earliest registered user
        from .models import User as UserModel, SystemConfig
        has_admin = db.query(UserModel).filter_by(is_admin=True).first()
        if not has_admin:
            first_user = db.query(UserModel).order_by(UserModel.id).first()
            if first_user:
                first_user.is_admin = True
                db.commit()

        # Seed system_config from env if not already set
        if not db.query(SystemConfig).filter_by(key="allow_registration").first():
            db.add(SystemConfig(
                key="allow_registration",
                value="true" if settings.allow_registration else "false",
            ))
            db.commit()

        # Apply the saved concurrent-download limit and cookie browser on startup.
        # The yt-dlp service holds one process-wide value, so the first settings row is used.
        from .models import UserSettings
        first_user_settings = db.query(UserSettings).first()
        if first_user_settings:
            ytdlp_service.set_max_concurrent(first_user_settings.max_concurrent_downloads)
            ytdlp_service.set_cookies_browser(first_user_settings.cookies_browser or "")
    finally:
        db.close()

    # Backfill descriptions for videos that don't have them yet (runs in background)
    import threading
    from .routers.channels import _enrich_missing_task
    threading.Thread(target=_enrich_missing_task, args=(10,), daemon=True).start()
@app.get("/api/health")
def health():
    # Liveness probe: constant payload, no database access.
    return dict(status="ok")

221
backend/models.py Normal file
View File

@@ -0,0 +1,221 @@
from datetime import datetime
from sqlalchemy import (
Boolean, Column, DateTime, Float, ForeignKey,
Integer, String, Text, UniqueConstraint,
)
from .database import Base
class User(Base):
    """Application account; ``is_admin`` is what the admin router checks for access."""
    __tablename__ = "users"
    id = Column(Integer, primary_key=True, index=True)
    username = Column(String, unique=True, nullable=False, index=True)
    email = Column(String, unique=True, nullable=False, index=True)
    hashed_password = Column(String, nullable=False)  # bcrypt hash stored as text
    is_admin = Column(Boolean, default=False)
    created_at = Column(DateTime, default=datetime.utcnow)
class Channel(Base):
    """YouTube channel, keyed by ``youtube_channel_id`` and not tied to any one user."""
    __tablename__ = "channels"
    id = Column(Integer, primary_key=True, index=True)
    youtube_channel_id = Column(String, unique=True, nullable=False, index=True)
    name = Column(String, nullable=False)
    description = Column(Text)
    thumbnail_url = Column(String)
    banner_url = Column(String)
    crawled_at = Column(DateTime)  # set by the channel indexing task after a metadata fetch
    subscriber_count = Column(Integer)
class UserChannel(Base):
    """A user's relationship to a channel; at most one row per (user, channel) pair."""
    __tablename__ = "user_channels"
    __table_args__ = (UniqueConstraint("user_id", "channel_id"),)
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    channel_id = Column(Integer, ForeignKey("channels.id", ondelete="CASCADE"), nullable=False)
    status = Column(String, default="followed") # followed / dismissed / pending
    added_at = Column(DateTime, default=datetime.utcnow)
    auto_download = Column(Boolean, default=None) # None = use global, True/False = override
    last_seen_at = Column(DateTime, default=None)  # videos indexed after this count as "new"
    muted_until = Column(DateTime, default=None)
    notes = Column(Text, default="")
class Video(Base):
    """YouTube video metadata; the channel link is nulled if the channel row is deleted."""
    __tablename__ = "videos"
    id = Column(Integer, primary_key=True, index=True)
    youtube_video_id = Column(String, unique=True, nullable=False, index=True)
    channel_id = Column(Integer, ForeignKey("channels.id", ondelete="SET NULL"), nullable=True)
    title = Column(String, nullable=False)
    description = Column(Text)  # NULL marks a video the enrichment task has not filled in yet
    thumbnail_url = Column(String)
    duration_seconds = Column(Integer)
    published_at = Column(DateTime)
    indexed_at = Column(DateTime, default=datetime.utcnow)
    tags = Column(Text) # JSON array string
    category = Column(String)
    chapters = Column(Text) # JSON array of {start_time, end_time, title}
class UserVideo(Base):
    """Per-user state for a video (watch, queue, download, like); one row per pair."""
    __tablename__ = "user_videos"
    __table_args__ = (UniqueConstraint("user_id", "video_id"),)
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    video_id = Column(Integer, ForeignKey("videos.id", ondelete="CASCADE"), nullable=False)
    watched = Column(Boolean, default=False)
    watch_progress_seconds = Column(Integer, default=0)
    completion_percent = Column(Float, default=None) # 0-100, set when video ends/navigates away
    rewatch_count = Column(Integer, default=0) # incremented each time a completed video is replayed
    queued = Column(Boolean, default=False)
    downloaded = Column(Boolean, default=False)
    liked = Column(Boolean, default=False)
    rating = Column(Integer, default=None) # NULL=unrated, 1=thumbs up, -1=thumbs down
    downloaded_at = Column(DateTime)
    liked_at = Column(DateTime)
    last_watched_at = Column(DateTime)
class Download(Base):
    """One download job of a video for a user, with its progress and outcome."""
    __tablename__ = "downloads"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    video_id = Column(Integer, ForeignKey("videos.id", ondelete="CASCADE"), nullable=False)
    status = Column(String, default="pending") # pending / downloading / complete / failed
    progress_percent = Column(Float, default=0.0)
    file_path = Column(String)
    resolution = Column(String) # e.g. "1080p", "720p"
    created_at = Column(DateTime, default=datetime.utcnow)
    completed_at = Column(DateTime)
    error_message = Column(Text)
    pending_delete_at = Column(DateTime, default=None)
class UserSettings(Base):
    """Per-user preferences; ``user_id`` is unique, so each user has at most one row."""
    __tablename__ = "user_settings"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, unique=True)
    preferred_quality = Column(String, default="best") # best / 1080p / 720p / 480p / 360p
    max_concurrent_downloads = Column(Integer, default=3) # 1-5
    hide_watched_from_feed = Column(Boolean, default=False)
    mark_watched_at_percent = Column(Integer, default=90) # 50-100
    auto_download_on_sync = Column(Boolean, default=False)
    cookies_browser = Column(String, default="") # chrome / firefox / etc., "" = disabled
    theater_mode = Column(Boolean, default=False)
    discovery_regions = Column(String, default="US,SE") # comma-separated ISO country codes
    calm_mode = Column(Boolean, default=False)
    hide_subscriber_counts = Column(Boolean, default=False)
    autoplay_enabled = Column(Boolean, default=False)
    feed_weight_recency = Column(Float, default=5.0) # 0-10
    feed_weight_affinity = Column(Float, default=5.0) # 0-10
    feed_weight_channel = Column(Float, default=5.0) # 0-10
class DiscoveryQueue(Base):
    """A channel suggested to a user, with a score; one row per (user, channel) pair."""
    __tablename__ = "discovery_queue"
    __table_args__ = (UniqueConstraint("user_id", "channel_id"),)
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    channel_id = Column(Integer, ForeignKey("channels.id", ondelete="CASCADE"), nullable=False)
    score = Column(Float, default=0.0)
    source = Column(String) # search / community / category / liked
    seen = Column(Boolean, default=False)
    preview_json = Column(Text) # JSON: [{thumbnail_url, title}, ...]
    created_at = Column(DateTime, default=datetime.utcnow)
class ChannelGroup(Base):
    """A named group of channels owned by one user; members are ChannelGroupMember rows."""
    __tablename__ = "channel_groups"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    name = Column(String, nullable=False)
    created_at = Column(DateTime, default=datetime.utcnow)
class ChannelGroupMember(Base):
    """Membership of a channel in a group; a channel can appear at most once per group."""
    __tablename__ = "channel_group_members"
    __table_args__ = (UniqueConstraint("group_id", "channel_id"),)
    id = Column(Integer, primary_key=True, index=True)
    group_id = Column(Integer, ForeignKey("channel_groups.id", ondelete="CASCADE"), nullable=False)
    channel_id = Column(Integer, ForeignKey("channels.id", ondelete="CASCADE"), nullable=False)
class UserTagAffinity(Base):
    """Per-user taste signal: how much the user engages with a given tag or category.

    One row per (user, tag) pair, enforced by the unique constraint.
    """
    __tablename__ = "user_tag_affinity"
    __table_args__ = (UniqueConstraint("user_id", "tag"),)
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    tag = Column(String, nullable=False, index=True)
    score = Column(Float, default=0.0)
    updated_at = Column(DateTime, default=datetime.utcnow)
class Collection(Base):
    """A named collection owned by one user; its videos are CollectionItem rows."""
    __tablename__ = "collections"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    name = Column(String, nullable=False)
    created_at = Column(DateTime, default=datetime.utcnow)
class CollectionItem(Base):
    """A video placed in a collection; a video can appear at most once per collection."""
    __tablename__ = "collection_items"
    __table_args__ = (UniqueConstraint("collection_id", "video_id"),)
    id = Column(Integer, primary_key=True, index=True)
    collection_id = Column(Integer, ForeignKey("collections.id", ondelete="CASCADE"), nullable=False)
    video_id = Column(Integer, ForeignKey("videos.id", ondelete="CASCADE"), nullable=False)
    added_at = Column(DateTime, default=datetime.utcnow)
class GraphEdge(Base):
    """Directed edge between two channels, unique per (from, to) pair, with a mention count."""
    __tablename__ = "graph_edges"
    __table_args__ = (UniqueConstraint("from_channel_id", "to_channel_id"),)
    id = Column(Integer, primary_key=True, index=True)
    from_channel_id = Column(Integer, ForeignKey("channels.id", ondelete="CASCADE"), nullable=False)
    to_channel_id = Column(Integer, ForeignKey("channels.id", ondelete="CASCADE"), nullable=False)
    # NOTE(review): presumably how often the source channel referenced the target — confirm with the writer.
    mention_count = Column(Integer, default=1)
    last_seen = Column(DateTime, default=datetime.utcnow)
class SystemConfig(Base):
    """Instance-wide key/value settings stored as strings (e.g. "allow_registration")."""
    __tablename__ = "system_config"
    key = Column(String, primary_key=True)
    value = Column(String, nullable=False)  # booleans are stored as "true" / "false"
class SearchHistory(Base):
    """One search query issued by a user, with the time it was made."""
    __tablename__ = "search_history"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    query = Column(String, nullable=False)
    searched_at = Column(DateTime, default=datetime.utcnow)
class VideoBookmark(Base):
    """A user's bookmark at a position (in seconds) inside a video, with an optional note."""
    __tablename__ = "video_bookmarks"
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
    video_id = Column(Integer, ForeignKey("videos.id", ondelete="CASCADE"), nullable=False)
    timestamp_seconds = Column(Integer, nullable=False)
    note = Column(Text, default="")
    source = Column(String, default="manual") # manual | auto
    created_at = Column(DateTime, default=datetime.utcnow)

10
backend/requirements.txt Normal file
View File

@@ -0,0 +1,10 @@
fastapi==0.115.5
uvicorn[standard]==0.32.1
sqlalchemy==2.0.36
python-jose[cryptography]==3.3.0
bcrypt==5.0.0
python-multipart==0.0.12
pydantic-settings==2.6.1
yt-dlp>=2024.11.18
httpx==0.27.2
aiofiles==24.1.0

View File

86
backend/routers/admin.py Normal file
View File

@@ -0,0 +1,86 @@
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import SystemConfig, User
# Admin routes; main.py mounts this router under /api/admin.
router = APIRouter()
def _require_admin(current_user: User = Depends(get_current_user)) -> User:
    """Dependency that lets the authenticated user through only if they are an admin.

    Raises:
        HTTPException: 403 when the user is not an admin.
    """
    if current_user.is_admin:
        return current_user
    raise HTTPException(status_code=403, detail="Admin only")
# Response schema for one account in the admin user list; from_attributes lets it be built
# directly from a User ORM row. The password hash is deliberately not a field.
class UserOut(BaseModel):
    id: int
    username: str
    email: str
    is_admin: bool
    created_at: Optional[datetime]
    model_config = {"from_attributes": True}
# Response schema for the instance configuration exposed to admins.
class ConfigOut(BaseModel):
    allow_registration: bool
# PATCH body for the instance configuration; a field left as None is not changed.
class ConfigPatch(BaseModel):
    allow_registration: Optional[bool] = None
@router.get("/users", response_model=list[UserOut])
def list_users(
    db: Session = Depends(get_db),
    _: User = Depends(_require_admin),
):
    # Admin-only: every account, ordered by ascending id.
    users_by_id = db.query(User).order_by(User.id)
    return users_by_id.all()
@router.delete("/users/{user_id}", status_code=204)
def delete_user(
    user_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(_require_admin),
):
    # Admin-only: delete another account. 400 for the caller's own id, 404 for an unknown id.
    if current_user.id == user_id:
        raise HTTPException(status_code=400, detail="Cannot delete your own account")

    target = db.query(User).filter_by(id=user_id).first()
    if target is None:
        raise HTTPException(status_code=404, detail="User not found")

    db.delete(target)
    db.commit()
@router.get("/config", response_model=ConfigOut)
def get_config(
    db: Session = Depends(get_db),
    _: User = Depends(_require_admin),
):
    # Admin-only: report whether registration is open; a missing row counts as open.
    stored = db.query(SystemConfig).filter_by(key="allow_registration").first()
    if stored:
        registration_open = stored.value == "true"
    else:
        registration_open = True
    return ConfigOut(allow_registration=registration_open)
@router.patch("/config", response_model=ConfigOut)
def update_config(
    body: ConfigPatch,
    db: Session = Depends(get_db),
    _: User = Depends(_require_admin),
):
    # Admin-only: upsert the allow_registration row when the body sets it, then return
    # the value now stored (a missing row counts as open).
    requested = body.allow_registration
    if requested is not None:
        stored_value = "true" if requested else "false"
        existing = db.query(SystemConfig).filter_by(key="allow_registration").first()
        if existing:
            existing.value = stored_value
        else:
            db.add(SystemConfig(key="allow_registration", value=stored_value))
        db.commit()

    current = db.query(SystemConfig).filter_by(key="allow_registration").first()
    registration_open = (current.value == "true") if current else True
    return ConfigOut(allow_registration=registration_open)

76
backend/routers/auth.py Normal file
View File

@@ -0,0 +1,76 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from pydantic import BaseModel, EmailStr
from sqlalchemy.orm import Session
from ..auth_utils import create_access_token, get_current_user, hash_password, verify_password
from ..database import get_db
from ..models import SystemConfig, User
# Authentication routes; main.py mounts this router under /api/auth.
router = APIRouter()
# JSON body of POST /register.
# NOTE(review): email is a plain str, so its format is not validated even though EmailStr
# is imported in this module — confirm whether that is intentional.
class RegisterRequest(BaseModel):
    username: str
    email: str
    password: str
# Response of the register and login routes: the JWT plus its token type.
class TokenResponse(BaseModel):
    access_token: str
    token_type: str = "bearer"
# Response of GET /me; from_attributes lets it be built directly from a User ORM row.
class UserResponse(BaseModel):
    id: int
    username: str
    email: str
    is_admin: bool = False
    model_config = {"from_attributes": True}
@router.post("/register", response_model=TokenResponse, status_code=status.HTTP_201_CREATED)
def register(body: RegisterRequest, db: Session = Depends(get_db)):
    # Create an account and return a token for it.
    # The first account ever created is always accepted and becomes admin (bootstrap);
    # later registrations are refused with 403 when system_config disables them.
    # A username or email already in use gives 400.
    from sqlalchemy.exc import IntegrityError

    has_users = db.query(User).first() is not None
    if has_users:
        cfg = db.query(SystemConfig).filter_by(key="allow_registration").first()
        if cfg and cfg.value != "true":
            raise HTTPException(status_code=403, detail="Registration is disabled")
    if db.query(User).filter(User.username == body.username).first():
        raise HTTPException(status_code=400, detail="Username already taken")
    if db.query(User).filter(User.email == body.email).first():
        raise HTTPException(status_code=400, detail="Email already registered")

    user = User(
        username=body.username,
        email=body.email,
        hashed_password=hash_password(body.password),
        is_admin=not has_users,
    )
    db.add(user)
    try:
        db.commit()
    except IntegrityError:
        # Two concurrent requests can both pass the checks above; the UNIQUE constraints
        # on username/email then reject the second insert. Report it as a client error.
        db.rollback()
        raise HTTPException(status_code=400, detail="Username or email already registered")
    db.refresh(user)
    token = create_access_token({"sub": str(user.id)})
    return TokenResponse(access_token=token)
@router.post("/login", response_model=TokenResponse)
def login(form: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)):
    # Exchange username/password form fields for a token. An unknown user and a wrong
    # password give the same 401 response.
    account = db.query(User).filter(User.username == form.username).first()
    authenticated = bool(account) and verify_password(form.password, account.hashed_password)
    if not authenticated:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
        )
    return TokenResponse(access_token=create_access_token({"sub": str(account.id)}))
@router.get("/me", response_model=UserResponse)
def me(current_user: User = Depends(get_current_user)):
    # Return the authenticated user; the response is shaped by UserResponse.
    return current_user

712
backend/routers/channels.py Normal file
View File

@@ -0,0 +1,712 @@
import json
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import Channel, ChannelGroup, ChannelGroupMember, Download, User, UserChannel, UserSettings, UserVideo, Video
from ..services import ytdlp
# Channel routes; main.py mounts this router under /api/channels.
router = APIRouter()
# Response schema for a channel as seen by one user: the channel columns, the user's own
# status/auto_download/muted_until/notes, and aggregate counters. The aggregate field names
# mirror the column aliases of _CHANNEL_STATS_SELECT (the code that fills them is outside
# this chunk).
class ChannelOut(BaseModel):
    id: int
    youtube_channel_id: str
    name: str
    description: Optional[str]
    thumbnail_url: Optional[str]
    banner_url: Optional[str]
    crawled_at: Optional[datetime]
    status: Optional[str]
    auto_download: Optional[bool] = None
    subscriber_count: Optional[int] = None
    video_count: int = 0
    unwatched_count: int = 0
    watched_count: int = 0
    downloaded_count: int = 0
    last_published_at: Optional[datetime] = None
    new_count: int = 0
    latest_video_id: Optional[str] = None
    latest_video_title: Optional[str] = None
    muted_until: Optional[datetime] = None
    upload_frequency_days: Optional[float] = None
    notes: Optional[str] = ""
    model_config = {"from_attributes": True}
# Response schema for a channel group together with the ids of its member channels.
class ChannelGroupOut(BaseModel):
    id: int
    name: str
    channel_ids: list[int] = []
    model_config = {"from_attributes": True}
# Response schema for a video in a listing: core video fields, the owning channel's
# id/name/YouTube id, and the caller's downloaded / watched / queued flags.
class VideoOut(BaseModel):
    id: int
    youtube_video_id: str
    title: str
    thumbnail_url: Optional[str]
    duration_seconds: Optional[int]
    published_at: Optional[datetime]
    channel_id: Optional[int] = None
    channel_name: Optional[str] = None
    channel_youtube_id: Optional[str] = None
    is_downloaded: bool = False
    is_watched: bool = False
    queued: bool = False
    model_config = {"from_attributes": True}
# Base query for the channels a user follows. It takes one bind parameter, :user_id, and
# returns one row per followed channel with correlated-subquery aggregates: total, unwatched,
# watched and downloaded video counts, videos indexed since uc.last_seen_at (new_count),
# the most recently published video's id and title, and the mean gap in days between the
# 15 most recent publish dates (NULL when fewer than two are known).
# The statement ends after its WHERE clause, so callers can append further SQL.
_CHANNEL_STATS_SELECT = """
SELECT c.*, uc.status, uc.auto_download, uc.muted_until, uc.notes,
(SELECT COUNT(*) FROM videos WHERE channel_id = c.id) AS video_count,
(SELECT MAX(v.published_at) FROM videos v WHERE v.channel_id = c.id) AS last_published_at,
(SELECT COUNT(*) FROM videos v
LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
WHERE v.channel_id = c.id AND COALESCE(uv.watched, 0) = 0) AS unwatched_count,
(SELECT COUNT(*) FROM videos v
JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
WHERE v.channel_id = c.id AND uv.watched = 1) AS watched_count,
(SELECT COUNT(*) FROM videos v
JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
WHERE v.channel_id = c.id AND uv.downloaded = 1) AS downloaded_count,
(SELECT COUNT(*) FROM videos v
WHERE v.channel_id = c.id
AND (uc.last_seen_at IS NULL OR v.indexed_at > uc.last_seen_at)) AS new_count,
(SELECT v.youtube_video_id FROM videos v
WHERE v.channel_id = c.id ORDER BY v.published_at DESC LIMIT 1) AS latest_video_id,
(SELECT v.title FROM videos v
WHERE v.channel_id = c.id ORDER BY v.published_at DESC LIMIT 1) AS latest_video_title,
(SELECT
CASE WHEN COUNT(*) < 2 THEN NULL
ELSE CAST((julianday(MAX(sub.published_at)) - julianday(MIN(sub.published_at))) AS REAL) / (COUNT(*) - 1)
END
FROM (SELECT published_at FROM videos WHERE channel_id = c.id AND published_at IS NOT NULL ORDER BY published_at DESC LIMIT 15) sub
) AS upload_frequency_days
FROM channels c
JOIN user_channels uc ON c.id = uc.channel_id
WHERE uc.user_id = :user_id AND uc.status = 'followed'
"""
def _get_channel_or_404(db: Session, channel_id: int) -> Channel:
    """Return the channel with primary key *channel_id*.

    Raises:
        HTTPException: 404 when no such channel exists.
    """
    channel = db.query(Channel).filter(Channel.id == channel_id).first()
    if channel:
        return channel
    raise HTTPException(status_code=404, detail="Channel not found")
def _index_channel_task(channel_id: int, user_id: int):
    """Refresh one channel from yt-dlp and optionally queue downloads of its new videos.

    Uses its own database session. The channel row is updated with any non-empty
    metadata returned, unseen videos are inserted, and gaps in already-known videos
    are backfilled. When auto-download applies (per-channel override, else the
    user's global setting), a pending Download row is created for each new video
    and a worker thread is started for it.

    Args:
        channel_id: Primary key of the channel to index.
        user_id: User whose auto-download settings apply; a falsy value skips
            the auto-download step.

    Failures are logged and the session is rolled back; nothing is raised.
    """
    import logging
    import threading

    from ..database import SessionLocal

    db = SessionLocal()
    try:
        channel = db.query(Channel).filter_by(id=channel_id).first()
        if not channel:
            return
        result = ytdlp.fetch_channel_metadata(channel.youtube_channel_id)
        if not result:
            return

        # Copy over only non-empty values for attributes the model actually has.
        # (The channel is already attached to this session, so no merge is needed.)
        ch_data = result.get("channel", {})
        if ch_data:
            for k, v in ch_data.items():
                if hasattr(channel, k) and v is not None and v != "":
                    setattr(channel, k, v)
        channel.crawled_at = datetime.utcnow()

        new_video_ids = []
        for vdata in result.get("videos", []):
            yt_id = vdata.get("youtube_video_id")
            if not yt_id:
                continue
            existing = db.query(Video).filter_by(youtube_video_id=yt_id).first()
            if not existing:
                new_video = Video(
                    youtube_video_id=yt_id,
                    channel_id=channel.id,
                    # title is NOT NULL: an explicit None from the fetcher would otherwise
                    # fail the insert and roll back the whole sync.
                    title=vdata.get("title") or "",
                    description=vdata.get("description"),
                    thumbnail_url=vdata.get("thumbnail_url"),
                    duration_seconds=vdata.get("duration_seconds"),
                    published_at=vdata.get("published_at"),
                    tags=vdata.get("tags"),
                    category=vdata.get("category"),
                )
                db.add(new_video)
                db.flush()  # assigns new_video.id
                new_video_ids.append((yt_id, new_video.id))
            else:
                # Backfill missing metadata on existing videos
                if existing.published_at is None and vdata.get("published_at"):
                    existing.published_at = vdata["published_at"]
                if not existing.title and vdata.get("title"):
                    existing.title = vdata["title"]
                if not existing.thumbnail_url and vdata.get("thumbnail_url"):
                    existing.thumbnail_url = vdata["thumbnail_url"]
                if not existing.duration_seconds and vdata.get("duration_seconds"):
                    existing.duration_seconds = vdata["duration_seconds"]
                if not existing.description and vdata.get("description"):
                    existing.description = vdata["description"]
        db.commit()

        # Auto-download new videos if setting says to
        if new_video_ids and user_id:
            uc = db.query(UserChannel).filter_by(user_id=user_id, channel_id=channel.id).first()
            user_settings = db.query(UserSettings).filter_by(user_id=user_id).first()
            global_auto = user_settings.auto_download_on_sync if user_settings else False
            channel_auto = uc.auto_download if uc and uc.auto_download is not None else global_auto
            if channel_auto:
                quality = user_settings.preferred_quality if user_settings else "best"
                from ..routers.downloads import _on_progress, _on_complete, _on_error

                queued = []
                for yt_id, vid_id in new_video_ids:
                    existing_dl = db.query(Download).filter_by(
                        user_id=user_id, video_id=vid_id
                    ).filter(Download.status.in_(["pending", "downloading", "complete"])).first()
                    if not existing_dl:
                        dl = Download(user_id=user_id, video_id=vid_id, status="pending")
                        db.add(dl)
                        db.flush()  # assigns dl.id
                        queued.append((yt_id, dl.id))
                # Commit before any worker starts: the workers are handed the Download id,
                # and the row must be committed to be visible outside this session.
                db.commit()
                for yt_id, download_id in queued:
                    threading.Thread(
                        target=ytdlp.start_download,
                        args=(yt_id, download_id, _on_progress, _on_complete, _on_error, quality),
                        daemon=True,
                    ).start()
    except Exception:
        db.rollback()
        # Background task: there is no caller to report to, so at least leave a trace.
        logging.getLogger(__name__).exception("Indexing channel %s failed", channel_id)
    finally:
        db.close()
def _discovery_task(user_id: int):
    """Run full channel discovery for one user as a background job.

    Opens its own database session and always closes it. Discovery is
    best-effort: a failure is logged instead of being raised to the caller.
    """
    import logging

    from ..database import SessionLocal
    from ..services.discovery import run_full_discovery

    db = SessionLocal()
    try:
        run_full_discovery(db, user_id)
    except Exception:
        # Previously swallowed silently; keep the job non-fatal but leave a trace.
        logging.getLogger(__name__).exception("Discovery failed for user %s", user_id)
    finally:
        db.close()
def _enrich_missing_task(limit: int = 20):
    """Fetch full metadata for videos that are missing a description.

    Picks up to ``limit`` videos whose description is NULL (followed-channel
    videos first, then channels in the discovery queue, then newest ids) and
    fills description, tags, category and chapters from yt-dlp metadata.
    Each video is committed on its own so one failure does not undo the rest.
    """
    import logging

    from ..database import SessionLocal

    log = logging.getLogger(__name__)
    db = SessionLocal()
    try:
        rows = db.execute(
            text("""
                SELECT v.id, v.youtube_video_id FROM videos v
                WHERE v.description IS NULL
                ORDER BY
                    -- prioritise: followed-channel videos first, then discovery queue, then rest
                    (EXISTS (SELECT 1 FROM user_channels uc
                             WHERE uc.channel_id = v.channel_id AND uc.status = 'followed')) DESC,
                    (EXISTS (SELECT 1 FROM discovery_queue dq
                             WHERE dq.channel_id = v.channel_id)) DESC,
                    v.id DESC
                LIMIT :limit
            """),
            {"limit": limit},
        ).mappings().all()
        for row in rows:
            try:
                meta = ytdlp.fetch_video_metadata(row["youtube_video_id"])
                if not meta:
                    # NOTE(review): the description stays NULL here, so this video is
                    # selected again on the next run.
                    continue
                vid = db.query(Video).filter_by(id=row["id"]).first()
                if not vid:
                    continue
                if meta.get("description") is not None:
                    # Store "" rather than NULL so an empty description counts as enriched.
                    vid.description = meta["description"] or ""
                if not vid.tags and meta.get("tags"):
                    vid.tags = meta["tags"]
                if not vid.category and meta.get("category"):
                    vid.category = meta["category"]
                if not vid.chapters and meta.get("chapters"):
                    vid.chapters = meta["chapters"]
                db.commit()
            except Exception:
                # Best-effort per video: record the failure, undo partial changes, move on.
                log.exception("Metadata enrichment failed for video %s", row["youtube_video_id"])
                db.rollback()
    finally:
        db.close()
# Paginated feed of videos from channels the current user follows
# (user_channels.status = 'followed'), newest published first. The per-user
# downloaded / watched / queued flags default to 0 when no user_videos row exists.
@router.get("/feed", response_model=list[VideoOut])
def channel_feed(
    limit: int = 24,
    offset: int = 0,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # SQLite treats a negative LIMIT as "no upper bound"; clamp so a stray
    # query-string value cannot return the whole table.
    limit = max(limit, 0)
    offset = max(offset, 0)
    rows = db.execute(
        text("""
            SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
                   v.duration_seconds, v.published_at,
                   c.id AS channel_id, c.name AS channel_name, c.youtube_channel_id AS channel_youtube_id,
                   COALESCE(uv.downloaded, 0) AS is_downloaded,
                   COALESCE(uv.watched, 0) AS is_watched,
                   COALESCE(uv.queued, 0) AS queued
            FROM videos v
            JOIN channels c ON v.channel_id = c.id
            JOIN user_channels uc ON c.id = uc.channel_id AND uc.user_id = :user_id AND uc.status = 'followed'
            LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
            ORDER BY v.published_at DESC
            LIMIT :limit OFFSET :offset
        """),
        {"user_id": current_user.id, "limit": limit, "offset": offset},
    ).mappings().all()
    return [VideoOut(**dict(r)) for r in rows]
# Queue a background re-index of every channel the current user follows and
# report how many were queued. Responds 202 immediately; the work runs in
# FastAPI background tasks.
@router.post("/sync-all", status_code=202)
def sync_all_channels(
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    channels = db.execute(
        text("""
            SELECT c.id FROM channels c
            JOIN user_channels uc ON c.id = uc.channel_id
            WHERE uc.user_id = :uid AND uc.status = 'followed'
        """),
        {"uid": current_user.id},
    ).mappings().all()
    # One indexing task per followed channel.
    for row in channels:
        background_tasks.add_task(_index_channel_task, row["id"], current_user.id)
    # NOTE(review): the original indentation was lost in this view; it is assumed
    # that both follow-up tasks are guarded by `if channels:` — confirm against
    # the real file (the enrichment task may have been unconditional).
    if channels:
        background_tasks.add_task(_discovery_task, current_user.id)
        background_tasks.add_task(_enrich_missing_task, 20)
    return {"indexing": len(channels)}
# Set last_seen_at to the current (naive UTC) time on every channel the
# current user follows. Responds 204 with no body.
@router.post("/mark-seen", status_code=204)
def mark_channels_seen(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    stmt = text(
        "UPDATE user_channels SET last_seen_at = :now WHERE user_id = :uid AND status = 'followed'"
    )
    params = {"now": datetime.utcnow(), "uid": current_user.id}
    db.execute(stmt, params)
    db.commit()
# List channels with per-user statistics, using the shared
# _CHANNEL_STATS_SELECT query ordered by most recent publication first.
@router.get("", response_model=list[ChannelOut])
def list_channels(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    query = text(_CHANNEL_STATS_SELECT + "ORDER BY last_published_at DESC")
    records = db.execute(query, {"user_id": current_user.id}).mappings().all()
    channels = []
    for record in records:
        channels.append(ChannelOut(**dict(record)))
    return channels
# ── Channel Groups (must be before /{channel_id} to avoid route shadowing) ───
# Return the current user's channel groups, each with the ids of its member
# channels. Issues one member query per group.
@router.get("/groups", response_model=list[ChannelGroupOut])
def list_groups(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    def _member_channel_ids(group_id):
        memberships = db.query(ChannelGroupMember).filter_by(group_id=group_id).all()
        return [membership.channel_id for membership in memberships]

    owned_groups = db.query(ChannelGroup).filter_by(user_id=current_user.id).all()
    return [
        ChannelGroupOut(
            id=group.id,
            name=group.name,
            channel_ids=_member_channel_ids(group.id),
        )
        for group in owned_groups
    ]
# Create an empty channel group owned by the current user.
# Body: {"name": "<non-blank string>"}. Responds 400 when the name is missing,
# blank, or not a string.
@router.post("/groups", response_model=ChannelGroupOut, status_code=201)
def create_group(
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    raw_name = body.get("name") or ""
    # The body is an untyped dict, so "name" may be any JSON value; calling
    # .strip() on a non-string would otherwise surface as a 500.
    name = raw_name.strip() if isinstance(raw_name, str) else ""
    if not name:
        raise HTTPException(status_code=400, detail="name required")
    g = ChannelGroup(user_id=current_user.id, name=name)
    db.add(g)
    db.commit()
    db.refresh(g)  # load the generated primary key
    return ChannelGroupOut(id=g.id, name=g.name, channel_ids=[])
# Delete one of the current user's channel groups.
# Responds 404 when the group does not exist or belongs to another user.
@router.delete("/groups/{group_id}", status_code=204)
def delete_group(
    group_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    group = (
        db.query(ChannelGroup)
        .filter_by(id=group_id, user_id=current_user.id)
        .first()
    )
    if group is None:
        raise HTTPException(status_code=404, detail="Group not found")
    db.delete(group)
    db.commit()
# Rename one of the current user's channel groups and return it with its
# member channel ids. A missing or blank name leaves the group unchanged;
# a non-string name is rejected with 400. Responds 404 for unknown groups.
@router.patch("/groups/{group_id}", response_model=ChannelGroupOut)
def rename_group(
    group_id: int,
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    g = db.query(ChannelGroup).filter_by(id=group_id, user_id=current_user.id).first()
    if not g:
        raise HTTPException(status_code=404, detail="Group not found")
    raw_name = body.get("name") or ""
    if not isinstance(raw_name, str):
        # Untyped body: without this check .strip() raises and the client sees a 500.
        raise HTTPException(status_code=400, detail="name must be a string")
    name = raw_name.strip()
    if name:
        g.name = name
        db.commit()
    members = db.query(ChannelGroupMember).filter_by(group_id=g.id).all()
    return ChannelGroupOut(id=g.id, name=g.name, channel_ids=[m.channel_id for m in members])
# Add a channel to one of the current user's groups. Adding a channel that is
# already a member is a no-op. Responds 404 when the group is not owned by
# the caller or the channel does not exist.
@router.post("/groups/{group_id}/channels/{channel_id}", status_code=204)
def add_channel_to_group(
    group_id: int,
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    g = db.query(ChannelGroup).filter_by(id=group_id, user_id=current_user.id).first()
    if not g:
        raise HTTPException(status_code=404, detail="Group not found")
    # Same existence check the other channel routes use; prevents membership
    # rows that point at a channel id that was never created.
    _get_channel_or_404(db, channel_id)
    existing = db.query(ChannelGroupMember).filter_by(group_id=group_id, channel_id=channel_id).first()
    if not existing:
        db.add(ChannelGroupMember(group_id=group_id, channel_id=channel_id))
        db.commit()
# Remove a channel from one of the current user's groups. Removing a channel
# that is not a member is a no-op. Responds 404 when the group does not exist
# or belongs to another user.
@router.delete("/groups/{group_id}/channels/{channel_id}", status_code=204)
def remove_channel_from_group_route(
    group_id: int,
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Ownership check, matching the other group routes: without it any
    # authenticated user could remove members from someone else's group.
    g = db.query(ChannelGroup).filter_by(id=group_id, user_id=current_user.id).first()
    if not g:
        raise HTTPException(status_code=404, detail="Group not found")
    m = db.query(ChannelGroupMember).filter_by(group_id=group_id, channel_id=channel_id).first()
    if m:
        db.delete(m)
        db.commit()
# Request body for POST /bulk-action: one action applied to several of the
# caller's channel subscriptions at once.
class BulkChannelBody(BaseModel):
    # Internal channel ids (channels.id) to act on.
    channel_ids: list[int]
    action: str  # "mute" | "unmute" | "unfollow"
# Apply one action to many of the current user's channel subscriptions:
#   "mute"     -> set muted_until to 30 days from now (naive UTC)
#   "unmute"   -> clear muted_until
#   "unfollow" -> delete the user_channels rows
# An empty id list is a no-op. An unknown action is rejected with 400 instead
# of silently reporting success.
@router.post("/bulk-action", status_code=200)
def bulk_channel_action(
    body: BulkChannelBody,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    from sqlalchemy import bindparam

    if not body.channel_ids:
        return {"ok": True}

    statements = {
        "mute": (
            "UPDATE user_channels SET muted_until = :until "
            "WHERE user_id = :user_id AND channel_id IN :channel_ids"
        ),
        "unmute": (
            "UPDATE user_channels SET muted_until = NULL "
            "WHERE user_id = :user_id AND channel_id IN :channel_ids"
        ),
        "unfollow": (
            "DELETE FROM user_channels "
            "WHERE user_id = :user_id AND channel_id IN :channel_ids"
        ),
    }
    sql = statements.get(body.action)
    if sql is None:
        raise HTTPException(status_code=400, detail=f"Unknown action: {body.action}")

    params = {"user_id": current_user.id, "channel_ids": list(body.channel_ids)}
    if body.action == "mute":
        params["until"] = datetime.utcnow() + timedelta(days=30)

    # An expanding bind parameter renders the IN list from bound values, so no
    # request data is interpolated into the SQL text.
    stmt = text(sql).bindparams(bindparam("channel_ids", expanding=True))
    db.execute(stmt, params)
    db.commit()
    return {"ok": True}
# Return one channel with the current user's subscription fields and
# per-user counters (total / unwatched / watched / downloaded videos) plus the
# latest video's id and title. new_count is hard-coded to 0 by the query.
# Responds 404 when the channel id is unknown.
@router.get("/{channel_id}", response_model=ChannelOut)
def get_channel(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    stats_query = text("""
        SELECT c.*, uc.status, uc.auto_download, uc.muted_until,
               (SELECT COUNT(*) FROM videos WHERE channel_id = c.id) AS video_count,
               (SELECT MAX(v.published_at) FROM videos v WHERE v.channel_id = c.id) AS last_published_at,
               (SELECT COUNT(*) FROM videos v
                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                WHERE v.channel_id = c.id AND COALESCE(uv.watched, 0) = 0) AS unwatched_count,
               (SELECT COUNT(*) FROM videos v
                JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                WHERE v.channel_id = c.id AND uv.watched = 1) AS watched_count,
               (SELECT COUNT(*) FROM videos v
                JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                WHERE v.channel_id = c.id AND uv.downloaded = 1) AS downloaded_count,
               0 AS new_count,
               (SELECT v.youtube_video_id FROM videos v
                WHERE v.channel_id = c.id ORDER BY v.published_at DESC LIMIT 1) AS latest_video_id,
               (SELECT v.title FROM videos v
                WHERE v.channel_id = c.id ORDER BY v.published_at DESC LIMIT 1) AS latest_video_title
        FROM channels c
        LEFT JOIN user_channels uc ON c.id = uc.channel_id AND uc.user_id = :user_id
        WHERE c.id = :channel_id
    """)
    bind_values = {"user_id": current_user.id, "channel_id": channel_id}
    record = db.execute(stats_query, bind_values).mappings().first()
    if record:
        return ChannelOut(**dict(record))
    raise HTTPException(status_code=404, detail="Channel not found")
# List every indexed video of a channel, newest published first, with the
# current user's downloaded / watched flags (0 when no user_videos row exists).
# The channel lookup helper raises for unknown channel ids.
@router.get("/{channel_id}/videos", response_model=list[VideoOut])
def get_channel_videos(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    _get_channel_or_404(db, channel_id)
    videos_query = text("""
        SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
               v.duration_seconds, v.published_at,
               COALESCE(uv.downloaded, 0) AS is_downloaded,
               COALESCE(uv.watched, 0) AS is_watched
        FROM videos v
        LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
        WHERE v.channel_id = :channel_id
        ORDER BY v.published_at DESC
    """)
    bind_values = {"user_id": current_user.id, "channel_id": channel_id}
    records = db.execute(videos_query, bind_values).mappings().all()
    return [VideoOut(**dict(record)) for record in records]
# Follow a channel: create the user/channel link with status "followed", or
# flip an existing link (whatever its status) back to "followed".
@router.post("/{channel_id}/follow", status_code=status.HTTP_204_NO_CONTENT)
def follow_channel(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    _get_channel_or_404(db, channel_id)
    link = (
        db.query(UserChannel)
        .filter_by(user_id=current_user.id, channel_id=channel_id)
        .first()
    )
    if not link:
        db.add(UserChannel(user_id=current_user.id, channel_id=channel_id, status="followed"))
    else:
        link.status = "followed"
    db.commit()
# Unfollow a channel by deleting the user/channel link. Succeeds silently
# when the user has no link to the channel.
@router.delete("/{channel_id}/follow", status_code=status.HTTP_204_NO_CONTENT)
def unfollow_channel(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    link = (
        db.query(UserChannel)
        .filter_by(user_id=current_user.id, channel_id=channel_id)
        .first()
    )
    if not link:
        return
    db.delete(link)
    db.commit()
# Set the per-channel auto-download override for the current user.
# Body: {"auto_download": true | false | null}. null clears the override, in
# which case the sync task falls back to the user's global setting.
# Responds 404 when the user has no link to the channel and 400 for any other
# value type.
@router.patch("/{channel_id}/auto-download", status_code=200)
def set_channel_auto_download(
    channel_id: int,
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    uc = db.query(UserChannel).filter_by(user_id=current_user.id, channel_id=channel_id).first()
    if not uc:
        raise HTTPException(status_code=404, detail="Not following this channel")
    value = body.get("auto_download")  # True / False / None
    # The body is an untyped dict; reject strings, numbers, etc. before they
    # reach the tri-state flag.
    if value is not None and not isinstance(value, bool):
        raise HTTPException(status_code=400, detail="auto_download must be true, false, or null")
    uc.auto_download = value
    db.commit()
    return {"auto_download": uc.auto_download}
# Queue a background (re-)index of a single channel on behalf of the current
# user and respond 202 immediately.
@router.post("/{channel_id}/index", status_code=status.HTTP_202_ACCEPTED)
def index_channel(
    channel_id: int,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    _get_channel_or_404(db, channel_id)
    requester_id = current_user.id
    background_tasks.add_task(_index_channel_task, channel_id, requester_id)
    return {"detail": "Indexing started"}
@router.post("/follow-bulk", status_code=200)
def follow_bulk(
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Follow a large list of channel handles/IDs without hitting YouTube.
    Creates stub Channel records for unknowns and UserChannel rows immediately.
    Metadata (name, thumbnail, videos) fills in when the user hits Sync All.
    """
    handles = body.get("handles", [])
    if not handles or not isinstance(handles, list):
        raise HTTPException(status_code=400, detail="handles list required")

    followed = 0
    already = 0
    created = 0

    for handle in handles:
        handle = str(handle).strip()
        if not handle:
            continue

        channel = db.query(Channel).filter_by(youtube_channel_id=handle).first()
        if not channel:
            # Stub — name defaults to handle, filled in on next index
            channel = Channel(
                youtube_channel_id=handle,
                name=handle.lstrip("@"),
            )
            db.add(channel)
            db.flush()  # assigns channel.id for the link row below
            created += 1

        uc = db.query(UserChannel).filter_by(
            user_id=current_user.id, channel_id=channel.id
        ).first()
        if uc:
            if uc.status != "followed":
                uc.status = "followed"
                followed += 1
            else:
                already += 1
        else:
            db.add(UserChannel(
                user_id=current_user.id,
                channel_id=channel.id,
                status="followed",
            ))
            # Flush so that a repeated handle later in the same request finds
            # this row even when the session is configured without autoflush;
            # otherwise a duplicate link would be inserted.
            db.flush()
            followed += 1

    db.commit()
    return {"followed": followed, "already_following": already, "new_channels": created}
# Store the current user's free-form notes for a channel.
# Body: {"notes": ...}; a missing or falsy value is stored as "".
# Responds 404 when the user has no link to the channel.
@router.patch("/{channel_id}/notes", status_code=200)
def update_channel_notes(
    channel_id: int,
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    link = (
        db.query(UserChannel)
        .filter_by(user_id=current_user.id, channel_id=channel_id)
        .first()
    )
    if link is None:
        raise HTTPException(status_code=404, detail="Not following this channel")
    submitted = body.get("notes", "")
    link.notes = submitted if submitted else ""
    db.commit()
    return {"ok": True}
# Mute a channel for the current user by setting muted_until to 30 days from
# now (naive UTC). Responds 404 when the user has no link to the channel.
@router.post("/{channel_id}/mute", status_code=204)
def mute_channel(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    link = (
        db.query(UserChannel)
        .filter_by(user_id=current_user.id, channel_id=channel_id)
        .first()
    )
    if link is None:
        raise HTTPException(status_code=404, detail="Not following this channel")
    mute_window = timedelta(days=30)
    link.muted_until = datetime.utcnow() + mute_window
    db.commit()
# Clear the mute on a channel for the current user. Succeeds silently when
# the user has no link to the channel.
@router.delete("/{channel_id}/mute", status_code=204)
def unmute_channel(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    link = (
        db.query(UserChannel)
        .filter_by(user_id=current_user.id, channel_id=channel_id)
        .first()
    )
    if not link:
        return
    link.muted_until = None
    db.commit()
# Follow a channel identified by its YouTube channel id.
# Body: {"youtube_channel_id": ...} (or the alias "channel_id").
# An unknown channel is fetched through yt-dlp (up to 30 videos), stored with
# its videos, and then followed; a known channel is simply followed. A
# discovery run is queued afterwards.
# Responds 400 without an id and 404 when yt-dlp returns no channel data.
@router.post("/follow-by-url", status_code=status.HTTP_201_CREATED)
def follow_by_url(
    body: dict,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    yt_channel_id = body.get("youtube_channel_id") or body.get("channel_id")
    if not yt_channel_id:
        raise HTTPException(status_code=400, detail="youtube_channel_id required")

    channel = db.query(Channel).filter_by(youtube_channel_id=yt_channel_id).first()
    if not channel:
        meta = ytdlp.fetch_channel_metadata(yt_channel_id, max_videos=30)
        if not (meta and meta.get("channel")):
            raise HTTPException(status_code=404, detail="Channel not found on YouTube")

        # Keep only the keys that correspond to attributes of the Channel model.
        channel_fields = {
            key: value
            for key, value in meta["channel"].items()
            if hasattr(Channel, key)
        }
        channel = Channel(**channel_fields)
        channel.crawled_at = datetime.utcnow()
        db.add(channel)
        db.flush()  # assigns channel.id for the videos below

        for entry in meta.get("videos", []):
            video_key = entry.get("youtube_video_id")
            if not video_key:
                continue
            already_indexed = db.query(Video).filter_by(youtube_video_id=video_key).first()
            if already_indexed:
                continue
            db.add(Video(
                youtube_video_id=video_key,
                channel_id=channel.id,
                title=entry.get("title", ""),
                description=entry.get("description"),
                thumbnail_url=entry.get("thumbnail_url"),
                duration_seconds=entry.get("duration_seconds"),
                published_at=entry.get("published_at"),
                tags=entry.get("tags"),
                category=entry.get("category"),
            ))
        db.commit()
        db.refresh(channel)

    link = (
        db.query(UserChannel)
        .filter_by(user_id=current_user.id, channel_id=channel.id)
        .first()
    )
    if not link:
        db.add(UserChannel(user_id=current_user.id, channel_id=channel.id, status="followed"))
    else:
        link.status = "followed"
    db.commit()

    background_tasks.add_task(_discovery_task, current_user.id)
    return {"channel_id": channel.id, "name": channel.name}

View File

@@ -0,0 +1,178 @@
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import Collection, CollectionItem, User, Video
router = APIRouter()
# Response shape for a collection as returned by the list / create / rename routes.
class CollectionOut(BaseModel):
    id: int
    name: str
    created_at: datetime
    # Number of collection_items rows in the collection.
    video_count: int = 0
    # Thumbnail URLs for a mosaic preview; the list route fills in at most 4.
    thumbnails: list[str] = []
    model_config = {"from_attributes": True}
# Request body carrying a collection name; used by both the create and rename routes.
class CollectionCreate(BaseModel):
    name: str
# List the current user's collections, newest first, each with its item count
# and up to four thumbnails of the most recently added videos.
@router.get("", response_model=list[CollectionOut])
def list_collections(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    summaries = db.execute(
        text("""
            SELECT c.id, c.name, c.created_at,
                   COUNT(ci.id) AS video_count
            FROM collections c
            LEFT JOIN collection_items ci ON c.id = ci.collection_id
            WHERE c.user_id = :uid
            GROUP BY c.id
            ORDER BY c.created_at DESC
        """),
        {"uid": current_user.id},
    ).mappings().all()

    def _mosaic_thumbnails(collection_id):
        # Grab up to 4 thumbnails for mosaic preview
        found = db.execute(
            text("""
                SELECT v.thumbnail_url FROM collection_items ci
                JOIN videos v ON ci.video_id = v.id
                WHERE ci.collection_id = :cid AND v.thumbnail_url IS NOT NULL
                ORDER BY ci.added_at DESC LIMIT 4
            """),
            {"cid": collection_id},
        ).scalars().all()
        return list(found)

    return [
        CollectionOut(
            id=summary["id"],
            name=summary["name"],
            created_at=summary["created_at"],
            video_count=summary["video_count"],
            thumbnails=_mosaic_thumbnails(summary["id"]),
        )
        for summary in summaries
    ]
# Create an empty collection for the current user.
# Responds 400 when the name is blank after trimming.
@router.post("", response_model=CollectionOut, status_code=201)
def create_collection(
    body: CollectionCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    trimmed = (body.name or "").strip()
    if trimmed == "":
        raise HTTPException(status_code=400, detail="Name required")
    collection = Collection(user_id=current_user.id, name=trimmed)
    db.add(collection)
    db.commit()
    db.refresh(collection)  # load generated id / created_at
    return CollectionOut(
        id=collection.id,
        name=collection.name,
        created_at=collection.created_at,
        video_count=0,
        thumbnails=[],
    )
# Rename one of the current user's collections and return its refreshed
# summary. A blank name keeps the existing one. Responds 404 when the
# collection does not exist or belongs to another user.
@router.patch("/{collection_id}", response_model=CollectionOut)
def rename_collection(
    collection_id: int,
    body: CollectionCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    col = db.query(Collection).filter_by(id=collection_id, user_id=current_user.id).first()
    if not col:
        raise HTTPException(status_code=404, detail="Not found")
    col.name = (body.name or "").strip() or col.name
    db.commit()
    count = db.query(CollectionItem).filter_by(collection_id=col.id).count()
    # Return the same mosaic thumbnails the list route provides; previously the
    # response always carried an empty list, so a client replacing its cached
    # entry with this response lost the preview.
    thumbs = db.execute(
        text("""
            SELECT v.thumbnail_url FROM collection_items ci
            JOIN videos v ON ci.video_id = v.id
            WHERE ci.collection_id = :cid AND v.thumbnail_url IS NOT NULL
            ORDER BY ci.added_at DESC LIMIT 4
        """),
        {"cid": col.id},
    ).scalars().all()
    return CollectionOut(
        id=col.id,
        name=col.name,
        created_at=col.created_at,
        video_count=count,
        thumbnails=list(thumbs),
    )
# Delete one of the current user's collections. Succeeds silently when the
# collection does not exist or belongs to another user.
@router.delete("/{collection_id}", status_code=204)
def delete_collection(
    collection_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    target = (
        db.query(Collection)
        .filter_by(id=collection_id, user_id=current_user.id)
        .first()
    )
    if not target:
        return
    db.delete(target)
    db.commit()
# Return a collection's id and name plus its videos, most recently added
# first, with channel info and the current user's watched / downloaded /
# liked / queued flags (0 when no user_videos row exists).
# Responds 404 when the collection is not owned by the caller.
@router.get("/{collection_id}/videos")
def get_collection_videos(
    collection_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    collection = (
        db.query(Collection)
        .filter_by(id=collection_id, user_id=current_user.id)
        .first()
    )
    if collection is None:
        raise HTTPException(status_code=404, detail="Not found")

    items_query = text("""
        SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
               v.duration_seconds, v.published_at, v.description,
               c.id AS channel_id, c.name AS channel_name,
               COALESCE(uv.watched, 0) AS is_watched,
               COALESCE(uv.downloaded, 0) AS is_downloaded,
               COALESCE(uv.liked, 0) AS liked,
               COALESCE(uv.queued, 0) AS queued,
               ci.added_at
        FROM collection_items ci
        JOIN videos v ON ci.video_id = v.id
        LEFT JOIN channels c ON v.channel_id = c.id
        LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :uid
        WHERE ci.collection_id = :cid
        ORDER BY ci.added_at DESC
    """)
    records = db.execute(
        items_query, {"uid": current_user.id, "cid": collection_id}
    ).mappings().all()

    videos = [dict(record) for record in records]
    header = {"id": collection.id, "name": collection.name}
    return {"collection": header, "videos": videos}
# Add a video to one of the current user's collections.
# Body: {"video_id": <internal videos.id>}. Adding a video that is already in
# the collection is a no-op.
# Responds 404 when the collection is not owned by the caller or the video
# does not exist, and 400 when video_id is missing or not a positive integer.
@router.post("/{collection_id}/videos", status_code=201)
def add_to_collection(
    collection_id: int,
    body: dict,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    col = db.query(Collection).filter_by(id=collection_id, user_id=current_user.id).first()
    if not col:
        raise HTTPException(status_code=404, detail="Not found")

    # The body is an untyped dict, so coerce and validate the id ourselves.
    raw_id = body.get("video_id")
    if isinstance(raw_id, bool):  # bool is an int subclass; never a valid id
        raw_id = None
    try:
        video_id = int(raw_id)
    except (TypeError, ValueError):
        video_id = 0
    if video_id <= 0:
        raise HTTPException(status_code=400, detail="video_id required")

    # Refuse ids that do not reference a stored video, so the collection never
    # holds items that the video join in the listing query would silently drop.
    if db.query(Video).filter_by(id=video_id).first() is None:
        raise HTTPException(status_code=404, detail="Video not found")

    existing = db.query(CollectionItem).filter_by(collection_id=collection_id, video_id=video_id).first()
    if not existing:
        db.add(CollectionItem(collection_id=collection_id, video_id=video_id))
        db.commit()
    return {"ok": True}
# Remove a video from one of the current user's collections. Removing a video
# that is not in the collection is a no-op. Responds 404 when the collection
# is not owned by the caller.
@router.delete("/{collection_id}/videos/{video_id}", status_code=204)
def remove_from_collection(
    collection_id: int,
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    collection = (
        db.query(Collection)
        .filter_by(id=collection_id, user_id=current_user.id)
        .first()
    )
    if collection is None:
        raise HTTPException(status_code=404, detail="Not found")
    entry = (
        db.query(CollectionItem)
        .filter_by(collection_id=collection_id, video_id=video_id)
        .first()
    )
    if not entry:
        return
    db.delete(entry)
    db.commit()

View File

@@ -0,0 +1,239 @@
import json
from typing import Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import Channel, DiscoveryQueue, User, UserChannel, UserSettings
from ..services.discovery import run_full_discovery
router = APIRouter()
# One preview tile (thumbnail + title) shown for a suggested channel.
class PreviewVideo(BaseModel):
    thumbnail_url: str
    title: str
# One suggested channel from the user's discovery queue, as returned by the
# discovery list route.
class DiscoveryItem(BaseModel):
    # discovery_queue.id
    id: int
    # channels.id
    channel_id: int
    youtube_channel_id: str
    name: str
    description: Optional[str]
    thumbnail_url: Optional[str]
    subscriber_count: Optional[int] = None
    # Ranking score; the list route orders by it, highest first.
    score: float
    source: Optional[str]
    # Titles of up to 3 most recently published indexed videos of the channel.
    recent_video_titles: list[str] = []
    # Built from indexed videos when there are any, otherwise from the queue
    # row's stored preview_json.
    preview_videos: list[PreviewVideo] = []
    model_config = {"from_attributes": True}
# Paginated list of unseen discovery suggestions for the current user,
# highest score first, excluding channels the user already follows or has
# dismissed. Each item carries up to 3 recent video titles and preview tiles.
@router.get("", response_model=list[DiscoveryItem])
def list_discovery(
    offset: int = 0,
    limit: int = 50,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # SQLite treats a negative LIMIT as "no upper bound"; clamp so a stray
    # query-string value cannot return the whole queue.
    limit = max(limit, 0)
    offset = max(offset, 0)
    rows = db.execute(
        text("""
            SELECT dq.id, dq.channel_id, dq.score, dq.source, dq.preview_json,
                   c.youtube_channel_id, c.name, c.description, c.thumbnail_url, c.subscriber_count
            FROM discovery_queue dq
            JOIN channels c ON dq.channel_id = c.id
            WHERE dq.user_id = :user_id AND dq.seen = 0
              AND dq.channel_id NOT IN (
                  SELECT channel_id FROM user_channels
                  WHERE user_id = :user_id AND status IN ('followed', 'dismissed')
              )
            ORDER BY dq.score DESC
            LIMIT :limit OFFSET :offset
        """),
        {"user_id": current_user.id, "limit": limit, "offset": offset},
    ).mappings().all()

    items = []
    for row in rows:
        row = dict(row)
        video_rows = db.execute(
            text("""
                SELECT title, youtube_video_id FROM videos
                WHERE channel_id = :channel_id
                ORDER BY published_at DESC
                LIMIT 3
            """),
            {"channel_id": row["channel_id"]},
        ).fetchall()
        # A stored title may be NULL; the response model requires strings.
        row["recent_video_titles"] = [r[0] or "" for r in video_rows]
        if video_rows:
            # Prefer thumbnails derived from indexed videos.
            row["preview_videos"] = [
                {
                    "thumbnail_url": f"https://i.ytimg.com/vi/{r[1]}/hqdefault.jpg",
                    "title": r[0] or "",
                }
                for r in video_rows
            ]
        else:
            # Fall back to the preview stored with the queue entry.
            try:
                previews = json.loads(row.get("preview_json") or "[]")
            except (json.JSONDecodeError, TypeError):
                previews = []
            # Valid JSON that is not a list (e.g. an object) would fail model
            # validation and turn the whole listing into a 500.
            row["preview_videos"] = previews if isinstance(previews, list) else []
        items.append(DiscoveryItem(**row))
    return items
# Follow a suggested channel: set (or create) the user/channel link with
# status "followed" and mark the user's discovery queue entry for the channel
# as seen. Responds 404 when the channel id is unknown.
@router.post("/{channel_id}/follow", status_code=204)
def follow_discovery(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    if db.query(Channel).filter(Channel.id == channel_id).first() is None:
        raise HTTPException(status_code=404, detail="Channel not found")

    owner_filter = {"user_id": current_user.id, "channel_id": channel_id}

    link = db.query(UserChannel).filter_by(**owner_filter).first()
    if not link:
        db.add(UserChannel(user_id=current_user.id, channel_id=channel_id, status="followed"))
    else:
        link.status = "followed"

    queue_entry = db.query(DiscoveryQueue).filter_by(**owner_filter).first()
    if queue_entry:
        queue_entry.seen = True
    db.commit()
# Dismiss a suggested channel: set (or create) the user/channel link with
# status "dismissed" and mark the user's discovery queue entry for the channel
# as seen. Responds 404 when the channel id is unknown.
@router.post("/{channel_id}/dismiss", status_code=204)
def dismiss_discovery(
    channel_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Same existence check as the follow route; without it a made-up id would
    # create a user_channels row that points at no channel.
    channel = db.query(Channel).filter(Channel.id == channel_id).first()
    if not channel:
        raise HTTPException(status_code=404, detail="Channel not found")
    uc = db.query(UserChannel).filter_by(user_id=current_user.id, channel_id=channel_id).first()
    if uc:
        uc.status = "dismissed"
    else:
        db.add(UserChannel(user_id=current_user.id, channel_id=channel_id, status="dismissed"))
    dq = db.query(DiscoveryQueue).filter_by(user_id=current_user.id, channel_id=channel_id).first()
    if dq:
        dq.seen = True
    db.commit()
# Queue a full discovery run for the current user, using the regions from
# their settings (comma-separated, default "US,SE"), followed by a metadata
# enrichment pass for up to 20 videos. Responds 202 immediately.
@router.post("/refresh", status_code=202)
def refresh_discovery(
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    from ..database import SessionLocal
    from .channels import _enrich_missing_task

    s = db.query(UserSettings).filter_by(user_id=current_user.id).first()
    regions_str = (s.discovery_regions if s and s.discovery_regions else "US,SE")
    regions = [r.strip().upper() for r in regions_str.split(",") if r.strip()]
    # Read the id now: current_user is tied to the request session, which may
    # already be closed when the background task runs.
    user_id = current_user.id

    def _run_discovery() -> None:
        # Background tasks must not reuse the request-scoped session: recent
        # FastAPI versions run the dependency's cleanup before background
        # tasks execute, so the task opens (and closes) its own session.
        task_db = SessionLocal()
        try:
            run_full_discovery(task_db, user_id, regions)
        finally:
            task_db.close()

    background_tasks.add_task(_run_discovery)
    background_tasks.add_task(_enrich_missing_task, 20)
    return {"detail": "Discovery refresh started"}
@router.get("/videos", response_model=list[dict])
def discovery_videos(
    offset: int = 0,
    limit: int = 50,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """2 recent videos from every channel in the discovery queue that has indexed content.
    Ordered by channel score so the best-matched channels surface first.
    Channels fall out naturally when dismissed or followed."""
    # SQLite treats a negative LIMIT as "no upper bound"; clamp so a stray
    # query-string value cannot return every row.
    limit = max(limit, 0)
    offset = max(offset, 0)
    # NOTE(review): RANDOM() breaks ties between equal (score, rn) rows, so the
    # order is not stable across requests and paging may repeat or skip rows.
    rows = db.execute(
        text("""
            SELECT * FROM (
                SELECT v.id, v.youtube_video_id, v.title, v.thumbnail_url,
                       v.duration_seconds, v.published_at,
                       c.id AS channel_id, c.name AS channel_name,
                       c.youtube_channel_id AS channel_youtube_id,
                       dq.score,
                       ROW_NUMBER() OVER (
                           PARTITION BY c.id ORDER BY v.published_at DESC NULLS LAST
                       ) AS rn
                FROM videos v
                JOIN channels c ON v.channel_id = c.id
                JOIN discovery_queue dq ON c.id = dq.channel_id
                WHERE dq.user_id = :user_id AND dq.seen = 0
                  AND dq.channel_id NOT IN (
                      SELECT channel_id FROM user_channels
                      WHERE user_id = :user_id AND status IN ('followed', 'dismissed')
                  )
            )
            WHERE rn <= 2
            ORDER BY score DESC, rn ASC, RANDOM()
            LIMIT :limit OFFSET :offset
        """),
        {"user_id": current_user.id, "limit": limit, "offset": offset},
    ).mappings().all()
    return [dict(r) for r in rows]
@router.post("/videos/{youtube_video_id}/dismiss", status_code=204)
def dismiss_discovery_video(
    youtube_video_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Dismiss all discovery for the channel that owns this video."""
    from ..models import Video

    video = db.query(Video).filter_by(youtube_video_id=youtube_video_id).first()
    # Unknown videos and videos without a channel are both reported as 404.
    if not (video and video.channel_id):
        raise HTTPException(status_code=404, detail="Video not found")

    owner_filter = {"user_id": current_user.id, "channel_id": video.channel_id}

    link = db.query(UserChannel).filter_by(**owner_filter).first()
    if not link:
        db.add(UserChannel(status="dismissed", **owner_filter))
    else:
        link.status = "dismissed"

    queue_entry = db.query(DiscoveryQueue).filter_by(**owner_filter).first()
    if queue_entry:
        queue_entry.seen = True
    db.commit()
@router.get("/community", response_model=list[dict])
def community_shelf(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Channels downloaded by other users, weighted by count."""
    # Considers completed downloads by other users, skipping videos the caller
    # has already watched or downloaded; returns the top 20 channels by number
    # of distinct downloaders.
    #
    # latest_title / latest_thumbnail are bare columns under GROUP BY. The
    # single MAX(v.published_at) aggregate makes SQLite take them from the row
    # with the newest published_at instead of an arbitrary row of the group.
    rows = db.execute(
        text("""
            SELECT c.id, c.youtube_channel_id, c.name, c.thumbnail_url,
                   COUNT(DISTINCT d.user_id) AS downloader_count,
                   MAX(v.published_at) AS latest_published_at,
                   v.title AS latest_title, v.thumbnail_url AS latest_thumbnail
            FROM downloads d
            JOIN videos v ON d.video_id = v.id
            JOIN channels c ON v.channel_id = c.id
            WHERE d.user_id != :user_id
              AND d.status = 'complete'
              AND v.id NOT IN (
                  SELECT uv.video_id FROM user_videos uv
                  WHERE uv.user_id = :user_id AND (uv.watched = 1 OR uv.downloaded = 1)
              )
            GROUP BY c.id
            ORDER BY downloader_count DESC
            LIMIT 20
        """),
        {"user_id": current_user.id},
    ).mappings().all()
    return [dict(r) for r in rows]

View File

@@ -0,0 +1,362 @@
import os
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..config import settings
from ..database import get_db, SessionLocal
from ..models import Channel, Download, User, UserSettings, UserVideo, Video
from ..services import ytdlp
router = APIRouter()
# Request body for starting a download.
class DownloadRequest(BaseModel):
    youtube_video_id: str
    # Optional quality key; the create route uses it only when it is a key of
    # ytdlp.QUALITY_FORMATS and otherwise falls back to the user's preference.
    quality: Optional[str] = None
# NOTE(review): presumably the number of days a download stays marked for
# deletion (pending_delete_at) before it is purged — confirm against
# _purge_expired_trash.
TRASH_TTL_DAYS = 7
# Response shape for one download row joined with its video.
class DownloadOut(BaseModel):
    id: int
    # Values written in this module: "pending", "downloading", "complete", "failed".
    status: str
    progress_percent: float
    video_title: Optional[str]
    video_thumbnail_url: Optional[str]
    youtube_video_id: Optional[str]
    # Built by the queries as '/files/<youtube_video_id>.mp4'.
    file_url: Optional[str]
    resolution: Optional[str]
    created_at: datetime
    completed_at: Optional[datetime]
    error_message: Optional[str]
    pending_delete_at: Optional[datetime] = None
    model_config = {"from_attributes": True}
def _on_progress(download_id: int, pct: float):
    """Progress callback: store the percentage and mark the download "downloading".

    Uses its own short-lived session; does nothing when the row no longer exists.
    """
    session = SessionLocal()
    try:
        record = session.query(Download).filter(Download.id == download_id).first()
        if record is None:
            return
        record.progress_percent = pct
        record.status = "downloading"
        session.commit()
    finally:
        session.close()
def _on_complete(download_id: int, file_path: Optional[str], resolution: Optional[str] = None):
    """Completion callback: finalize the download row and flag the video as downloaded.

    Commits twice, first the download row and then the per-user video flags
    (creating the UserVideo row when it is missing). Uses its own short-lived
    session; does nothing when the download row no longer exists.
    """
    session = SessionLocal()
    try:
        record = session.query(Download).filter(Download.id == download_id).first()
        if record is None:
            return

        record.status = "complete"
        record.progress_percent = 100.0
        record.completed_at = datetime.utcnow()
        record.file_path = file_path
        record.resolution = resolution
        session.commit()

        flags = (
            session.query(UserVideo)
            .filter_by(user_id=record.user_id, video_id=record.video_id)
            .first()
        )
        if flags is None:
            flags = UserVideo(user_id=record.user_id, video_id=record.video_id)
            session.add(flags)
        flags.downloaded = True
        flags.downloaded_at = datetime.utcnow()
        session.commit()
    finally:
        session.close()
def _on_error(download_id: int, message: str):
    """Error callback: mark the download "failed" and store the error message.

    Uses its own short-lived session; does nothing when the row no longer exists.
    """
    session = SessionLocal()
    try:
        record = session.query(Download).filter(Download.id == download_id).first()
        if record is None:
            return
        record.status = "failed"
        record.error_message = message
        session.commit()
    finally:
        session.close()
def _ensure_video(db: Session, youtube_video_id: str) -> Video:
    """Return the stored Video for a YouTube id, creating it from yt-dlp metadata if needed.

    When the video is not stored yet, its metadata is fetched; the owning
    channel is looked up by YouTube channel id and created when unknown. Only
    metadata keys that match model attributes are used.

    Raises:
        HTTPException: 404 when yt-dlp returns no metadata for the id.
    """
    stored = db.query(Video).filter_by(youtube_video_id=youtube_video_id).first()
    if stored:
        return stored

    meta = ytdlp.fetch_video_metadata(youtube_video_id)
    if not meta:
        raise HTTPException(status_code=404, detail="Video not found on YouTube")

    # The nested channel payload is removed from meta so it is not treated as a
    # Video field below.
    channel_info = meta.pop("channel", {}) or {}
    channel = None
    channel_key = channel_info.get("youtube_channel_id")
    if channel_key:
        channel = db.query(Channel).filter_by(youtube_channel_id=channel_key).first()
        if not channel:
            channel_fields = {
                field: value
                for field, value in channel_info.items()
                if hasattr(Channel, field)
            }
            channel = Channel(**channel_fields)
            db.add(channel)
            db.flush()  # assigns channel.id

    owner_id = channel.id if channel else None
    video_fields = {
        field: value
        for field, value in meta.items()
        if hasattr(Video, field)
    }
    video = Video(channel_id=owner_id, **video_fields)
    db.add(video)
    db.commit()
    db.refresh(video)
    return video
# Start downloading a video for the current user.
# The video row is created on demand. When the user already has a pending,
# downloading or complete download of the video, that one is returned and no
# new job is started. The requested quality is honoured only when it is a key
# of ytdlp.QUALITY_FORMATS; otherwise the user's preferred quality ("best"
# without a settings row) is used.
@router.post("", response_model=DownloadOut, status_code=201)
def create_download(
    body: DownloadRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    video = _ensure_video(db, body.youtube_video_id)

    prefs = db.query(UserSettings).filter_by(user_id=current_user.id).first()
    fallback_quality = prefs.preferred_quality if prefs else "best"
    if body.quality in ytdlp.QUALITY_FORMATS:
        quality = body.quality
    else:
        quality = fallback_quality

    _DL_SELECT = """
        SELECT d.id, d.status, d.progress_percent, d.resolution,
               d.created_at, d.completed_at, d.error_message, d.pending_delete_at,
               v.title AS video_title, v.thumbnail_url AS video_thumbnail_url,
               v.youtube_video_id,
               '/files/' || v.youtube_video_id || '.mp4' AS file_url
        FROM downloads d JOIN videos v ON d.video_id = v.id
        WHERE d.id = :id
    """

    def _as_response(download_id):
        record = db.execute(text(_DL_SELECT), {"id": download_id}).mappings().first()
        return DownloadOut(**dict(record))

    active_states = ["pending", "downloading", "complete"]
    duplicate = (
        db.query(Download)
        .filter_by(user_id=current_user.id, video_id=video.id)
        .filter(Download.status.in_(active_states))
        .first()
    )
    if duplicate:
        return _as_response(duplicate.id)

    job = Download(user_id=current_user.id, video_id=video.id, status="pending")
    db.add(job)
    db.commit()
    db.refresh(job)

    background_tasks.add_task(
        ytdlp.start_download,
        video.youtube_video_id, job.id,
        _on_progress, _on_complete, _on_error,
        quality,
    )
    return _as_response(job.id)
@router.get("", response_model=list[DownloadOut])
def list_downloads(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return the current user's 200 most recent downloads, newest first.

    Expired trash entries are purged before the listing is read.
    """
    _purge_expired_trash(db)
    listing_sql = text("""
        SELECT d.id, d.status, d.progress_percent, d.created_at, d.completed_at,
               d.error_message, d.pending_delete_at, d.resolution,
               v.title AS video_title, v.thumbnail_url AS video_thumbnail_url,
               v.youtube_video_id,
               '/files/' || v.youtube_video_id || '.mp4' AS file_url
        FROM downloads d JOIN videos v ON d.video_id = v.id
        WHERE d.user_id = :user_id
        ORDER BY d.created_at DESC
        LIMIT 200
    """)
    records = db.execute(listing_sql, {"user_id": current_user.id}).mappings().all()
    downloads = []
    for record in records:
        downloads.append(DownloadOut(**dict(record)))
    return downloads
def _get_quality(db, user_id: int) -> str:
    """Return the user's preferred download quality, or "best" without a settings row."""
    prefs = db.query(UserSettings).filter_by(user_id=user_id).first()
    if not prefs:
        return "best"
    return prefs.preferred_quality
@router.post("/channel/{channel_id}", status_code=202)
def download_channel_videos(
    channel_id: int,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Queue downloads for every video of a channel the user is not already downloading.

    A video is skipped when the user has a download for it in ``pending``,
    ``downloading`` or ``complete`` state. Returns ``{"queued": <count>}``.
    """
    quality = _get_quality(db, current_user.id)
    candidates_sql = text("""
        SELECT v.id, v.youtube_video_id
        FROM videos v
        LEFT JOIN downloads d ON v.id = d.video_id AND d.user_id = :uid
            AND d.status IN ('pending', 'downloading', 'complete')
        WHERE v.channel_id = :cid AND d.id IS NULL
    """)
    candidates = db.execute(
        candidates_sql, {"uid": current_user.id, "cid": channel_id}
    ).mappings().all()

    for candidate in candidates:
        record = Download(user_id=current_user.id, video_id=candidate["id"], status="pending")
        db.add(record)
        # Flush so record.id is assigned before it is handed to the background task.
        db.flush()
        background_tasks.add_task(
            ytdlp.start_download,
            candidate["youtube_video_id"],
            record.id,
            _on_progress,
            _on_complete,
            _on_error,
            quality,
        )
    db.commit()
    return {"queued": len(candidates)}
@router.post("/following", status_code=202)
def download_following_videos(
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Queue downloads for all videos of followed channels not already being downloaded.

    Only channels whose ``user_channels.status`` is ``'followed'`` for the
    current user are considered; videos with a ``pending``, ``downloading`` or
    ``complete`` download are skipped. Returns ``{"queued": <count>}``.
    """
    quality = _get_quality(db, current_user.id)
    candidates_sql = text("""
        SELECT v.id, v.youtube_video_id
        FROM videos v
        JOIN channels c ON v.channel_id = c.id
        JOIN user_channels uc ON c.id = uc.channel_id
            AND uc.user_id = :uid AND uc.status = 'followed'
        LEFT JOIN downloads d ON v.id = d.video_id AND d.user_id = :uid
            AND d.status IN ('pending', 'downloading', 'complete')
        WHERE d.id IS NULL
    """)
    candidates = db.execute(candidates_sql, {"uid": current_user.id}).mappings().all()

    for candidate in candidates:
        record = Download(user_id=current_user.id, video_id=candidate["id"], status="pending")
        db.add(record)
        # Flush so record.id is assigned before it is handed to the background task.
        db.flush()
        background_tasks.add_task(
            ytdlp.start_download,
            candidate["youtube_video_id"],
            record.id,
            _on_progress,
            _on_complete,
            _on_error,
            quality,
        )
    db.commit()
    return {"queued": len(candidates)}
def _purge_expired_trash(db: Session):
    """Permanently remove downloads whose ``pending_delete_at`` deadline has passed.

    The query is not restricted to one user. For every expired record the
    predicted file is removed from disk (best effort), the owner's
    ``UserVideo.downloaded`` flag is reset, and the download row is deleted.
    Commits only when something was purged.
    """
    expired = db.execute(
        text("SELECT id, video_id, user_id FROM downloads WHERE pending_delete_at IS NOT NULL AND pending_delete_at <= :now"),
        {"now": datetime.utcnow()},
    ).mappings().all()
    if not expired:
        return

    for entry in expired:
        video = db.query(Video).filter_by(id=entry["video_id"]).first()
        if video:
            path = ytdlp.predicted_file_path(video.youtube_video_id)
            if path.exists():
                try:
                    os.remove(path)
                except OSError:
                    # Best effort: a file that cannot be removed must not block the purge.
                    pass

        user_video = (
            db.query(UserVideo)
            .filter_by(user_id=entry["user_id"], video_id=entry["video_id"])
            .first()
        )
        if user_video:
            user_video.downloaded = False
            user_video.downloaded_at = None

        db.execute(text("DELETE FROM downloads WHERE id = :id"), {"id": entry["id"]})

    db.commit()
def _delete_download_record(db: Session, dl: "Download", user_id: int):
    """Remove one download: its file on disk, the user's downloaded flag, and the row.

    The caller is responsible for committing the session.
    """
    video = db.query(Video).filter_by(id=dl.video_id).first()
    if video:
        path = ytdlp.predicted_file_path(video.youtube_video_id)
        if path.exists():
            try:
                os.remove(path)
            except OSError:
                # Best effort: keep going even if the file cannot be removed.
                pass

    user_video = db.query(UserVideo).filter_by(user_id=user_id, video_id=dl.video_id).first()
    if user_video:
        user_video.downloaded = False
        user_video.downloaded_at = None

    db.delete(dl)
@router.delete("/all", status_code=204)
def delete_all_downloads(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete every download of the current user that is not pending or in progress."""
    in_flight = ["pending", "downloading"]
    finished = (
        db.query(Download)
        .filter(
            Download.user_id == current_user.id,
            Download.status.notin_(in_flight),
        )
        .all()
    )
    for record in finished:
        _delete_download_record(db, record, current_user.id)
    db.commit()
@router.post("/{download_id}/restore", status_code=200)
def restore_download(
    download_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Clear the pending-deletion deadline of one of the current user's downloads.

    Raises:
        HTTPException: 404 when the download does not exist or belongs to another user.
    """
    record = (
        db.query(Download)
        .filter_by(id=download_id, user_id=current_user.id)
        .first()
    )
    if record:
        record.pending_delete_at = None
        db.commit()
        return {"ok": True}
    raise HTTPException(status_code=404, detail="Download not found")
@router.delete("/{download_id}", status_code=204)
def delete_download(
    download_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete a download record and its file from disk. Resets downloaded flag on the video."""
    record = (
        db.query(Download)
        .filter_by(id=download_id, user_id=current_user.id)
        .first()
    )
    if record is None:
        # Also covers downloads owned by a different user.
        raise HTTPException(status_code=404, detail="Download not found")
    _delete_download_record(db, record, current_user.id)
    db.commit()
@router.get("/{download_id}", response_model=DownloadOut)
def get_download(
    download_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return one download of the current user, joined with its video metadata.

    Raises:
        HTTPException: 404 when no such download exists for this user.
    """
    detail_sql = text("""
        SELECT d.id, d.status, d.progress_percent, d.resolution,
               d.created_at, d.completed_at, d.error_message, d.pending_delete_at,
               v.title AS video_title, v.thumbnail_url AS video_thumbnail_url,
               v.youtube_video_id,
               '/files/' || v.youtube_video_id || '.mp4' AS file_url
        FROM downloads d JOIN videos v ON d.video_id = v.id
        WHERE d.id = :id AND d.user_id = :user_id
    """)
    bind = {"id": download_id, "user_id": current_user.id}
    record = db.execute(detail_sql, bind).mappings().first()
    if record:
        return DownloadOut(**dict(record))
    raise HTTPException(status_code=404, detail="Download not found")

81
backend/routers/export.py Normal file
View File

@@ -0,0 +1,81 @@
from datetime import datetime
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import User
# Router holding this module's export endpoint; no prefix is configured here.
router = APIRouter()
@router.get("")
def export_data(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Export the current user's personal data as a downloadable JSON document.

    The payload contains watch history, ratings, liked videos, bookmarks and
    the queue, and is served with a ``Content-Disposition: attachment`` header
    whose filename carries the current UTC date.
    """
    bind = {"uid": current_user.id}

    def _fetch(sql: str) -> list:
        # Every export query is scoped to the same user id.
        return [dict(row) for row in db.execute(text(sql), bind).mappings().all()]

    watch_history = _fetch("""
        SELECT v.youtube_video_id, v.title, c.name AS channel_name,
               uv.watch_progress_seconds, uv.last_watched_at
        FROM user_videos uv
        JOIN videos v ON v.id = uv.video_id
        LEFT JOIN channels c ON c.id = v.channel_id
        WHERE uv.user_id = :uid AND uv.watched = 1
        ORDER BY uv.last_watched_at DESC
    """)
    ratings = _fetch("""
        SELECT v.youtube_video_id, v.title, c.name AS channel_name, uv.rating
        FROM user_videos uv
        JOIN videos v ON v.id = uv.video_id
        LEFT JOIN channels c ON c.id = v.channel_id
        WHERE uv.user_id = :uid AND uv.rating IS NOT NULL
        ORDER BY v.title
    """)
    liked = _fetch("""
        SELECT v.youtube_video_id, v.title, c.name AS channel_name, uv.liked_at
        FROM user_videos uv
        JOIN videos v ON v.id = uv.video_id
        LEFT JOIN channels c ON c.id = v.channel_id
        WHERE uv.user_id = :uid AND uv.liked = 1
        ORDER BY uv.liked_at DESC
    """)
    bookmarks = _fetch("""
        SELECT v.youtube_video_id, v.title, c.name AS channel_name,
               vb.timestamp_seconds, vb.note, vb.created_at
        FROM video_bookmarks vb
        JOIN videos v ON v.id = vb.video_id
        LEFT JOIN channels c ON c.id = v.channel_id
        WHERE vb.user_id = :uid
        ORDER BY vb.created_at DESC
    """)
    queue = _fetch("""
        SELECT v.youtube_video_id, v.title, c.name AS channel_name
        FROM user_videos uv
        JOIN videos v ON v.id = uv.video_id
        LEFT JOIN channels c ON c.id = v.channel_id
        WHERE uv.user_id = :uid AND uv.queued = 1
        ORDER BY v.title
    """)

    payload = {
        "exported_at": datetime.utcnow().isoformat(),
        "username": current_user.username,
        "watch_history": watch_history,
        "ratings": ratings,
        "liked": liked,
        "bookmarks": bookmarks,
        "queue": queue,
    }
    filename = f"ythub-export-{datetime.utcnow().strftime('%Y%m%d')}.json"
    return JSONResponse(
        content=payload,
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )

332
backend/routers/search.py Normal file
View File

@@ -0,0 +1,332 @@
"""Two-tier search: local FTS5 first, yt-dlp live fallback."""
import json
import re as _re
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import User, Video, Channel, UserVideo, SearchHistory, UserTagAffinity
from ..services import ytdlp
# Router holding this module's search endpoints; no prefix is configured here.
router = APIRouter()
# Common English function words. _query_affinity_tags() drops these so they
# never become affinity tags.
_STOPWORDS = {
    "the","a","an","is","it","in","on","at","to","of","and","or","for",
    "with","this","that","are","was","be","by","as","from","has","have",
    "will","can","but","not","my","i","you","your","we","how","what",
    "why","when","which","who","more","about","than","do","did","does",
}
def _query_affinity_tags(q: str) -> list[str]:
words = _re.sub(r"[^\w\s]", "", q.lower()).split()
return [w for w in words if len(w) >= 3 and w not in _STOPWORDS]
def _log_search(db: Session, user_id: int, q: str):
    """Persist search query and bump affinity scores for its meaningful terms.

    Each distinct term of the query raises the user's affinity for that tag by
    0.3, capped at 50.0; unknown tags are created with a score of 0.3. A term
    repeated within one query is counted once. Without that, a session that
    does not autoflush (configured outside this module) would add two pending
    rows for the same (user, tag) and the commit could fail.

    Logging is best effort: a failed commit is rolled back and not re-raised,
    so a logging problem never breaks the search itself.
    """
    db.add(SearchHistory(user_id=user_id, query=q.strip()))
    now = datetime.utcnow()
    # dict.fromkeys de-duplicates while keeping the first-seen order.
    for tag in dict.fromkeys(_query_affinity_tags(q)):
        existing = db.query(UserTagAffinity).filter_by(user_id=user_id, tag=tag).first()
        if existing:
            existing.score = min(existing.score + 0.3, 50.0)
            existing.updated_at = now
        else:
            db.add(UserTagAffinity(user_id=user_id, tag=tag, score=0.3, updated_at=now))
    try:
        db.commit()
    except Exception:
        db.rollback()
class VideoResult(BaseModel):
    # One video entry of a search response. Built either from a local database
    # row or from a live search item.
    youtube_video_id: str
    title: str
    thumbnail_url: Optional[str]
    duration_seconds: Optional[int]
    channel_name: str
    channel_youtube_id: Optional[str]
    published_at: Optional[datetime]
    # True when a matching row exists in the local videos table.
    is_local: bool
    # Per-user flags taken from the user's user_videos row, if any.
    is_downloaded: bool
    is_watched: bool
    # Primary key of the local videos row; None for videos not stored locally.
    local_video_id: Optional[int]
    model_config = {"from_attributes": True}
class ChannelResult(BaseModel):
    # One channel card of a search response.
    youtube_channel_id: str
    name: str
    thumbnail_url: Optional[str]
    description: Optional[str]
    # True when the requesting user's user_channels status is 'followed'.
    is_followed: bool
    # Primary key of the local channels row; None for channels not stored locally.
    local_channel_id: Optional[int]
    subscriber_count: Optional[int] = None
    video_count: Optional[int] = None
    model_config = {"from_attributes": True}
class SearchResponse(BaseModel):
    # Envelope returned by the search endpoint.
    videos: list[VideoResult]
    channels: list[ChannelResult]
    source: str  # "local" | "live" | "mixed" | "none" (no results at all)
    query: str
def _sanitize_fts(q: str) -> str:
"""Strip FTS5 syntax characters and return a safe multi-word query."""
clean = _re.sub(r'["\(\)\[\]\{\}\*\+\?\!\^\~\-]', ' ', q)
words = [w for w in clean.split() if w.upper() not in ("AND", "OR", "NOT")]
if not words:
return '""'
return " ".join(words)
def _local_video_search(db: Session, user_id: int, q: str, limit: int = 100) -> list[dict]:
    """Search the local ``videos_fts`` index and return matching videos as dicts.

    Each row carries the video's metadata, its channel name and id, and the
    user's downloaded/watched flags (0 when the user has no ``user_videos``
    row). Any error raised while running the query yields an empty list.
    """
    statement = text("""
        SELECT
            v.id, v.youtube_video_id, v.title, v.thumbnail_url,
            v.duration_seconds, v.published_at,
            c.name AS channel_name, c.youtube_channel_id,
            COALESCE(uv.downloaded, 0) AS is_downloaded,
            COALESCE(uv.watched, 0) AS is_watched
        FROM videos_fts fts
        JOIN videos v ON fts.rowid = v.id
        LEFT JOIN channels c ON v.channel_id = c.id
        LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
        WHERE videos_fts MATCH :query
        ORDER BY rank
        LIMIT :limit
    """)
    try:
        bind = {"user_id": user_id, "query": _sanitize_fts(q), "limit": limit}
        matches = db.execute(statement, bind).mappings().all()
    except Exception:
        return []
    return [dict(match) for match in matches]
def _local_channel_search(db: Session, user_id: int, q: str, limit: int = 5) -> list[dict]:
    """Search the local ``channels_fts`` index and return matching channels as dicts.

    Each row carries the channel's metadata, whether the user follows it, and
    the number of locally stored videos. Any error raised while running the
    query yields an empty list.
    """
    statement = text("""
        SELECT
            c.id, c.youtube_channel_id, c.name, c.thumbnail_url, c.description,
            c.subscriber_count,
            CASE WHEN uc.status = 'followed' THEN 1 ELSE 0 END AS is_followed,
            (SELECT COUNT(*) FROM videos WHERE channel_id = c.id) AS video_count
        FROM channels_fts fts
        JOIN channels c ON fts.rowid = c.id
        LEFT JOIN user_channels uc ON c.id = uc.channel_id AND uc.user_id = :user_id
        WHERE channels_fts MATCH :query
        ORDER BY rank
        LIMIT :limit
    """)
    try:
        bind = {"user_id": user_id, "query": _sanitize_fts(q), "limit": limit}
        matches = db.execute(statement, bind).mappings().all()
    except Exception:
        return []
    return [dict(match) for match in matches]
def _upsert_channel_from_meta(db: Session, ch: dict) -> Channel:
    """Return the channel with ``ch["youtube_channel_id"]``, creating it when missing.

    A new channel is built from the keys of ``ch`` that are attributes of
    ``Channel`` and is flushed (not committed). An existing channel is
    returned untouched.
    """
    channel = db.query(Channel).filter_by(youtube_channel_id=ch["youtube_channel_id"]).first()
    if channel:
        return channel
    known_fields = {key: value for key, value in ch.items() if hasattr(Channel, key)}
    channel = Channel(**known_fields)
    db.add(channel)
    db.flush()
    return channel
def _live_search_to_results(
    db: Session, user_id: int, raw: list[dict]
) -> list[VideoResult]:
    """Convert raw live-search items into ``VideoResult`` objects, enriched with local state.

    Items without a ``youtube_video_id`` are skipped. For each remaining item
    the local ``videos`` table and the user's ``user_videos`` row are consulted
    to fill ``is_local``, ``is_downloaded``, ``is_watched`` and
    ``local_video_id``.

    A channel name that is missing or null becomes ``""``. ``channel_name`` is
    a required ``str``, so passing ``None`` would raise a validation error and
    the caller would lose the whole live result set.
    """
    results = []
    for item in raw:
        yt_id = item.get("youtube_video_id")
        if not yt_id:
            continue
        local = db.query(Video).filter_by(youtube_video_id=yt_id).first()
        uv = None
        if local:
            uv = db.query(UserVideo).filter_by(user_id=user_id, video_id=local.id).first()
        ch = item.get("channel") or {}
        # Prefer the DB date — flat-playlist search results rarely include upload_date
        published_at = (local.published_at if local and local.published_at
                        else item.get("published_at"))
        results.append(VideoResult(
            youtube_video_id=yt_id,
            title=item["title"],
            thumbnail_url=item.get("thumbnail_url"),
            duration_seconds=item.get("duration_seconds"),
            # `or ""` also covers a "name" key that is present but null.
            channel_name=ch.get("name") or "",
            channel_youtube_id=ch.get("youtube_channel_id"),
            published_at=published_at,
            is_local=local is not None,
            is_downloaded=bool(uv and uv.downloaded),
            is_watched=bool(uv and uv.watched),
            local_video_id=local.id if local else None,
        ))
    return results
@router.get("", response_model=SearchResponse)
def search(
    q: str = Query(..., min_length=1),
    live: bool = Query(False),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Search videos and channels: local full-text index first, live yt-dlp search as fallback.

    The query is logged, then the local FTS indexes are searched. The live
    search runs when there are no local video results or when ``live`` is
    true; its results are merged after the local ones. ``source`` in the
    response is "local", "live", "mixed", or "none" when nothing was found.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened source
    # listing; confirm the indentation of the live-channel section against the
    # real file.
    _log_search(db, current_user.id, q)
    # Always run local search first
    local_videos = _local_video_search(db, current_user.id, q)
    local_channels = _local_channel_search(db, current_user.id, q)
    video_results = [
        VideoResult(
            youtube_video_id=r["youtube_video_id"],
            title=r["title"],
            thumbnail_url=r["thumbnail_url"],
            duration_seconds=r["duration_seconds"],
            channel_name=r["channel_name"] or "",
            channel_youtube_id=r["youtube_channel_id"],
            published_at=r["published_at"],
            is_local=True,
            is_downloaded=bool(r["is_downloaded"]),
            is_watched=bool(r["is_watched"]),
            local_video_id=r["id"],
        )
        for r in local_videos
    ]
    channel_results = [
        ChannelResult(
            youtube_channel_id=r["youtube_channel_id"],
            name=r["name"],
            thumbnail_url=r["thumbnail_url"],
            description=r["description"],
            is_followed=bool(r["is_followed"]),
            local_channel_id=r["id"],
            subscriber_count=r.get("subscriber_count"),
            video_count=r.get("video_count"),
        )
        for r in local_channels
    ]
    # Synthesize channel cards from video results for channels not yet in the list
    found_ch_ids = {c.youtube_channel_id for c in channel_results}

    def _channel_card_from_db(yt_ch_id: str) -> Optional[ChannelResult]:
        # Build a card from the local channels table; None when the channel is not stored.
        row = db.execute(
            text("""
                SELECT c.id, c.youtube_channel_id, c.name, c.thumbnail_url, c.description,
                       c.subscriber_count,
                       CASE WHEN uc.status = 'followed' THEN 1 ELSE 0 END AS is_followed,
                       (SELECT COUNT(*) FROM videos WHERE channel_id = c.id) AS video_count
                FROM channels c
                LEFT JOIN user_channels uc ON c.id = uc.channel_id AND uc.user_id = :user_id
                WHERE c.youtube_channel_id = :yt_ch_id
            """),
            {"user_id": current_user.id, "yt_ch_id": yt_ch_id},
        ).mappings().first()
        if not row:
            return None
        return ChannelResult(
            youtube_channel_id=row["youtube_channel_id"],
            name=row["name"],
            thumbnail_url=row["thumbnail_url"],
            description=row["description"],
            is_followed=bool(row["is_followed"]),
            local_channel_id=row["id"],
            subscriber_count=row.get("subscriber_count"),
            video_count=row.get("video_count"),
        )

    if video_results:
        for v in video_results:
            if not v.channel_youtube_id or v.channel_youtube_id in found_ch_ids:
                continue
            # Mark as seen first so each channel is looked up at most once.
            found_ch_ids.add(v.channel_youtube_id)
            card = _channel_card_from_db(v.channel_youtube_id)
            if card:
                channel_results.append(card)
    source = "local" if (video_results or channel_results) else "none"
    # Fall back to live yt-dlp search if no local results or explicitly requested
    if not video_results or live:
        try:
            live_raw = ytdlp.search_youtube(q)
            live_results = _live_search_to_results(db, current_user.id, live_raw)
        except Exception:
            # Any failure in the live search degrades to "no live results".
            live_results = []
            live_raw = []
        if live_results:
            # Merge: deduplicate by youtube_video_id, local results take priority
            local_ids = {v.youtube_video_id for v in video_results}
            for r in live_results:
                if r.youtube_video_id not in local_ids:
                    video_results.append(r)
            source = "live" if source == "none" else "mixed"
        # Synthesize channel cards from YouTube results for channels not in local DB
        ch_by_yt_id: dict[str, dict] = {}
        for item in live_raw:
            ch = item.get("channel") or {}
            yt_ch_id = ch.get("youtube_channel_id")
            if yt_ch_id and yt_ch_id not in found_ch_ids and yt_ch_id not in ch_by_yt_id:
                ch_by_yt_id[yt_ch_id] = ch
        for yt_ch_id, ch in ch_by_yt_id.items():
            card = _channel_card_from_db(yt_ch_id)
            if card:
                channel_results.append(card)
                found_ch_ids.add(yt_ch_id)
            else:
                # Not stored locally: emit a minimal card, but only if a name is known.
                name = (ch.get("name") or "").strip()
                if name:
                    channel_results.append(ChannelResult(
                        youtube_channel_id=yt_ch_id,
                        name=name,
                        thumbnail_url=None,
                        description=None,
                        is_followed=False,
                        local_channel_id=None,
                    ))
                    found_ch_ids.add(yt_ch_id)
    return SearchResponse(
        videos=video_results,
        channels=channel_results,
        source=source,
        query=q,
    )
@router.get("/history")
def search_history(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return the last 8 unique queries for the current user."""
    recent_sql = text("""
        SELECT query FROM search_history
        WHERE user_id = :uid
        GROUP BY query
        ORDER BY MAX(searched_at) DESC
        LIMIT 8
    """)
    queries = db.execute(recent_sql, {"uid": current_user.id}).scalars().all()
    return {"queries": list(queries)}

117
backend/routers/settings.py Normal file
View File

@@ -0,0 +1,117 @@
from typing import Optional
from fastapi import APIRouter, Depends
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import User, UserSettings
from ..services import ytdlp
# Router holding this module's settings endpoints; no prefix is configured here.
router = APIRouter()
# Values update_settings() accepts for cookies_browser; anything else is ignored.
# The empty string is allowed (presumably meaning "no browser cookies" — confirm in ytdlp).
VALID_BROWSERS = {"", "chrome", "chromium", "firefox", "brave", "edge", "opera", "safari"}
# Region codes update_settings() keeps when validating discovery_regions.
VALID_REGIONS = {"US", "SE", "GB", "DE", "JP", "FR", "CA", "AU", "BR", "IN", "KR", "MX"}
class SettingsOut(BaseModel):
    # Response schema of the settings endpoints. It is populated from a
    # UserSettings ORM object (from_attributes is enabled).
    preferred_quality: str
    max_concurrent_downloads: int
    hide_watched_from_feed: bool
    mark_watched_at_percent: int
    auto_download_on_sync: bool
    cookies_browser: str = ""
    theater_mode: bool = False
    # Comma-separated region codes.
    discovery_regions: str = "US,SE"
    calm_mode: bool = False
    hide_subscriber_counts: bool = False
    autoplay_enabled: bool = False
    feed_weight_recency: float = 5.0
    feed_weight_affinity: float = 5.0
    feed_weight_channel: float = 5.0
    model_config = {"from_attributes": True}
class SettingsPatch(BaseModel):
    # Partial-update request body. Every field is optional, and update_settings()
    # leaves a setting untouched when its field is None.
    preferred_quality: Optional[str] = None
    max_concurrent_downloads: Optional[int] = Field(None, ge=1, le=5)
    hide_watched_from_feed: Optional[bool] = None
    mark_watched_at_percent: Optional[int] = Field(None, ge=50, le=100)
    auto_download_on_sync: Optional[bool] = None
    cookies_browser: Optional[str] = None
    theater_mode: Optional[bool] = None
    # Comma-separated region codes; validated against VALID_REGIONS on update.
    discovery_regions: Optional[str] = None
    calm_mode: Optional[bool] = None
    hide_subscriber_counts: Optional[bool] = None
    autoplay_enabled: Optional[bool] = None
    feed_weight_recency: Optional[float] = Field(None, ge=0.0, le=10.0)
    feed_weight_affinity: Optional[float] = Field(None, ge=0.0, le=10.0)
    feed_weight_channel: Optional[float] = Field(None, ge=0.0, le=10.0)
def _get_or_create(db: Session, user_id: int) -> UserSettings:
    """Return the user's settings row, creating and committing a default one when missing."""
    record = db.query(UserSettings).filter_by(user_id=user_id).first()
    if record:
        return record
    record = UserSettings(user_id=user_id)
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
@router.get("", response_model=SettingsOut)
def get_settings(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return the current user's settings, creating the row on first access."""
    record = _get_or_create(db, current_user.id)
    return record
@router.patch("", response_model=SettingsOut)
def update_settings(
    body: SettingsPatch,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Apply a partial update to the current user's settings and return the result.

    Fields left as ``None`` are not touched. Values that fail their check
    (unknown quality, unknown browser, no valid region code) are silently
    ignored. Changing the concurrency limit or the cookies browser is also
    pushed to the ``ytdlp`` service.
    """
    record = _get_or_create(db, current_user.id)

    quality = body.preferred_quality
    if quality is not None and quality in ytdlp.QUALITY_FORMATS:
        record.preferred_quality = quality

    concurrency = body.max_concurrent_downloads
    if concurrency is not None:
        record.max_concurrent_downloads = concurrency
        ytdlp.set_max_concurrent(concurrency)

    browser = body.cookies_browser
    if browser is not None and browser in VALID_BROWSERS:
        record.cookies_browser = browser
        ytdlp.set_cookies_browser(browser)

    if body.discovery_regions is not None:
        # Validate: comma-separated list of known region codes
        requested = (part.strip().upper() for part in body.discovery_regions.split(","))
        accepted = [code for code in requested if code and code in VALID_REGIONS]
        if accepted:
            record.discovery_regions = ",".join(accepted)

    # These fields are copied verbatim whenever the patch supplies a value.
    passthrough_fields = (
        "hide_watched_from_feed",
        "mark_watched_at_percent",
        "auto_download_on_sync",
        "theater_mode",
        "calm_mode",
        "hide_subscriber_counts",
        "autoplay_enabled",
        "feed_weight_recency",
        "feed_weight_affinity",
        "feed_weight_channel",
    )
    for field_name in passthrough_fields:
        value = getattr(body, field_name)
        if value is not None:
            setattr(record, field_name, value)

    db.commit()
    db.refresh(record)
    return record

144
backend/routers/stats.py Normal file
View File

@@ -0,0 +1,144 @@
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..database import get_db
from ..models import User
# Router holding this module's statistics endpoint; no prefix is configured here.
router = APIRouter()
@router.get("")
def get_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return viewing statistics for the current user.

    Covers lifetime totals, top channels, a 30-day daily breakdown, 7- and
    30-day totals, completion and rewatch figures, top categories, the tag
    affinity profile and the number of liked videos. Missing aggregates are
    reported as 0.
    """
    bind = {"uid": current_user.id}

    def _one(sql: str):
        # Single-row aggregate query.
        return db.execute(text(sql), bind).mappings().first()

    def _many(sql: str):
        # Multi-row query.
        return db.execute(text(sql), bind).mappings().all()

    def _recent_totals(days: int):
        # Watched count and seconds within the trailing window of `days` days.
        return _one(f"""
            SELECT COUNT(*) AS count, SUM(uv.watch_progress_seconds) AS seconds
            FROM user_videos uv
            WHERE uv.user_id = :uid AND uv.watched = 1
              AND uv.last_watched_at >= datetime('now', '-{days} days')
        """)

    totals = _one("""
        SELECT
            COUNT(*) AS total_watched,
            SUM(uv.watch_progress_seconds) AS total_watch_seconds
        FROM user_videos uv
        WHERE uv.user_id = :uid AND uv.watched = 1
    """)
    top_channels = _many("""
        SELECT c.id, c.name,
               COUNT(*) AS watch_count,
               SUM(uv.watch_progress_seconds) AS watch_seconds
        FROM user_videos uv
        JOIN videos v ON uv.video_id = v.id
        JOIN channels c ON v.channel_id = c.id
        WHERE uv.user_id = :uid AND uv.watched = 1
        GROUP BY c.id, c.name
        ORDER BY watch_seconds DESC
        LIMIT 10
    """)
    daily = _many("""
        SELECT date(uv.last_watched_at) AS date,
               COUNT(*) AS count,
               SUM(uv.watch_progress_seconds) AS seconds
        FROM user_videos uv
        WHERE uv.user_id = :uid
          AND uv.watched = 1
          AND uv.last_watched_at >= datetime('now', '-30 days')
        GROUP BY date(uv.last_watched_at)
        ORDER BY date ASC
    """)
    this_week = _recent_totals(7)
    this_month = _recent_totals(30)
    completion = _one("""
        SELECT AVG(uv.completion_percent) AS avg_pct,
               COUNT(CASE WHEN uv.completion_percent >= 90 THEN 1 END) AS finished_count,
               COUNT(CASE WHEN uv.completion_percent < 20 AND uv.completion_percent IS NOT NULL THEN 1 END) AS bailed_count,
               SUM(uv.rewatch_count) AS total_rewatches,
               COUNT(CASE WHEN uv.rewatch_count > 0 THEN 1 END) AS rewatched_videos
        FROM user_videos uv
        WHERE uv.user_id = :uid AND uv.watched = 1
    """)
    top_categories = _many("""
        SELECT v.category, COUNT(*) AS watch_count,
               AVG(uv.completion_percent) AS avg_completion
        FROM user_videos uv
        JOIN videos v ON uv.video_id = v.id
        WHERE uv.user_id = :uid AND uv.watched = 1 AND v.category IS NOT NULL
        GROUP BY v.category
        ORDER BY watch_count DESC
        LIMIT 8
    """)
    taste_profile = _many("""
        SELECT tag, score FROM user_tag_affinity
        WHERE user_id = :uid AND score > 0
        ORDER BY score DESC
        LIMIT 20
    """)
    liked_count = _one("SELECT COUNT(*) AS n FROM user_videos WHERE user_id = :uid AND liked = 1")

    stats = {
        "total_watched": totals["total_watched"] or 0,
        "total_watch_seconds": totals["total_watch_seconds"] or 0,
        "top_channels": [dict(row) for row in top_channels],
        "daily": [dict(row) for row in daily],
        "this_week": {
            "count": this_week["count"] or 0,
            "seconds": this_week["seconds"] or 0,
        },
        "this_month": {
            "count": this_month["count"] or 0,
            "seconds": this_month["seconds"] or 0,
        },
        "avg_completion_percent": round(completion["avg_pct"] or 0, 1),
        "finished_count": completion["finished_count"] or 0,
        "bailed_count": completion["bailed_count"] or 0,
        "total_rewatches": completion["total_rewatches"] or 0,
        "rewatched_videos": completion["rewatched_videos"] or 0,
        "total_liked": liked_count["n"] or 0,
        "top_categories": [dict(row) for row in top_categories],
        "taste_profile": [dict(row) for row in taste_profile],
    }
    return stats

923
backend/routers/videos.py Normal file
View File

@@ -0,0 +1,923 @@
import os
import random
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..auth_utils import get_current_user
from ..config import settings
from ..database import get_db
from ..models import Channel, User, UserSettings, UserTagAffinity, UserVideo, Video
from ..services import ytdlp
from ..services.scoring import get_surprise_videos, get_discovery_injection
# Router holding this module's video endpoints; no prefix is configured here.
router = APIRouter()
def _update_affinity(db: Session, user_id: int, video: Video, delta: float):
    """Shift the user's affinity for a video's category and tags by ``delta``.

    The labels are the lower-cased category plus up to the first eight entries
    of the JSON-encoded ``video.tags``. Existing scores are adjusted and never
    drop below -20.0. A label without a score row gets one only for a positive
    ``delta``. Nothing is committed here.
    """
    import json as _json

    labels = set()
    if video.category:
        labels.add(video.category.lower().strip())
    if video.tags:
        try:
            for raw_tag in _json.loads(video.tags)[:8]:
                if raw_tag and raw_tag.strip():
                    labels.add(raw_tag.lower().strip())
        except Exception:
            # Malformed tag data is ignored; labels collected so far are kept.
            pass

    for label in labels:
        affinity = db.query(UserTagAffinity).filter_by(user_id=user_id, tag=label).first()
        if affinity:
            affinity.score = max(affinity.score + delta, -20.0)
            affinity.updated_at = datetime.utcnow()
        elif delta > 0:
            db.add(UserTagAffinity(user_id=user_id, tag=label, score=delta))
class VideoDetail(BaseModel):
    # Video representation returned by this module's list endpoints
    # (e.g. /history and /home-feed): metadata plus per-user state.
    id: int
    youtube_video_id: str
    title: str
    description: Optional[str]
    thumbnail_url: Optional[str]
    duration_seconds: Optional[int]
    published_at: Optional[datetime]
    channel_id: Optional[int] = None
    channel_name: Optional[str]
    channel_youtube_id: Optional[str]
    # Stored as a string; _update_affinity() parses it as JSON.
    tags: Optional[str]
    category: Optional[str]
    # Per-user state; the defaults apply when no value is supplied.
    is_downloaded: bool = False
    is_watched: bool = False
    liked: bool = False
    watch_progress_seconds: int = 0
    queued: bool = False
    rating: Optional[int] = None
    channel_followed: bool = False
    download_resolution: Optional[str] = None
    local_file_url: Optional[str] = None
    is_recommended: bool = False
    model_config = {"from_attributes": True}
def _local_file_url(file_path: Optional[str]) -> Optional[str]:
if not file_path or not os.path.exists(file_path):
return None
try:
rel = os.path.relpath(file_path, settings.download_path)
return f"/files/{rel}"
except ValueError:
return None
class ProgressUpdate(BaseModel):
    # Request body carrying a playback position and an optional watched flag.
    # NOTE(review): the endpoint consuming it is outside this excerpt; presumably
    # watched=None means "leave the flag unchanged" — confirm.
    watch_progress_seconds: int
    watched: Optional[bool] = None
def _get_uv(db: Session, user_id: int, video_id: int) -> UserVideo:
    """Return the ``UserVideo`` row for this user and video, creating it when missing.

    A newly created row is flushed but not committed.
    """
    link = db.query(UserVideo).filter_by(user_id=user_id, video_id=video_id).first()
    if link:
        return link
    link = UserVideo(user_id=user_id, video_id=video_id)
    db.add(link)
    db.flush()
    return link
@router.get("/history", response_model=list[VideoDetail])
def watch_history(
    limit: int = 25,
    offset: int = 0,
    channel_id: Optional[int] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return a page of the current user's watched videos, most recently watched first.

    When ``channel_id`` is given (and non-zero) the history is restricted to
    that channel.
    """
    params: dict = {"user_id": current_user.id, "limit": limit, "offset": offset}
    channel_filter = ""
    if channel_id:
        # The clause text is fixed; the id itself is passed as a bound parameter.
        channel_filter = "AND v.channel_id = :channel_id"
        params["channel_id"] = channel_id

    history_sql = _VIDEO_SELECT + f"""
        WHERE uv.user_id = :user_id AND uv.watched = 1
        {channel_filter}
        ORDER BY uv.last_watched_at DESC
        LIMIT :limit OFFSET :offset
    """
    entries = db.execute(text(history_sql), params).mappings().all()
    return [_row_to_detail(entry) for entry in entries]
@router.get("/home-feed", response_model=list[VideoDetail])
def home_feed(
    limit: int = 25,
    offset: int = 0,
    mode: str = "ranked",  # ranked | chronological | random | inbox
    duration: str = "",  # "" | short | medium | long
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Build one page of the home feed for the current user.

    Modes:
      * ``chronological``: videos from followed, un-muted channels, newest
        first.
      * ``random``: randomly ordered videos from unseen discovery-queue
        channels; every card is flagged ``is_recommended``.
      * ``inbox``: unwatched videos from followed channels indexed after the
        channel's ``last_seen_at`` (or within the last 7 days when that is
        NULL).
      * any other value (``ranked``, the default): at most three videos per
        followed channel ordered by a weighted score, with discovery cards
        interleaved.

    ``duration`` restricts results to short (<= 600 s), medium (600-1800 s]
    or long (> 1800 s) videos; any other value applies no filter. The filter
    also applies to the discovery cards injected in ranked mode.
    """
    # Whitelist of SQL fragments: the client value only selects a fragment and
    # is never interpolated into the query text itself.
    duration_clause = {
        "short": "AND v.duration_seconds <= 600",
        "medium": "AND v.duration_seconds > 600 AND v.duration_seconds <= 1800",
        "long": "AND v.duration_seconds > 1800",
    }.get(duration, "")

    user_settings = db.query(UserSettings).filter_by(user_id=current_user.id).first()
    hide_watched = user_settings.hide_watched_from_feed if user_settings else False

    def _weight(attr: str) -> float:
        """Return the stored weight divided by 5.0 (missing/NULL counts as 5.0)."""
        stored = getattr(user_settings, attr) if user_settings else None
        return (stored if stored is not None else 5.0) / 5.0

    def _to_detail(row, **flags) -> VideoDetail:
        """Convert a result row to VideoDetail, dropping query-only columns."""
        fields = {
            k: v for k, v in dict(row).items()
            if k not in ("watched", "score", "rn")
        }
        return VideoDetail(**fields, is_watched=bool(row["watched"]), **flags)

    page = {"user_id": current_user.id, "limit": limit, "offset": offset}
    hide_flag = 1 if hide_watched else 0

    if mode == "chronological":
        rows = db.execute(
            text(f"""
                SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                       v.duration_seconds, v.published_at, v.tags, v.category,
                       c.id AS channel_id, c.name AS channel_name,
                       c.youtube_channel_id AS channel_youtube_id,
                       COALESCE(uv.watched, 0) AS watched,
                       COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
                       COALESCE(uv.downloaded, 0) AS is_downloaded,
                       COALESCE(uv.queued, 0) AS queued,
                       NULL AS file_path
                FROM videos v
                JOIN channels c ON v.channel_id = c.id
                JOIN user_channels uc
                    ON c.id = uc.channel_id AND uc.user_id = :user_id AND uc.status = 'followed'
                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                WHERE (:hide_watched = 0 OR COALESCE(uv.watched, 0) = 0)
                  AND (uc.muted_until IS NULL OR datetime(uc.muted_until) < datetime('now'))
                  {duration_clause}
                ORDER BY v.published_at DESC NULLS LAST
                LIMIT :limit OFFSET :offset
            """),
            {**page, "hide_watched": hide_flag},
        ).mappings().all()
        return [_to_detail(r) for r in rows]

    if mode == "random":
        # Random videos from the discovery pool: unweighted, no score ordering.
        rows = db.execute(
            text(f"""
                SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                       v.duration_seconds, v.published_at, v.tags, v.category,
                       c.id AS channel_id, c.name AS channel_name,
                       c.youtube_channel_id AS channel_youtube_id,
                       COALESCE(uv.watched, 0) AS watched,
                       COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
                       COALESCE(uv.downloaded, 0) AS is_downloaded,
                       COALESCE(uv.queued, 0) AS queued,
                       NULL AS file_path
                FROM videos v
                JOIN channels c ON v.channel_id = c.id
                JOIN discovery_queue dq ON c.id = dq.channel_id
                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                WHERE dq.user_id = :user_id AND dq.seen = 0
                  AND v.published_at IS NOT NULL
                  AND dq.channel_id NOT IN (
                      SELECT channel_id FROM user_channels
                      WHERE user_id = :user_id AND status IN ('followed', 'dismissed')
                  )
                  {duration_clause}
                ORDER BY RANDOM()
                LIMIT :limit OFFSET :offset
            """),
            page,
        ).mappings().all()
        return [_to_detail(r, is_recommended=True) for r in rows]

    if mode == "inbox":
        rows = db.execute(
            text(f"""
                SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                       v.duration_seconds, v.published_at, v.tags, v.category,
                       c.id AS channel_id, c.name AS channel_name,
                       c.youtube_channel_id AS channel_youtube_id,
                       COALESCE(uv.watched, 0) AS watched,
                       COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
                       COALESCE(uv.downloaded, 0) AS is_downloaded,
                       COALESCE(uv.queued, 0) AS queued,
                       NULL AS file_path
                FROM videos v
                JOIN channels c ON v.channel_id = c.id
                JOIN user_channels uc
                    ON c.id = uc.channel_id AND uc.user_id = :user_id AND uc.status = 'followed'
                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                WHERE COALESCE(uv.watched, 0) = 0
                  AND (
                      (uc.last_seen_at IS NULL AND v.indexed_at >= datetime('now', '-7 days'))
                      OR
                      (uc.last_seen_at IS NOT NULL AND v.indexed_at > uc.last_seen_at)
                  )
                  {duration_clause}
                ORDER BY v.indexed_at DESC
                LIMIT :limit OFFSET :offset
            """),
            page,
        ).mappings().all()
        return [_to_detail(r) for r in rows]

    # mode == "ranked" (default; also reached for unrecognised mode values)
    rows = db.execute(
        text(f"""
            WITH channel_stats AS (
                SELECT
                    v.channel_id,
                    COUNT(CASE WHEN uv.watched = 1 THEN 1 END) AS watched_count,
                    COUNT(CASE WHEN uv.liked = 1 THEN 1 END) AS liked_count,
                    SUM(CASE WHEN uv.rating IS NOT NULL THEN uv.rating ELSE 0 END) AS rating_sum
                FROM videos v
                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                GROUP BY v.channel_id
            ),
            scored AS (
                SELECT
                    v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                    v.duration_seconds, v.published_at, v.tags, v.category,
                    c.id AS channel_id, c.name AS channel_name,
                    c.youtube_channel_id AS channel_youtube_id,
                    COALESCE(uv.watched, 0) AS watched,
                    COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
                    COALESCE(uv.downloaded, 0) AS is_downloaded,
                    COALESCE(uv.queued, 0) AS queued,
                    uv.rating AS rating,
                    NULL AS file_path,
                    (SQRT(CAST(COALESCE(cs.watched_count, 0) AS REAL)) * 6.0
                        + COALESCE(cs.liked_count, 0) * 12.0
                        + COALESCE(cs.rating_sum, 0) * 8.0) * :w_channel
                    + MAX(COALESCE(julianday(v.published_at) - julianday('now'), -90), -365) * :w_recency
                    + COALESCE((
                        SELECT uta.score FROM user_tag_affinity uta
                        WHERE uta.user_id = :user_id
                          AND uta.tag = LOWER(COALESCE(v.category, ''))
                        LIMIT 1
                    ), 0) * 3.0 * :w_affinity
                    AS score,
                    ROW_NUMBER() OVER (
                        PARTITION BY v.channel_id
                        ORDER BY v.published_at DESC NULLS LAST, v.id DESC
                    ) AS rn
                FROM videos v
                JOIN channels c ON v.channel_id = c.id
                JOIN user_channels uc
                    ON c.id = uc.channel_id AND uc.user_id = :user_id AND uc.status = 'followed'
                LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
                LEFT JOIN channel_stats cs ON v.channel_id = cs.channel_id
                WHERE (:hide_watched = 0 OR COALESCE(uv.watched, 0) = 0)
                  AND (uc.muted_until IS NULL OR datetime(uc.muted_until) < datetime('now'))
                  {duration_clause}
            )
            SELECT * FROM scored
            WHERE rn <= 3
            ORDER BY score DESC, RANDOM()
            LIMIT :limit OFFSET :offset
        """),
        {
            **page,
            "hide_watched": hide_flag,
            "w_recency": _weight("feed_weight_recency"),
            "w_affinity": _weight("feed_weight_affinity"),
            "w_channel": _weight("feed_weight_channel"),
        },
    ).mappings().all()
    followed = [_to_detail(r) for r in rows]

    # Inject discovery cards on every page: 1 every 5 followed cards.
    disc_per_page = max(limit // 5, 1)
    disc_offset = (offset // limit) * disc_per_page if limit > 0 else 0
    # The duration filter is applied here as well, so a filtered feed does not
    # surface discovery cards of the wrong length. Because each channel is
    # represented by its newest dated video, a channel whose newest video does
    # not match the filter contributes no card.
    disc_rows = db.execute(
        text(f"""
            SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                   v.duration_seconds, v.published_at, v.tags, v.category,
                   c.id AS channel_id, c.name AS channel_name,
                   c.youtube_channel_id AS channel_youtube_id
            FROM discovery_queue dq
            JOIN channels c ON dq.channel_id = c.id
            JOIN videos v ON v.channel_id = c.id
            WHERE dq.user_id = :user_id AND dq.seen = 0
              AND v.published_at IS NOT NULL
              AND dq.channel_id NOT IN (
                  SELECT channel_id FROM user_channels
                  WHERE user_id = :user_id AND status IN ('followed', 'dismissed')
              )
              AND v.id = (
                  SELECT id FROM videos
                  WHERE channel_id = c.id AND published_at IS NOT NULL
                  ORDER BY published_at DESC LIMIT 1
              )
              {duration_clause}
            ORDER BY dq.score DESC
            LIMIT :disc_limit OFFSET :disc_offset
        """),
        {"user_id": current_user.id, "disc_limit": disc_per_page, "disc_offset": disc_offset},
    ).mappings().all()
    disc = [
        VideoDetail(**dict(r), is_recommended=True, is_watched=False, is_downloaded=False)
        for r in disc_rows
    ]

    # Interleave: one discovery card before every 5th followed card; any
    # discovery cards left over are appended at the end.
    result: list[VideoDetail] = []
    disc_iter = iter(disc)
    for i, v in enumerate(followed):
        if i > 0 and i % 5 == 0:
            rec = next(disc_iter, None)
            if rec:
                result.append(rec)
        result.append(v)
    result.extend(disc_iter)
    return result
@router.get("/continue-watching", response_model=list[VideoDetail])
def continue_watching(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return up to 20 videos the user has started but not finished.

    A video qualifies when its stored progress is above zero and it is not
    marked watched. Results are ordered by most recent playback first.
    """
    rows = db.execute(
        text("""
            SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                   v.duration_seconds, v.published_at, v.tags, v.category,
                   c.id AS channel_id, c.name AS channel_name,
                   c.youtube_channel_id AS channel_youtube_id,
                   COALESCE(uv.watched, 0) AS watched,
                   uv.watch_progress_seconds,
                   COALESCE(uv.downloaded, 0) AS is_downloaded,
                   COALESCE(uv.queued, 0) AS queued
            FROM user_videos uv
            JOIN videos v ON uv.video_id = v.id
            LEFT JOIN channels c ON v.channel_id = c.id
            WHERE uv.user_id = :user_id
              AND uv.watch_progress_seconds > 0
              AND (uv.watched IS NULL OR uv.watched = 0)
            ORDER BY uv.last_watched_at DESC
            LIMIT 20
        """),
        {"user_id": current_user.id},
    ).mappings().all()
    # The flag columns are COALESCEd like the other feed queries: the WHERE
    # clause already allows for NULL flags, and a NULL would fail validation
    # of VideoDetail's non-optional bool fields. channel_id is selected so the
    # cards carry the same channel reference as the other endpoints.
    return [
        VideoDetail(
            **{k: v for k, v in dict(r).items() if k != "watched"},
            is_watched=bool(r["watched"]),
        )
        for r in rows
    ]
@router.get("/long", response_model=list[VideoDetail])
def long_videos(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return up to 20 randomly chosen downloaded videos longer than 2700 s (45 min)."""
    rows = db.execute(
        text("""
            SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
                   v.duration_seconds, v.published_at, v.tags, v.category,
                   c.id AS channel_id, c.name AS channel_name,
                   c.youtube_channel_id AS channel_youtube_id,
                   COALESCE(uv.watched, 0) AS watched,
                   COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
                   COALESCE(uv.downloaded, 0) AS is_downloaded,
                   COALESCE(uv.queued, 0) AS queued
            FROM videos v
            LEFT JOIN channels c ON v.channel_id = c.id
            LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
            WHERE uv.downloaded = 1
              AND v.duration_seconds > 2700
            ORDER BY RANDOM()
            LIMIT 20
        """),
        {"user_id": current_user.id},
    ).mappings().all()
    # As in the sibling endpoints: channel_id is part of the row, and the raw
    # "watched" column is translated to is_watched rather than passed through.
    return [
        VideoDetail(
            **{k: v for k, v in dict(r).items() if k != "watched"},
            is_watched=bool(r["watched"]),
        )
        for r in rows
    ]
@router.get("/surprise", response_model=list[dict])
def surprise_me(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return 10 surprise videos, occasionally with one discovery item mixed in."""
    picks = get_surprise_videos(db, current_user.id, limit=10)
    # Four requests in five return the plain list.
    if not random.random() < 0.2:
        return picks
    injection = get_discovery_injection(db, current_user.id)
    if injection and picks:
        # Place the discovery item somewhere among the first few positions.
        slot = random.randint(0, min(4, len(picks)))
        picks.insert(slot, {**injection, "is_discovery": True})
    return picks
# Shared SELECT / FROM / JOIN prefix for queries that return full video rows.
# It deliberately ends before any WHERE clause: callers append their own
# WHERE / ORDER BY / LIMIT and must bind :user_id.
#
# Per-user columns come from LEFT JOINs, so a video with no user_videos row
# still yields a row with the COALESCE defaults (0). channel_followed is 1
# only when the user has a user_channels row with status 'followed'.
# file_path and download_resolution come from the user's download with
# status 'complete' and are NULL otherwise.
_VIDEO_SELECT = """
SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
v.duration_seconds, v.published_at, v.tags, v.category,
c.id AS channel_id, c.name AS channel_name, c.youtube_channel_id AS channel_youtube_id,
COALESCE(uv.watched, 0) AS watched,
COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
COALESCE(uv.downloaded, 0) AS is_downloaded,
COALESCE(uv.liked, 0) AS liked,
COALESCE(uv.queued, 0) AS queued,
uv.rating AS rating,
CASE WHEN uc.id IS NOT NULL THEN 1 ELSE 0 END AS channel_followed,
d.file_path, d.resolution AS download_resolution
FROM videos v
LEFT JOIN channels c ON v.channel_id = c.id
LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
LEFT JOIN downloads d ON v.id = d.video_id AND d.user_id = :user_id AND d.status = 'complete'
LEFT JOIN user_channels uc ON c.id = uc.channel_id AND uc.user_id = :user_id AND uc.status = 'followed'
"""
def _row_to_detail(row) -> VideoDetail:
    """Convert a result row into a VideoDetail.

    The ``watched`` column becomes ``is_watched``, ``file_path`` is turned into
    ``local_file_url``, and ``score`` is dropped; every other column is passed
    to the model unchanged.
    """
    data = dict(row)
    watched = data.pop("watched")
    file_path = data.pop("file_path", None)
    data.pop("score", None)
    return VideoDetail(
        **data,
        is_watched=bool(watched),
        local_file_url=_local_file_url(file_path),
    )
def _upsert_video_from_yt(db: Session, youtube_video_id: str) -> bool:
    """Fetch fresh metadata via yt-dlp and insert or update the video and its channel.

    Commits the session on success.

    Returns:
        True when metadata was fetched and stored, False when the fetch
        returned nothing.
    """
    meta = ytdlp.fetch_video_metadata(youtube_video_id)
    if not meta:
        return False

    # The channel sub-dict is split off so the remaining keys describe only the video.
    channel_info = meta.pop("channel", {}) or {}
    channel = None
    channel_yt_id = channel_info.get("youtube_channel_id")
    if channel_yt_id:
        channel = db.query(Channel).filter_by(youtube_channel_id=channel_yt_id).first()
        if channel:
            # Refresh the known channel with non-null values, leaving its thumbnail as is.
            for field, value in channel_info.items():
                if field == "thumbnail_url" or value is None:
                    continue
                if hasattr(channel, field):
                    setattr(channel, field, value)
        else:
            known_fields = {
                field: value
                for field, value in channel_info.items()
                if hasattr(Channel, field)
            }
            channel = Channel(**known_fields)
            db.add(channel)
            db.flush()

    video = db.query(Video).filter_by(youtube_video_id=youtube_video_id).first()
    if video:
        for field, value in meta.items():
            if value is None or not hasattr(video, field):
                continue
            # Don't overwrite an already-set description/tags with an empty string from yt-dlp.
            keeps_existing = (
                field in ("description", "tags")
                and value == ""
                and getattr(video, field) is not None
            )
            if keeps_existing:
                continue
            setattr(video, field, value)
        if channel:
            video.channel_id = channel.id
    else:
        video_fields = {
            field: value for field, value in meta.items() if hasattr(Video, field)
        }
        video = Video(channel_id=channel.id if channel else None, **video_fields)
        db.add(video)

    db.commit()
    return True
class BookmarkOut(BaseModel):
    # Response model for one bookmark.

    # Allow validation straight from ORM objects.
    model_config = {"from_attributes": True}

    id: int
    video_id: int
    # Position in the video the bookmark points at, in seconds.
    timestamp_seconds: int
    note: Optional[str]
    # "manual" by default; chapter imports store "auto".
    source: str = "manual"
    created_at: datetime
class BookmarkCreate(BaseModel):
    # Request body for creating a bookmark.

    # Position in the video to bookmark, in seconds.
    timestamp_seconds: int
    # Optional free-text note; defaults to an empty string.
    note: Optional[str] = ""
class BookmarkPatch(BaseModel):
    # Request body for editing a bookmark; only the note can be changed.
    note: str
@router.get("/{video_id}/bookmarks", response_model=list[BookmarkOut])
def get_bookmarks(
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List the current user's bookmarks for a video, ordered by timestamp."""
    from ..models import VideoBookmark

    query = (
        db.query(VideoBookmark)
        .filter_by(user_id=current_user.id, video_id=video_id)
        .order_by(VideoBookmark.timestamp_seconds)
    )
    return [BookmarkOut.model_validate(bookmark) for bookmark in query.all()]
@router.post("/{video_id}/bookmarks", response_model=BookmarkOut, status_code=201)
def create_bookmark(
    video_id: int,
    body: BookmarkCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Create a bookmark on a video for the current user.

    Creating a bookmark also applies a +2.0 affinity update for the video.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    from ..models import VideoBookmark

    video = db.query(Video).filter(Video.id == video_id).first()
    if not video:
        raise HTTPException(status_code=404, detail="Video not found")

    bookmark = VideoBookmark(
        user_id=current_user.id,
        video_id=video_id,
        timestamp_seconds=body.timestamp_seconds,
        note=body.note or "",
    )
    db.add(bookmark)
    _update_affinity(db, current_user.id, video, +2.0)
    db.commit()
    # Reload so database-generated columns are populated in the response.
    db.refresh(bookmark)
    return BookmarkOut.model_validate(bookmark)
@router.post("/{video_id}/bookmarks/import-chapters", response_model=list[BookmarkOut], status_code=200)
def import_chapters(
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Create auto bookmarks from stored chapter data.

    Idempotent: nothing is created when the user already has an "auto"
    bookmark for the video. If chapters have never been fetched (NULL),
    metadata is refreshed from yt-dlp first.

    Returns:
        The bookmarks created, or an empty list when the video is unknown,
        has fewer than two usable chapters, or was already imported.
    """
    from ..models import VideoBookmark
    import json as _json

    video = db.query(Video).filter(Video.id == video_id).first()
    if not video:
        return []

    # chapters=NULL means never fetched; fetch now and cache the result (even if empty)
    if video.chapters is None:
        _upsert_video_from_yt(db, video.youtube_video_id)
        db.refresh(video)
        # Mark as checked even if no chapters found, so we don't re-fetch next time
        if video.chapters is None:
            video.chapters = "[]"
            db.commit()

    # Stored chapter data originates from external metadata, so a corrupt or
    # unexpected value is treated as "no chapters" rather than a server error.
    try:
        raw_chapters = _json.loads(video.chapters or "[]")
    except (TypeError, ValueError):
        raw_chapters = []
    if not isinstance(raw_chapters, list):
        raw_chapters = []

    # Keep only entries with a usable start time. The value is truncated to
    # whole seconds because bookmark timestamps are integers in the response
    # model.
    chapters: list[tuple[int, str]] = []
    for ch in raw_chapters:
        if not isinstance(ch, dict):
            continue
        try:
            start = int(ch["start_time"])
        except (KeyError, TypeError, ValueError, OverflowError):
            continue
        title = ch.get("title")
        chapters.append((start, title if isinstance(title, str) else ""))

    # Skip if trivial (single chapter) or already imported
    if len(chapters) < 2:
        return []
    existing = db.query(VideoBookmark).filter_by(
        user_id=current_user.id, video_id=video_id, source="auto"
    ).first()
    if existing:
        return []

    created = [
        VideoBookmark(
            user_id=current_user.id,
            video_id=video_id,
            timestamp_seconds=start,
            note=title,
            source="auto",
        )
        for start, title in chapters
    ]
    db.add_all(created)
    db.commit()
    for bm in created:
        db.refresh(bm)
    return [BookmarkOut.model_validate(bm) for bm in created]
@router.delete("/{video_id}/bookmarks/clear-chapters", status_code=204)
def clear_chapters(
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Remove every bookmark with source "auto" that the current user has on this video."""
    from ..models import VideoBookmark

    auto_bookmarks = db.query(VideoBookmark).filter_by(
        user_id=current_user.id,
        video_id=video_id,
        source="auto",
    )
    auto_bookmarks.delete()
    db.commit()
@router.patch("/{video_id}/bookmarks/{bookmark_id}", response_model=BookmarkOut)
def update_bookmark(
    video_id: int,
    bookmark_id: int,
    body: BookmarkPatch,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Replace the note of one of the current user's bookmarks.

    Raises:
        HTTPException: 404 when no bookmark matches the id, video and user.
    """
    from ..models import VideoBookmark

    bookmark = (
        db.query(VideoBookmark)
        .filter_by(id=bookmark_id, user_id=current_user.id, video_id=video_id)
        .first()
    )
    if not bookmark:
        raise HTTPException(status_code=404, detail="Bookmark not found")

    bookmark.note = body.note
    db.commit()
    db.refresh(bookmark)
    return BookmarkOut.model_validate(bookmark)
@router.delete("/{video_id}/bookmarks/{bookmark_id}", status_code=204)
def delete_bookmark(
    video_id: int,
    bookmark_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete one of the current user's bookmarks; a missing bookmark is not an error."""
    from ..models import VideoBookmark

    bookmark = (
        db.query(VideoBookmark)
        .filter_by(id=bookmark_id, user_id=current_user.id, video_id=video_id)
        .first()
    )
    if not bookmark:
        return
    db.delete(bookmark)
    db.commit()
@router.get("/queue", response_model=list[VideoDetail])
def queued_videos(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return every video the current user has queued, highest user_videos id first."""
    statement = text(_VIDEO_SELECT + """
WHERE uv.user_id = :user_id AND uv.queued = 1
ORDER BY uv.id DESC
""")
    records = db.execute(statement, {"user_id": current_user.id}).mappings().all()
    return [_row_to_detail(record) for record in records]
@router.get("/liked", response_model=list[VideoDetail])
def liked_videos(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return every video the current user has liked, most recently liked first."""
    statement = text(_VIDEO_SELECT + """
WHERE uv.user_id = :user_id AND uv.liked = 1
ORDER BY uv.liked_at DESC
""")
    records = db.execute(statement, {"user_id": current_user.id}).mappings().all()
    return [_row_to_detail(record) for record in records]
@router.get("/by-yt/{youtube_video_id}", response_model=VideoDetail)
def get_video_by_yt_id(
    youtube_video_id: str,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Look a video up by its YouTube id, fetching metadata when needed.

    An unknown or untitled video is fetched synchronously. A known video that
    lacks a description or chapters is returned immediately and enriched in a
    background task.

    Raises:
        HTTPException: 404 when the video is still not in the database.
    """
    known = db.query(Video).filter_by(youtube_video_id=youtube_video_id).first()
    if known and known.title:
        if known.description is None or known.chapters is None:
            # Video known but missing enrichment: fetch in background, return immediately.
            from ..database import SessionLocal

            def _enrich(yt_id: str):
                # The task gets its own session, closed when the fetch ends.
                session = SessionLocal()
                try:
                    _upsert_video_from_yt(session, yt_id)
                finally:
                    session.close()

            background_tasks.add_task(_enrich, youtube_video_id)
    else:
        # Video unknown: must block to get at least a title before anything can be rendered.
        _upsert_video_from_yt(db, youtube_video_id)

    record = db.execute(
        text(_VIDEO_SELECT + "WHERE v.youtube_video_id = :yt_id"),
        {"user_id": current_user.id, "yt_id": youtube_video_id},
    ).mappings().first()
    if record:
        return _row_to_detail(record)
    raise HTTPException(status_code=404, detail="Video not found")
@router.get("/{video_id}/related", response_model=list[VideoDetail])
def related_videos(
    video_id: int,
    mode: str = "weighted",  # weighted | random
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Suggest up to 14 videos from unseen discovery-queue channels.

    At most two videos (the most recently published) are taken per channel,
    and the channel of the video being viewed is excluded. ``mode="random"``
    shuffles the result; any other value orders by per-channel recency rank,
    then discovery score.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    video = db.query(Video).filter(Video.id == video_id).first()
    if not video:
        raise HTTPException(status_code=404, detail="Video not found")

    # Both alternatives are fixed strings, so interpolating one is safe.
    order_clause = (
        "ORDER BY RANDOM()"
        if mode == "random"
        else "ORDER BY rn ASC, score DESC, RANDOM()"
    )
    statement = text(f"""
SELECT * FROM (
SELECT v.id, v.youtube_video_id, v.title, v.description, v.thumbnail_url,
v.duration_seconds, v.published_at, v.tags, v.category,
c.id AS channel_id, c.name AS channel_name,
c.youtube_channel_id AS channel_youtube_id,
COALESCE(uv.watched, 0) AS watched,
COALESCE(uv.watch_progress_seconds, 0) AS watch_progress_seconds,
COALESCE(uv.downloaded, 0) AS is_downloaded,
COALESCE(uv.liked, 0) AS liked,
COALESCE(uv.queued, 0) AS queued,
0 AS channel_followed,
NULL AS file_path, NULL AS download_resolution,
dq.score,
ROW_NUMBER() OVER (
PARTITION BY v.channel_id
ORDER BY v.published_at DESC NULLS LAST
) AS rn
FROM videos v
JOIN channels c ON v.channel_id = c.id
JOIN discovery_queue dq ON c.id = dq.channel_id
LEFT JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
WHERE dq.user_id = :user_id AND dq.seen = 0
AND dq.channel_id NOT IN (
SELECT channel_id FROM user_channels
WHERE user_id = :user_id AND status IN ('followed', 'dismissed')
)
AND v.channel_id != :channel_id
)
WHERE rn <= 2
{order_clause}
LIMIT 14
""")
    bindings = {"user_id": current_user.id, "channel_id": video.channel_id or 0}
    records = db.execute(statement, bindings).mappings().all()
    return [_row_to_detail(record) for record in records]
@router.get("/{video_id}", response_model=VideoDetail)
def get_video(
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return one video, with the current user's state, by its database id.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    statement = text(_VIDEO_SELECT + "WHERE v.id = :video_id")
    bindings = {"user_id": current_user.id, "video_id": video_id}
    record = db.execute(statement, bindings).mappings().first()
    if record:
        return _row_to_detail(record)
    raise HTTPException(status_code=404, detail="Video not found")
@router.patch("/{video_id}/progress")
def update_progress(
    video_id: int,
    body: ProgressUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Store playback progress and, optionally, the watched flag for a video.

    Side effects beyond the progress fields:
      * first time marked watched: +2.0 affinity, and a completed download
        without a pending deletion is scheduled for deletion in 7 days;
      * marked watched again: rewatch counter incremented, +3.0 affinity;
      * no watched flag, not yet watched, video longer than 60 s and progress
        under 20 %: -0.5 affinity.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    from ..models import Download
    from datetime import timedelta

    video = db.query(Video).filter(Video.id == video_id).first()
    if not video:
        raise HTTPException(status_code=404, detail="Video not found")
    uv = _get_uv(db, current_user.id, video_id)
    prev_watched = bool(uv.watched)

    # A playback position cannot be negative; clamp so a bad client value does
    # not store a negative position or a negative completion percentage.
    progress = max(body.watch_progress_seconds, 0)
    uv.watch_progress_seconds = progress
    uv.last_watched_at = datetime.utcnow()

    # Compute completion percent whenever we have duration
    if video.duration_seconds and video.duration_seconds > 0:
        uv.completion_percent = round(
            min(progress / video.duration_seconds * 100, 100), 1
        )

    if body.watched is not None:
        if body.watched and not prev_watched:
            # First completion — positive affinity signal
            uv.watched = True
            _update_affinity(db, current_user.id, video, +2.0)
            dl = db.query(Download).filter_by(
                user_id=current_user.id, video_id=video_id, status="complete"
            ).filter(Download.pending_delete_at.is_(None)).first()
            if dl:
                dl.pending_delete_at = datetime.utcnow() + timedelta(days=7)
        elif body.watched and prev_watched:
            # Rewatch — strongest positive signal
            uv.rewatch_count = (uv.rewatch_count or 0) + 1
            _update_affinity(db, current_user.id, video, +3.0)
        elif not body.watched:
            uv.watched = False
    # Early bail signal: navigating away before 20% without marking watched.
    # NOTE(review): this branch runs on every request that omits `watched`, so
    # if the client sends periodic progress updates, each one below 20% would
    # subtract 0.5 — confirm the client only sends this on navigation.
    elif not prev_watched and video.duration_seconds and video.duration_seconds > 60:
        pct = progress / video.duration_seconds
        if pct < 0.20:
            _update_affinity(db, current_user.id, video, -0.5)

    db.commit()
    return {"ok": True}
@router.post("/{video_id}/like")
def toggle_like(
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Toggle the current user's like on a video.

    Liking adds +3.0 affinity and, for a channel that is neither followed nor
    dismissed, +30 to that channel's discovery score (creating the discovery
    entry if needed). Unliking reverses both adjustments, so toggling back and
    forth does not inflate either score.

    Returns:
        ``{"liked": <new state>}``.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    from ..models import DiscoveryQueue, UserChannel

    video = db.query(Video).filter(Video.id == video_id).first()
    if not video:
        raise HTTPException(status_code=404, detail="Video not found")
    uv = _get_uv(db, current_user.id, video_id)
    uv.liked = not uv.liked
    uv.liked_at = datetime.utcnow() if uv.liked else None

    # When liking a video from a channel not yet followed, boost that channel's
    # discovery score directly so it rises to the top of recommendations.
    if video.channel_id:
        uc = db.query(UserChannel).filter_by(
            user_id=current_user.id, channel_id=video.channel_id,
        ).first()
        not_followed = not uc or uc.status not in ("followed", "dismissed")
        if not_followed:
            dq = db.query(DiscoveryQueue).filter_by(
                user_id=current_user.id, channel_id=video.channel_id,
            ).first()
            if uv.liked:
                if dq:
                    dq.score += 30.0
                else:
                    db.add(DiscoveryQueue(
                        user_id=current_user.id,
                        channel_id=video.channel_id,
                        score=30.0,
                        source="liked",
                    ))
            elif dq:
                # Undo the boost given by the like, mirroring the affinity
                # reversal below; otherwise like/unlike/like would add 60.
                dq.score -= 30.0

    # Affinity: like = strong positive, unlike = remove that boost
    _update_affinity(db, current_user.id, video, +3.0 if uv.liked else -3.0)
    db.commit()
    return {"liked": uv.liked}
class RateBody(BaseModel):
    # Request body for rating a video.
    # 1 = thumbs up, -1 = thumbs down, 0 = clear
    rating: int
@router.post("/{video_id}/rate")
def rate_video(
    video_id: int,
    body: RateBody,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Set or clear the current user's thumbs rating on a video.

    Any rating other than 1 or -1 clears the stored rating. For a channel that
    is neither followed nor dismissed, the channel's discovery score moves by
    15 per rating step; it is floored at -50, and an entry whose score drops
    below zero is marked seen.

    Returns:
        ``{"rating": <stored rating or None>}``.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    from ..models import DiscoveryQueue, UserChannel

    video = db.query(Video).filter(Video.id == video_id).first()
    if not video:
        raise HTTPException(status_code=404, detail="Video not found")

    uv = _get_uv(db, current_user.id, video_id)
    previous = uv.rating or 0
    requested = body.rating if body.rating in (1, -1) else 0
    # 0 means "clear", which is stored as NULL.
    uv.rating = requested or None

    # Adjust discovery score for unfollowed channels
    if video.channel_id:
        uc = db.query(UserChannel).filter_by(
            user_id=current_user.id, channel_id=video.channel_id,
        ).first()
        untracked = not uc or uc.status not in ("followed", "dismissed")
        delta = requested * 15 - previous * 15
        if untracked and delta != 0:
            dq = db.query(DiscoveryQueue).filter_by(
                user_id=current_user.id, channel_id=video.channel_id,
            ).first()
            if dq:
                dq.score = max(dq.score + delta, -50)
                if dq.score < 0:
                    dq.seen = True
            elif delta > 0:
                # Only a positive change creates a new discovery entry.
                db.add(DiscoveryQueue(
                    user_id=current_user.id,
                    channel_id=video.channel_id,
                    score=float(delta),
                    source="rated",
                ))

    db.commit()
    return {"rating": uv.rating}
@router.post("/{video_id}/queue")
def toggle_queue(
    video_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Flip the current user's queued flag on a video.

    Returns:
        ``{"queued": <new state>}``.

    Raises:
        HTTPException: 404 when the video does not exist.
    """
    if not db.query(Video).filter(Video.id == video_id).first():
        raise HTTPException(status_code=404, detail="Video not found")
    user_video = _get_uv(db, current_user.id, video_id)
    user_video.queued = not user_video.queued
    db.commit()
    return {"queued": user_video.queued}

View File

View File

@@ -0,0 +1,614 @@
"""Discovery engine — search-based crawl, trending, community signal, category clustering."""
import json
import random
from datetime import datetime
from sqlalchemy.orm import Session
from sqlalchemy import text
from ..models import Channel, UserChannel, DiscoveryQueue, Video
from . import ytdlp
def _fetch_and_index_channel(db: Session, channel: Channel):
    """Fetch full metadata + recent videos for a discovered channel.

    Non-empty channel fields are copied onto ``channel``, ``crawled_at`` is
    stamped, and videos not yet in the database are inserted. Videos that
    still have no publish date after the extra lookups are skipped.

    Best-effort: any failure is logged and the session rolled back; nothing
    is raised to the caller.
    """
    import logging

    log = logging.getLogger(__name__)
    # Captured up front so the failure log below never has to touch the ORM
    # object while the session may be in a failed state.
    yt_channel_id = None
    try:
        yt_channel_id = channel.youtube_channel_id
        result = ytdlp.fetch_channel_metadata(yt_channel_id, max_videos=10)
        if not result:
            return
        ch_data = result.get("channel", {})
        for k, v in ch_data.items():
            if hasattr(channel, k) and v is not None and v != "":
                setattr(channel, k, v)
        channel.crawled_at = datetime.utcnow()
        videos = result.get("videos", [])

        # For videos missing a date (RSS didn't cover them or flat-playlist had no timestamp),
        # do individual fetches — capped at 3 to avoid slow-downs.
        dateless = [v for v in videos if not v.get("published_at")]
        individual_fetched: dict[str, dict] = {}
        for vdata in dateless[:3]:
            yt_id = vdata.get("youtube_video_id")
            if not yt_id:
                continue
            try:
                meta = ytdlp.fetch_video_metadata(yt_id)
            except Exception:
                # One failed lookup must not abort the channel; the video is
                # simply left undated and skipped below.
                log.warning("Could not fetch metadata for video %s", yt_id, exc_info=True)
                continue
            if meta and meta.get("published_at"):
                individual_fetched[yt_id] = meta

        for vdata in videos:
            yt_id = vdata.get("youtube_video_id")
            if not yt_id:
                continue
            # Prefer individually-fetched metadata if we retrieved it
            if yt_id in individual_fetched:
                vdata = individual_fetched[yt_id]
            # Skip videos we still can't date — undated videos break feed ordering
            if not vdata.get("published_at"):
                continue
            if db.query(Video).filter_by(youtube_video_id=yt_id).first():
                continue
            db.add(Video(
                youtube_video_id=yt_id,
                channel_id=channel.id,
                # `or ""` also covers a title key that is present but None.
                title=vdata.get("title") or "",
                description=vdata.get("description"),
                thumbnail_url=vdata.get("thumbnail_url"),
                duration_seconds=vdata.get("duration_seconds"),
                published_at=vdata.get("published_at"),
                tags=vdata.get("tags"),
                category=vdata.get("category"),
            ))
        db.commit()
    except Exception:
        db.rollback()
        # Still best-effort, but no longer silent: leave a trace of what failed.
        log.warning("Failed to index channel %s", yt_channel_id, exc_info=True)
def _upsert_channel(db: Session, channel_data: dict) -> Channel | None:
    """Return the Channel for ``channel_data``, inserting it when unknown.

    Returns ``None`` when the data carries no ``youtube_channel_id``. A newly
    created channel is flushed but not committed.
    """
    yt_channel_id = channel_data.get("youtube_channel_id")
    if not yt_channel_id:
        return None
    found = db.query(Channel).filter_by(youtube_channel_id=yt_channel_id).first()
    if found:
        return found
    created = Channel(**channel_data)
    db.add(created)
    db.flush()
    return created
def _add_to_discovery(
    db: Session, user_id: int, channel_id: int, score: float, source: str,
    preview_json: str | None = None,
):
    """Record a discovery signal for a channel in the user's discovery queue.

    A channel not yet queued gets a new entry with the given score and source.
    For a channel already queued, half of ``score`` is added to the existing
    score (no upper bound is enforced here), the original source is kept, and
    ``preview_json`` is filled in only if the entry had none. Nothing is
    committed.
    """
    entry = db.query(DiscoveryQueue).filter_by(user_id=user_id, channel_id=channel_id).first()
    if not entry:
        db.add(DiscoveryQueue(
            user_id=user_id,
            channel_id=channel_id,
            score=score,
            source=source,
            preview_json=preview_json,
        ))
        return
    # Repeat signals accumulate at half weight so a single source dominates less.
    entry.score = entry.score + score * 0.5
    if preview_json and not entry.preview_json:
        entry.preview_json = preview_json
def _search_and_store(
    db: Session, user_id: int, queries: list[str],
    followed_yt_ids: set[str], score_multiplier: float, source: str,
):
    """Run YouTube searches for the given queries and add results to discovery.

    Channels are ranked by how many search results they appeared in. Each
    channel not already followed or dismissed by the user is queued with a
    score of ``hits * score_multiplier`` and up to three preview thumbnails.
    After committing, at most five new or never-crawled channels are indexed.
    """
    import logging

    log = logging.getLogger(__name__)
    discovered: dict[str, dict] = {}
    for query in queries:
        try:
            results = ytdlp.search_youtube(query, max_results=20)
        except Exception:
            # Best-effort: a failed search skips this query only.
            log.warning("YouTube search failed for query %r", query, exc_info=True)
            continue
        for video in results or []:
            # `or {}` also covers a channel key that is present but None, so
            # one incomplete result no longer discards the rest of the query.
            ch = video.get("channel") or {}
            yt_id = ch.get("youtube_channel_id")
            name = (ch.get("name") or "").strip()
            if not (yt_id and name) or yt_id in followed_yt_ids:
                continue
            info = discovered.setdefault(yt_id, {"name": name, "count": 0, "previews": []})
            info["count"] += 1
            previews = info["previews"]
            if len(previews) < 3 and video.get("thumbnail_url") and video.get("title"):
                previews.append({
                    "thumbnail_url": video["thumbnail_url"],
                    "title": video["title"],
                })

    if not discovered:
        return

    candidates = sorted(discovered.items(), key=lambda x: -x[1]["count"])
    needs_indexing: list[int] = []
    for yt_id, info in candidates:
        channel = db.query(Channel).filter_by(youtube_channel_id=yt_id).first()
        is_new = channel is None
        if not channel:
            channel = Channel(
                youtube_channel_id=yt_id,
                name=info["name"],
                description="",
                thumbnail_url=None,
            )
            db.add(channel)
            db.flush()
        uc = db.query(UserChannel).filter_by(user_id=user_id, channel_id=channel.id).first()
        if uc and uc.status in ("followed", "dismissed"):
            continue
        preview_json = json.dumps(info["previews"]) if info["previews"] else None
        _add_to_discovery(
            db, user_id, channel.id,
            score=float(info["count"]) * score_multiplier,
            source=source,
            preview_json=preview_json,
        )
        if is_new or not channel.crawled_at:
            needs_indexing.append(channel.id)
    db.commit()

    # Indexing calls out per channel, so it is capped per run.
    for channel_id in needs_indexing[:5]:
        channel = db.query(Channel).filter_by(id=channel_id).first()
        if channel:
            _fetch_and_index_channel(db, channel)
def crawl_by_search(db: Session, user_id: int):
    """Discover channels by searching YouTube with the user's tags, channel names and categories.

    Up to 15 distinct queries are built from the six most frequent tags, a
    random sample of up to eight followed channel names, and the five most
    common categories, in that order of priority. Does nothing when no query
    can be built.
    """
    # Names and YouTube ids of every followed channel.
    followed = db.execute(
        text("""
SELECT c.name, c.youtube_channel_id
FROM channels c
JOIN user_channels uc ON c.id = uc.channel_id
WHERE uc.user_id = :user_id AND uc.status = 'followed'
"""),
        {"user_id": user_id},
    ).mappings().all()
    followed_yt_ids = {record["youtube_channel_id"] for record in followed}
    followed_names = [record["name"] for record in followed if record["name"]]

    # Tags of videos from followed channels plus tags of liked videos.
    # SQLite requires LIMIT inside a subquery when used with UNION ALL
    tagged = db.execute(
        text("""
SELECT tags FROM (
SELECT v.tags
FROM videos v
JOIN user_channels uc ON v.channel_id = uc.channel_id
WHERE uc.user_id = :user_id AND uc.status = 'followed'
AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
LIMIT 300
)
UNION ALL
SELECT tags FROM (
SELECT v.tags
FROM user_videos uv
JOIN videos v ON uv.video_id = v.id
WHERE uv.user_id = :user_id AND uv.liked = 1
AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
LIMIT 100
)
"""),
        {"user_id": user_id},
    ).mappings().all()

    tag_counts: dict[str, int] = {}
    for record in tagged:
        try:
            for raw_tag in json.loads(record["tags"]):
                if not isinstance(raw_tag, str):
                    continue
                cleaned = raw_tag.lower().strip()
                # Very short or very long tags make poor search queries.
                if 3 <= len(cleaned) <= 40:
                    tag_counts[cleaned] = tag_counts.get(cleaned, 0) + 1
        except (json.JSONDecodeError, TypeError):
            continue

    # Most common categories, used as a fallback.
    categories = db.execute(
        text("""
SELECT v.category, COUNT(*) AS cnt
FROM videos v
JOIN user_channels uc ON v.channel_id = uc.channel_id
WHERE uc.user_id = :user_id AND uc.status = 'followed'
AND v.category IS NOT NULL
GROUP BY v.category
ORDER BY cnt DESC
LIMIT 5
"""),
        {"user_id": user_id},
    ).mappings().all()

    # Query pool: most frequent tags, then sampled names, then categories.
    top_tags = sorted(tag_counts, key=tag_counts.get, reverse=True)[:6]
    top_cats = [record["category"] for record in categories]
    # A random sample of followed channel names diversifies discovery each run.
    sampled_names: list[str] = []
    if followed_names:
        sampled_names = random.sample(followed_names, min(8, len(followed_names)))

    # dict.fromkeys de-duplicates while keeping the priority order.
    queries = list(dict.fromkeys([*top_tags, *sampled_names, *top_cats]))[:15]
    if queries:
        _search_and_store(db, user_id, queries, followed_yt_ids, score_multiplier=5.0, source="search")
def update_community_signal(db: Session, user_id: int):
    """Queue channels that other users follow and this user has never touched.

    Selects up to 100 channels followed by users other than ``user_id`` that
    have no ``user_channels`` row (of any status) for ``user_id``, ordered by
    the number of distinct followers. Each one is handed to
    ``_add_to_discovery`` with score ``follower_count * 5`` and source
    ``"community"``. The session is committed once after all rows are queued.
    """
    community_sql = text("""
        SELECT uc.channel_id, COUNT(DISTINCT uc.user_id) AS follower_count
        FROM user_channels uc
        WHERE uc.user_id != :user_id
        AND uc.status = 'followed'
        AND uc.channel_id NOT IN (
        SELECT channel_id FROM user_channels
        WHERE user_id = :user_id
        )
        GROUP BY uc.channel_id
        ORDER BY follower_count DESC
        LIMIT 100
    """)
    candidates = db.execute(community_sql, {"user_id": user_id}).mappings().all()

    for candidate in candidates:
        popularity = float(candidate["follower_count"])
        _add_to_discovery(
            db,
            user_id,
            candidate["channel_id"],
            score=popularity * 5,
            source="community",
        )

    db.commit()
def update_category_clusters(db: Session, user_id: int):
    """Queue channels that publish in the categories this user watches most.

    Steps:
      1. Find the user's top 5 categories by number of watched videos.
      2. Select up to 100 distinct channels that have a video in one of those
         categories and no ``user_channels`` row for this user.
      3. Add each to discovery with a flat score of 3.0 and source
         ``"category"``, then commit.

    Returns early (without committing) when the user has no watched videos
    with a category.
    """
    rows = db.execute(
        text("""
            SELECT v.category, COUNT(*) AS watch_count
            FROM user_videos uv
            JOIN videos v ON uv.video_id = v.id
            WHERE uv.user_id = :user_id AND uv.watched = 1 AND v.category IS NOT NULL
            GROUP BY v.category
            ORDER BY watch_count DESC
            LIMIT 5
        """),
        {"user_id": user_id},
    ).mappings().all()
    top_categories = [r["category"] for r in rows]
    if not top_categories:
        return

    # Category names originate from scraped video metadata, so they must be
    # bound as parameters rather than quoted into the SQL text: a name that
    # contains an apostrophe would otherwise break (or inject into) the query.
    params: dict = {"user_id": user_id}
    placeholder_names: list[str] = []
    for index, category in enumerate(top_categories):
        key = f"cat_{index}"
        params[key] = category
        placeholder_names.append(f":{key}")
    placeholders = ", ".join(placeholder_names)

    candidate_rows = db.execute(
        text(f"""
            SELECT DISTINCT v.channel_id
            FROM videos v
            WHERE v.category IN ({placeholders})
            AND v.channel_id NOT IN (
            SELECT channel_id FROM user_channels WHERE user_id = :user_id
            )
            LIMIT 100
        """),
        params,
    ).mappings().all()

    for row in candidate_rows:
        _add_to_discovery(db, user_id, row["channel_id"], score=3.0, source="category")
    db.commit()
def update_liked_signal(db: Session, user_id: int):
    """Run a YouTube search seeded by the most frequent tags of liked videos.

    Tags are lower-cased and stripped; only tags of 3 to 40 characters are
    counted. The six highest-weighted tags are passed to ``_search_and_store``
    with ``score_multiplier=10.0`` and source ``"liked"``. Does nothing when
    the user has no liked videos with usable tags.
    """
    params = {"user_id": user_id}
    liked_tag_blobs = db.execute(
        text("""
            SELECT v.tags
            FROM user_videos uv
            JOIN videos v ON uv.video_id = v.id
            WHERE uv.user_id = :user_id AND uv.liked = 1
            AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
        """),
        params,
    ).mappings().all()
    if not liked_tag_blobs:
        return

    weights: dict[str, int] = {}
    for blob in liked_tag_blobs:
        try:
            for raw_tag in json.loads(blob["tags"]):
                if not isinstance(raw_tag, str):
                    continue
                key = raw_tag.lower().strip()
                if len(key) < 3 or len(key) > 40:
                    continue
                # Every occurrence in a liked video adds 2 to the tag's weight.
                weights[key] = weights.get(key, 0) + 2
        except (json.JSONDecodeError, TypeError):
            # Malformed or non-iterable tag payload: ignore this video.
            pass
    if not weights:
        return

    followed_yt_ids = set(db.execute(
        text("""
            SELECT c.youtube_channel_id FROM channels c
            JOIN user_channels uc ON c.id = uc.channel_id
            WHERE uc.user_id = :user_id AND uc.status = 'followed'
        """),
        params,
    ).scalars().all())

    ranked = sorted(weights.items(), key=lambda item: -item[1])
    top_tags = [name for name, _ in ranked[:6]]
    _search_and_store(db, user_id, top_tags, followed_yt_ids, score_multiplier=10.0, source="liked")
def update_watch_signal(db: Session, user_id: int):
    """Run a YouTube search seeded by tags that recur across watched videos.

    The signal is deliberately dampened: a tag is counted at most once per
    watched video and must show up in at least 3 distinct watched videos
    before it is used. Qualifying tags are searched with a modest
    ``score_multiplier`` of 3.0 (liked tags use 10.0), so one stray view does
    not steer recommendations.
    """
    params = {"user_id": user_id}
    watched_tag_blobs = db.execute(
        text("""
            SELECT v.tags
            FROM user_videos uv
            JOIN videos v ON uv.video_id = v.id
            WHERE uv.user_id = :user_id AND uv.watched = 1
            AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
        """),
        params,
    ).mappings().all()
    if not watched_tag_blobs:
        return

    videos_per_tag: dict[str, int] = {}
    for blob in watched_tag_blobs:
        try:
            parsed = json.loads(blob["tags"])
            counted_here = set()
            for raw_tag in parsed:
                if not isinstance(raw_tag, str):
                    continue
                key = raw_tag.lower().strip()
                if len(key) < 3 or len(key) > 40 or key in counted_here:
                    continue
                videos_per_tag[key] = videos_per_tag.get(key, 0) + 1
                counted_here.add(key)
        except (json.JSONDecodeError, TypeError):
            # Malformed or non-iterable tag payload: ignore this video.
            pass

    # Keep only tags seen in 3 or more distinct watched videos.
    qualified = {}
    for key, video_count in videos_per_tag.items():
        if video_count >= 3:
            qualified[key] = video_count
    if not qualified:
        return

    followed_yt_ids = set(db.execute(
        text("""
            SELECT c.youtube_channel_id FROM channels c
            JOIN user_channels uc ON c.id = uc.channel_id
            WHERE uc.user_id = :user_id AND uc.status = 'followed'
        """),
        params,
    ).scalars().all())

    ranked = sorted(qualified.items(), key=lambda item: -item[1])
    top_tags = [name for name, _ in ranked[:6]]
    _search_and_store(db, user_id, top_tags, followed_yt_ids, score_multiplier=3.0, source="watched")
def _build_user_tag_profile(db: Session, user_id: int) -> dict[str, float]:
    """Build a tag -> weight map from the user's liked and watched videos.

    Each video contributes its tags once: with weight 3.0 when the video is
    liked, otherwise 1.0. Tags are lower-cased and stripped, and only tags of
    3 to 40 characters are kept.
    """
    video_rows = db.execute(
        text("""
            SELECT v.tags, MAX(uv.liked) AS liked
            FROM user_videos uv
            JOIN videos v ON uv.video_id = v.id
            WHERE uv.user_id = :user_id AND (uv.liked = 1 OR uv.watched = 1)
            AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
            GROUP BY v.id
        """),
        {"user_id": user_id},
    ).mappings().all()

    weights: dict[str, float] = {}
    for video_row in video_rows:
        if video_row["liked"]:
            per_tag = 3.0
        else:
            per_tag = 1.0
        try:
            for raw_tag in json.loads(video_row["tags"]):
                if not isinstance(raw_tag, str):
                    continue
                key = raw_tag.lower().strip()
                if len(key) < 3 or len(key) > 40:
                    continue
                weights[key] = weights.get(key, 0.0) + per_tag
        except (json.JSONDecodeError, TypeError):
            # Malformed or non-iterable tag payload: ignore this video.
            pass
    return weights
def _tag_relevance_score(tag_profile: dict[str, float], tags_json: str | None) -> float:
"""Score a candidate channel's tags against the user's interest profile."""
if not tag_profile or not tags_json:
return 0.0
try:
tags = json.loads(tags_json)
except (json.JSONDecodeError, TypeError):
return 0.0
score = 0.0
for tag in tags:
if isinstance(tag, str):
t = tag.lower().strip()
score += tag_profile.get(t, 0.0)
return min(score, 50.0)
def _dismissed_channel_tags(db: Session, user_id: int) -> set[str]:
    """Return tags that recur in videos of channels this user dismissed.

    Looks at up to 500 tagged videos belonging to dismissed channels, counts
    every normalised tag (lower-cased, stripped, 3 to 40 characters), and
    keeps only those counted at least 3 times, treating them as a strong
    "not interested" signal.
    """
    video_rows = db.execute(
        text("""
            SELECT v.tags
            FROM user_channels uc
            JOIN videos v ON v.channel_id = uc.channel_id
            WHERE uc.user_id = :user_id AND uc.status = 'dismissed'
            AND v.tags IS NOT NULL AND v.tags != '' AND v.tags != '[]'
            LIMIT 500
        """),
        {"user_id": user_id},
    ).mappings().all()

    occurrences: dict[str, int] = {}
    for video_row in video_rows:
        try:
            for raw_tag in json.loads(video_row["tags"]):
                if not isinstance(raw_tag, str):
                    continue
                key = raw_tag.lower().strip()
                if len(key) < 3 or len(key) > 40:
                    continue
                occurrences[key] = occurrences.get(key, 0) + 1
        except (json.JSONDecodeError, TypeError):
            # Malformed or non-iterable tag payload: ignore this video.
            pass

    # A tag must be counted 3+ times before it is treated as disliked.
    strong: set[str] = set()
    for key, count in occurrences.items():
        if count >= 3:
            strong.add(key)
    return strong
def update_trending_signal(db: Session, user_id: int, regions: list[str]):
    """Queue channels found in regional trending results, scored by interest overlap.

    For every region, trending videos are fetched and grouped by channel
    (channels the user already follows are skipped). Each remaining channel is
    created if unknown, skipped if the user follows or dismissed it, and then
    scored as:

        base  = appearances * 4 * number_of_regions
        boost = tag relevance against the user's profile (max 30; only for
                already-known channels that have been crawled)
        penalty = 5 per tag that matches the user's dismissed-tag set
                  (max 80% of base; only for already-known channels)

    Channels with a positive final score are added to discovery with source
    ``"trending"`` and up to three preview videos. After committing, at most
    five channels that are new or not yet crawled are indexed.

    Args:
        db: Active database session.
        user_id: User whose discovery queue is updated.
        regions: Region codes to query; an empty list makes this a no-op.
    """
    if not regions:
        return

    tag_profile = _build_user_tag_profile(db, user_id)
    dismiss_tags = _dismissed_channel_tags(db, user_id)
    followed_yt_ids = set(db.execute(
        text("""
            SELECT c.youtube_channel_id FROM channels c
            JOIN user_channels uc ON c.id = uc.channel_id
            WHERE uc.user_id = :user_id AND uc.status = 'followed'
        """),
        {"user_id": user_id},
    ).scalars().all())
    dismissed_channel_ids = set(db.execute(
        text("""
            SELECT channel_id FROM user_channels
            WHERE user_id = :user_id AND status = 'dismissed'
        """),
        {"user_id": user_id},
    ).scalars().all())

    # youtube_channel_id -> {"name", "count", "regions", "previews"}
    discovered: dict[str, dict] = {}
    for region in regions:
        try:
            videos = ytdlp.fetch_trending(region=region, max_results=50)
            for video in videos:
                ch = video.get("channel", {})
                yt_id = ch.get("youtube_channel_id")
                name = (ch.get("name") or "").strip()
                if not yt_id or not name or yt_id in followed_yt_ids:
                    continue
                if yt_id not in discovered:
                    discovered[yt_id] = {"name": name, "count": 0, "regions": set(), "previews": []}
                discovered[yt_id]["count"] += 1
                discovered[yt_id]["regions"].add(region)
                previews = discovered[yt_id]["previews"]
                if len(previews) < 3 and video.get("thumbnail_url") and video.get("title"):
                    previews.append({
                        "thumbnail_url": video["thumbnail_url"],
                        "title": video["title"],
                    })
        except Exception:
            # Best effort: a failing region must not abort the other regions.
            continue
    if not discovered:
        return

    needs_indexing: list[int] = []
    for yt_id, info in discovered.items():
        channel = db.query(Channel).filter_by(youtube_channel_id=yt_id).first()
        is_new = channel is None
        if not channel:
            channel = Channel(
                youtube_channel_id=yt_id,
                name=info["name"],
                description="",
                thumbnail_url=None,
            )
            db.add(channel)
            db.flush()  # assigns channel.id before it is used below
        if channel.id in dismissed_channel_ids:
            continue
        uc = db.query(UserChannel).filter_by(user_id=user_id, channel_id=channel.id).first()
        if uc and uc.status in ("followed", "dismissed"):
            continue

        # Score: base x4 per region x count, boosted by tag relevance,
        # penalised by dismiss-tag overlap.
        base_score = float(info["count"]) * 4.0 * len(info["regions"])

        wants_boost = not is_new and bool(channel.crawled_at)
        wants_penalty = bool(dismiss_tags) and not is_new

        # The boost and the penalty read the same rows, so fetch them once
        # instead of issuing the identical query twice per candidate channel.
        channel_tag_rows = []
        if wants_boost or wants_penalty:
            channel_tag_rows = db.execute(
                text("SELECT tags FROM videos WHERE channel_id = :cid AND tags IS NOT NULL LIMIT 20"),
                {"cid": channel.id},
            ).scalars().all()

        # Tag relevance boost (requires the channel to have indexed videos).
        tag_boost = 0.0
        if wants_boost:
            for tags_json in channel_tag_rows:
                tag_boost += _tag_relevance_score(tag_profile, tags_json)
            tag_boost = min(tag_boost, 30.0)

        # Dismiss penalty: overlap with tags of dismissed channels lowers the score.
        dismiss_penalty = 0.0
        if wants_penalty:
            for tags_json in channel_tag_rows:
                try:
                    for tag in json.loads(tags_json or "[]"):
                        if isinstance(tag, str) and tag.lower().strip() in dismiss_tags:
                            dismiss_penalty += 5.0
                except (json.JSONDecodeError, TypeError):
                    pass
            dismiss_penalty = min(dismiss_penalty, base_score * 0.8)

        final_score = base_score + tag_boost - dismiss_penalty
        if final_score <= 0:
            continue

        preview_json = json.dumps(info["previews"]) if info["previews"] else None
        _add_to_discovery(db, user_id, channel.id, score=final_score, source="trending", preview_json=preview_json)
        if is_new or not channel.crawled_at:
            needs_indexing.append(channel.id)
    db.commit()

    # Index only the first five uncrawled channels per run.
    for channel_id in needs_indexing[:5]:
        channel = db.query(Channel).filter_by(id=channel_id).first()
        if channel:
            _fetch_and_index_channel(db, channel)
def run_full_discovery(db: Session, user_id: int, regions: list[str] | None = None):
    """Run every discovery signal for one user, in a fixed order.

    Order: search crawl, community, category clusters, liked tags, watched
    tags, then trending. ``regions`` is only used by the trending step and
    defaults to ``["US", "SE"]`` when ``None`` is passed.
    """
    trending_regions = ["US", "SE"] if regions is None else regions
    per_user_steps = (
        crawl_by_search,
        update_community_signal,
        update_category_clusters,
        update_liked_signal,
        update_watch_signal,
    )
    for step in per_user_steps:
        step(db, user_id)
    update_trending_signal(db, user_id, trending_regions)

View File

@@ -0,0 +1,86 @@
"""Surprise Me scoring logic."""
import random
from datetime import datetime, time
from sqlalchemy.orm import Session
from sqlalchemy import text
# Candidate query for "Surprise Me": scores the user's downloaded videos and
# returns the 50 highest-scoring rows.
#
# Bind parameters:
#   :user_id                - owner of the user_videos rows
#   :duration_bonus_active  - 1 to enable the long-video bonus, 0 to disable it
#
# base_score is the sum of:
#   +40  downloaded but not watched
#   -50  last watched within 7 days, or -20 within 30 days
#   +10  duration over 2700 s, multiplied by :duration_bonus_active
#   -5..+5 random jitter
SURPRISE_SQL = """
WITH candidate_scores AS (
SELECT
v.id AS video_id,
v.youtube_video_id,
v.title,
v.thumbnail_url,
v.duration_seconds,
v.channel_id,
c.name AS channel_name,
c.thumbnail_url AS channel_thumbnail_url,
uv.watched,
uv.watch_progress_seconds,
uv.downloaded,
uv.last_watched_at,
-- Unplayed download bonus
CASE WHEN uv.downloaded = 1 AND (uv.watched IS NULL OR uv.watched = 0) THEN 40 ELSE 0 END
-- Recency penalty
+ CASE
WHEN uv.last_watched_at IS NOT NULL
AND uv.last_watched_at > datetime('now', '-7 days') THEN -50
WHEN uv.last_watched_at IS NOT NULL
AND uv.last_watched_at > datetime('now', '-30 days') THEN -20
ELSE 0
END
-- Late evening duration bonus (applied in Python)
+ :duration_bonus_active * CASE WHEN v.duration_seconds > 2700 THEN 10 ELSE 0 END
-- Random jitter
+ (ABS(RANDOM()) % 11 - 5) AS base_score
FROM videos v
JOIN user_videos uv ON v.id = uv.video_id AND uv.user_id = :user_id
JOIN channels c ON v.channel_id = c.id
WHERE uv.downloaded = 1
)
SELECT * FROM candidate_scores
ORDER BY base_score DESC
LIMIT 50
"""
def get_surprise_videos(db: Session, user_id: int, limit: int = 10) -> list[dict]:
    """Return up to ``limit`` downloaded videos ranked for "Surprise Me".

    Candidates come from ``SURPRISE_SQL`` already ordered by ``base_score``.
    The long-video bonus is switched on when the local time is 21:00 or later.
    A diversity penalty of 30 points per earlier candidate from the same
    channel is subtracted to produce ``final_score``, which decides the final
    ordering.
    """
    bonus_flag = 1 if datetime.now().time() >= time(21, 0) else 0
    candidates = db.execute(
        text(SURPRISE_SQL),
        {"user_id": user_id, "duration_bonus_active": bonus_flag},
    ).mappings().all()

    # Channel diversity penalty: each repeat of a channel costs 30 more points.
    picks_per_channel: dict[int, int] = {}
    scored: list[dict] = []
    for candidate in candidates:
        entry = dict(candidate)
        channel_key = entry["channel_id"]
        earlier_picks = picks_per_channel.get(channel_key, 0)
        entry["final_score"] = entry["base_score"] - earlier_picks * 30
        picks_per_channel[channel_key] = earlier_picks + 1
        scored.append(entry)

    ranked = sorted(scored, key=lambda entry: entry["final_score"], reverse=True)
    return ranked[:limit]
def get_discovery_injection(db: Session, user_id: int) -> dict | None:
    """Fetch the highest-scored unseen discovery queue entry for a user.

    Returns a plain dict with the queue id, channel id, channel name,
    channel thumbnail, source and score, or ``None`` when the user has no
    unseen entries.
    """
    top_unseen_sql = text("""
        SELECT dq.id, c.id AS channel_id, c.name, c.thumbnail_url,
        dq.source, dq.score
        FROM discovery_queue dq
        JOIN channels c ON dq.channel_id = c.id
        WHERE dq.user_id = :user_id AND dq.seen = 0
        ORDER BY dq.score DESC
        LIMIT 1
    """)
    match = db.execute(top_unseen_sql, {"user_id": user_id}).mappings().first()
    if not match:
        return None
    return dict(match)

486
backend/services/ytdlp.py Normal file
View File

@@ -0,0 +1,486 @@
"""Subprocess wrapper for yt-dlp."""
import json
import re
import subprocess
import threading
import urllib.request
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from ..config import settings
def _run(args: list[str], timeout: int = 60) -> tuple[str, str, int]:
result = subprocess.run(args, capture_output=True, text=True, timeout=timeout)
return result.stdout, result.stderr, result.returncode
def _parse_date(date_str: str | None) -> datetime | None:
if not date_str:
return None
try:
return datetime.strptime(date_str, "%Y%m%d")
except ValueError:
return None
def _parse_published(info: dict) -> datetime | None:
    """Extract the publish date from a yt-dlp info dict as a naive UTC datetime.

    Tries ``upload_date`` (a ``YYYYMMDD`` string) first, then the Unix epoch
    fields ``timestamp`` and ``release_timestamp``. Flat-playlist entries often
    omit ``upload_date`` but include ``timestamp``, so the fallback matters.

    Returns ``None`` when no field yields a usable date.
    """
    d = _parse_date(info.get("upload_date"))
    if d:
        return d
    for key in ("timestamp", "release_timestamp"):
        ts = info.get(key)
        if not ts:
            continue
        try:
            # datetime.utcfromtimestamp() is deprecated since Python 3.12;
            # build an aware UTC value and drop tzinfo to keep the naive-UTC
            # result callers already store.
            aware = datetime.fromtimestamp(float(ts), tz=timezone.utc)
        except (TypeError, ValueError, OSError, OverflowError):
            # Non-numeric or out-of-range timestamp: try the next field.
            continue
        return aware.replace(tzinfo=None)
    return None
def _stable_thumbnail(video_id: str | None) -> str | None:
if not video_id:
return None
return f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
def _normalize_video(info: dict) -> dict:
    """Convert a full yt-dlp video info dict into the app's video record.

    ``tags`` is always a JSON-encoded list; ``chapters`` is a JSON-encoded list
    of ``{start_time, end_time, title}`` (untitled chapters are dropped) or
    ``None`` when there are none. ``category`` falls back to the first entry of
    ``categories``. The nested ``channel`` dict never carries a thumbnail.
    """
    video_id = info.get("id")

    chapters = []
    for ch in info.get("chapters") or []:
        if not ch.get("title"):
            continue
        chapters.append({
            "start_time": int(ch.get("start_time") or 0),
            "end_time": int(ch.get("end_time") or 0),
            "title": ch.get("title") or "",
        })

    channel = {
        "youtube_channel_id": info.get("channel_id"),
        "name": info.get("channel") or info.get("uploader", ""),
        "thumbnail_url": None,
    }

    record = {
        "youtube_video_id": video_id,
        "title": info.get("title", ""),
        "description": info.get("description", ""),
        "thumbnail_url": _stable_thumbnail(video_id),
        "duration_seconds": info.get("duration"),
        "published_at": _parse_published(info),
        "tags": json.dumps(info.get("tags") or []),
        "category": info.get("category") or (info.get("categories") or [None])[0],
        "chapters": json.dumps(chapters) if chapters else None,
        "channel": channel,
    }
    return record
def _channel_avatar(thumbnails: list | None) -> str | None:
"""Pick the channel avatar from yt-dlp's thumbnails list.
YouTube returns banners and avatars in the same array. Avatars have id
'avatar_uncropped' or are roughly square (width ≈ height).
"""
if not thumbnails:
return None
for t in thumbnails:
if "avatar" in str(t.get("id") or "").lower():
return t.get("url")
# Fall back to the most square thumbnail
square = [t for t in thumbnails
if t.get("width") and t.get("height")
and t["width"] <= t["height"] * 1.2
and t["height"] <= t["width"] * 1.2]
if square:
return max(square, key=lambda t: t.get("width") or 0).get("url")
return None
def _normalize_channel(info: dict) -> dict:
    """Convert a yt-dlp channel/playlist info dict into the app's channel record.

    Empty names and descriptions are stored as ``None``. ``banner_url`` is
    always ``None``.
    """
    channel_id = info.get("channel_id") or info.get("id")
    display_name = info.get("channel") or info.get("title") or info.get("uploader") or None
    about = info.get("description") or None
    avatar = _channel_avatar(info.get("thumbnails"))
    return {
        "youtube_channel_id": channel_id,
        "name": display_name,
        "description": about,
        "thumbnail_url": avatar,
        "banner_url": None,
        "subscriber_count": info.get("channel_follower_count"),
    }
def search_youtube(query: str, max_results: int = 40) -> list[dict]:
    """Search YouTube through yt-dlp and return normalised video records.

    Runs ``ytsearchN:query`` with ``--flat-playlist`` (fast, limited metadata),
    parses one JSON object per output line, and skips lines that are not valid
    JSON. ``category`` is always ``None`` in these records.
    """
    command = [
        "yt-dlp",
        f"ytsearch{max_results}:{query}",
        "--dump-json",
        "--flat-playlist",
        "--quiet",
        *_cookie_args(),
    ]
    stdout, _stderr, _returncode = _run(command, timeout=60)

    found = []
    for raw_line in stdout.splitlines():
        payload = raw_line.strip()
        if not payload:
            continue
        try:
            info = json.loads(payload)
        except json.JSONDecodeError:
            continue
        # Flat-playlist entries have _type="url" (or no _type) and basic fields.
        if info.get("_type") not in ("url", None) or not info.get("id"):
            continue
        channel = {
            "youtube_channel_id": info.get("channel_id"),
            "name": info.get("channel") or info.get("uploader") or "",
            "thumbnail_url": None,
        }
        found.append({
            "youtube_video_id": info.get("id"),
            "title": info.get("title", ""),
            "description": info.get("description") or "",
            "thumbnail_url": _stable_thumbnail(info.get("id")),
            "duration_seconds": info.get("duration"),
            "published_at": _parse_published(info),
            "tags": json.dumps(info.get("tags") or []),
            "category": None,
            "channel": channel,
        })
    return found
def fetch_trending(region: str = "US", max_results: int = 50) -> list[dict]:
    """Fetch "trending" videos for a region via a yt-dlp search sorted by upload date.

    Queries the YouTube results page for "trending" with the sort-by-upload-date
    filter and ``gl=<region>``, then parses one JSON object per output line.

    Args:
        region: Region code; upper-cased before use.
        max_results: Maximum number of entries requested from yt-dlp.

    Returns:
        Normalised video records (``category`` is always ``None``). An empty
        list is returned when yt-dlp cannot be started or times out, so callers
        never have to handle those errors.
    """
    region = region.upper()
    # The sp value is the sort-by-upload-date filter; gl= sets the region.
    url = f"https://www.youtube.com/results?search_query=trending&sp=CAI%253D&gl={region}"
    try:
        stdout, _stderr, _returncode = _run([
            "yt-dlp",
            url,
            "--dump-json",
            "--flat-playlist",
            "--quiet",
            "--playlist-end", str(max_results),
            *_cookie_args(),
        ], timeout=60)
    except (subprocess.TimeoutExpired, OSError):
        # Honour the documented contract: degrade to "no results" when the
        # yt-dlp process times out or cannot be launched.
        return []

    results = []
    for line in stdout.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            info = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Flat-playlist entries have _type="url" (or no _type) and basic fields.
        if info.get("_type") in ("url", None) and info.get("id"):
            results.append({
                "youtube_video_id": info.get("id"),
                "title": info.get("title", ""),
                "thumbnail_url": _stable_thumbnail(info.get("id")),
                "duration_seconds": info.get("duration"),
                "published_at": _parse_published(info),
                "tags": json.dumps(info.get("tags") or []),
                "category": None,
                "channel": {
                    "youtube_channel_id": info.get("channel_id"),
                    "name": info.get("channel") or info.get("uploader") or "",
                    "thumbnail_url": None,
                },
            })
    return results
def _best_thumbnail(thumbnails: list | None) -> str | None:
if not thumbnails:
return None
# pick the one closest to 480px wide
best = sorted(thumbnails, key=lambda t: abs((t.get("width") or 0) - 480))
return best[0].get("url") if best else None
def fetch_video_metadata(video_id: str) -> dict | None:
    """Fetch and normalise metadata for one video, given its YouTube id.

    Returns the record built from the first output line that parses as JSON,
    or ``None`` when yt-dlp printed nothing usable.
    """
    watch_url = f"https://www.youtube.com/watch?v={video_id}"
    command = [
        "yt-dlp",
        watch_url,
        "--dump-json",
        "--no-download",
        "--no-playlist",
        "--quiet",
        *_cookie_args(),
    ]
    stdout, _stderr, _returncode = _run(command, timeout=30)

    for raw_line in stdout.splitlines():
        payload = raw_line.strip()
        if not payload:
            continue
        try:
            info = json.loads(payload)
        except json.JSONDecodeError:
            continue
        return _normalize_video(info)
    return None
def _rss_dates(uc_channel_id: str) -> dict[str, datetime]:
"""Fetch publish dates for the 15 most recent videos from YouTube's RSS feed.
Fast, unauthenticated, and returns precise dates. Only works for UC… IDs.
"""
if not uc_channel_id or not uc_channel_id.startswith("UC"):
return {}
url = f"https://www.youtube.com/feeds/videos.xml?channel_id={uc_channel_id}"
try:
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
with urllib.request.urlopen(req, timeout=10) as resp:
xml_data = resp.read()
root = ET.fromstring(xml_data)
ns = {
"atom": "http://www.w3.org/2005/Atom",
"yt": "http://www.youtube.com/xml/schemas/2015",
}
dates: dict[str, datetime] = {}
for entry in root.findall("atom:entry", ns):
vid_el = entry.find("yt:videoId", ns)
pub_el = entry.find("atom:published", ns)
if vid_el is not None and pub_el is not None and vid_el.text and pub_el.text:
try:
dt = datetime.fromisoformat(pub_el.text.replace("Z", "+00:00"))
dates[vid_el.text] = dt.replace(tzinfo=None)
except ValueError:
pass
return dates
except Exception:
return {}
def fetch_channel_metadata(channel_id: str, max_videos: int = 30) -> dict | None:
    """Fetch a channel's info plus its most recent videos.

    Uses ``--dump-single-json --flat-playlist`` on the channel's ``/videos``
    page for speed, then overrides publish dates with those from the RSS feed
    where available. ``channel_id`` may be a ``@handle`` or a channel id.
    ``max_videos <= 0`` disables the ``--playlist-end`` limit.

    Returns ``{"channel": ..., "videos": [...]}``, or ``None`` when yt-dlp
    produced no output, invalid JSON, or JSON without an id.
    """
    if channel_id.startswith("@"):
        page_url = f"https://www.youtube.com/{channel_id}/videos"
    else:
        page_url = f"https://www.youtube.com/channel/{channel_id}/videos"

    command = [
        "yt-dlp", page_url,
        "--dump-single-json",
        "--flat-playlist",
        "--quiet",
        *_cookie_args(),
    ]
    if max_videos > 0:
        command.extend(["--playlist-end", str(max_videos)])

    stdout, _stderr, _returncode = _run(command, timeout=60)
    payload = stdout.strip()
    if not payload:
        return None
    try:
        info = json.loads(payload)
    except json.JSONDecodeError:
        return None
    if not (info.get("id") or info.get("channel_id")):
        return None

    channel_info = _normalize_channel(info)

    # One cheap HTTP request gives precise dates for the newest uploads.
    precise_dates = _rss_dates(channel_info.get("youtube_channel_id") or "")

    videos = []
    for entry in info.get("entries") or []:
        vid_id = entry.get("id")
        if not vid_id:
            continue
        owner = {
            "youtube_channel_id": channel_info.get("youtube_channel_id"),
            "name": channel_info.get("name") or "",
            "thumbnail_url": None,
        }
        videos.append({
            "youtube_video_id": vid_id,
            "title": entry.get("title") or "",
            "description": entry.get("description") or None,
            "thumbnail_url": _stable_thumbnail(vid_id),
            "duration_seconds": entry.get("duration"),
            "published_at": precise_dates.get(vid_id) or _parse_published(entry),
            "tags": json.dumps(entry.get("tags") or []),
            "category": (entry.get("categories") or [None])[0],
            "channel": owner,
        })
    return {"channel": channel_info, "videos": videos}
def fetch_channel_links(channel_id: str) -> list[str]:
    """Collect channel references mentioned in a channel's description.

    Queries the channel's ``/about`` page through yt-dlp and scans each
    returned ``description`` for ``youtube.com/channel/UC...`` links (returned
    as the bare id) and ``youtube.com/@handle`` links (returned as
    ``@handle``). The result is de-duplicated; its order is unspecified.
    """
    if channel_id.startswith("@"):
        about_url = f"https://www.youtube.com/{channel_id}/about"
    else:
        about_url = f"https://www.youtube.com/channel/{channel_id}/about"

    command = [
        "yt-dlp",
        about_url,
        "--dump-json",
        "--no-download",
        "--flat-playlist",
        "--playlist-end", "1",
        "--quiet",
        *_cookie_args(),
    ]
    stdout, _stderr, _returncode = _run(command, timeout=30)

    linked = set()
    for raw_line in stdout.splitlines():
        payload = raw_line.strip()
        if not payload:
            continue
        try:
            info = json.loads(payload)
        except json.JSONDecodeError:
            continue
        description = info.get("description", "") or ""
        linked.update(
            found.group(1)
            for found in re.finditer(r"youtube\.com/channel/(UC[\w-]+)", description)
        )
        linked.update(
            f"@{found.group(1)}"
            for found in re.finditer(r"youtube\.com/@([\w-]+)", description)
        )
    return list(linked)
# yt-dlp format-selector strings (passed to "-f") keyed by the quality label
# a download is requested with. Within each value, "/" separates alternatives
# in order of preference. start_download falls back to the "best" entry when
# it is given an unknown label.
QUALITY_FORMATS = {
    "best": "bestvideo[ext=mp4][vcodec^=avc1]+bestaudio[ext=m4a]/bestvideo[ext=mp4]+bestaudio[ext=m4a]/22/18/bestvideo+bestaudio/best",
    "2160p": "bestvideo[ext=mp4][height<=2160]+bestaudio[ext=m4a]/bestvideo[height<=2160]+bestaudio/best[height<=2160]",
    "1440p": "bestvideo[ext=mp4][height<=1440]+bestaudio[ext=m4a]/bestvideo[height<=1440]+bestaudio/best[height<=1440]",
    "1080p": "bestvideo[ext=mp4][vcodec^=avc1][height<=1080]+bestaudio[ext=m4a]/bestvideo[ext=mp4][height<=1080]+bestaudio[ext=m4a]/137+140/22/best[height<=1080]",
    "720p": "bestvideo[ext=mp4][vcodec^=avc1][height<=720]+bestaudio[ext=m4a]/bestvideo[ext=mp4][height<=720]+bestaudio[ext=m4a]/22/best[height<=720]",
    "480p": "bestvideo[ext=mp4][vcodec^=avc1][height<=480]+bestaudio[ext=m4a]/bestvideo[ext=mp4][height<=480]+bestaudio[ext=m4a]/18/best[height<=480]",
    "360p": "bestvideo[ext=mp4][height<=360]+bestaudio[ext=m4a]/18/best[height<=360]",
    "240p": "bestvideo[ext=mp4][height<=240]+bestaudio[ext=m4a]/best[height<=240]",
    "144p": "bestvideo[ext=mp4][height<=144]+bestaudio[ext=m4a]/best[height<=144]",
}
def detect_resolution(file_path: str) -> str | None:
"""Use ffprobe to get the video stream height and return a label like '1080p'."""
try:
result = subprocess.run(
["ffprobe", "-v", "quiet", "-select_streams", "v:0",
"-show_entries", "stream=height", "-of", "csv=p=0", file_path],
capture_output=True, text=True, timeout=15,
)
height = int(result.stdout.strip())
if height >= 1080: return "1080p"
if height >= 720: return "720p"
if height >= 480: return "480p"
if height >= 360: return "360p"
return f"{height}p"
except Exception:
return None
def predicted_file_path(video_id: str) -> Path:
    """Build the path ``<download_path>/<video_id>.mp4`` for a video download."""
    download_dir = Path(settings.download_path)
    file_name = f"{video_id}.mp4"
    return download_dir / file_name
# Limits how many download workers run at once (3 by default); acquired by
# the worker thread in start_download and replaced by set_max_concurrent.
_SEMAPHORE = threading.Semaphore(3)
# Serialises rebinding of _SEMAPHORE in set_max_concurrent.
_semaphore_lock = threading.Lock()
# Browser name handed to yt-dlp's --cookies-from-browser; "" disables the flag.
_cookies_browser: str = ""
# Guards reads and writes of _cookies_browser.
_cookies_lock = threading.Lock()
def set_max_concurrent(n: int) -> None:
    """Replace the download semaphore with one allowing ``n`` workers.

    ``n`` is clamped to the range 1..10. The module-level semaphore object is
    rebound rather than resized.
    """
    global _SEMAPHORE
    clamped = max(1, min(n, 10))
    with _semaphore_lock:
        _SEMAPHORE = threading.Semaphore(clamped)
def set_cookies_browser(browser: str) -> None:
    """Store the browser whose cookies yt-dlp should use.

    The value is stripped and lower-cased; an empty result turns the cookie
    option off.
    """
    global _cookies_browser
    with _cookies_lock:
        normalised = browser.strip().lower()
        _cookies_browser = normalised
def _cookie_args() -> list[str]:
    """Return the yt-dlp ``--cookies-from-browser`` arguments, if configured.

    Yields an empty list when no browser has been set.
    """
    with _cookies_lock:
        browser = _cookies_browser
    if not browser:
        return []
    return ["--cookies-from-browser", browser]
def start_download(
    video_id: str,
    download_id: int,
    on_progress: Any,
    on_complete: Any,
    on_error: Any,
    quality: str = "best",
) -> None:
    """Start a yt-dlp download of one video in a background daemon thread.

    The format selector comes from ``QUALITY_FORMATS`` (unknown labels fall
    back to ``"best"``). Output is written to
    ``<download_path>/<video_id>.<ext>`` and merged into MP4 when separate
    video and audio streams are fetched. ``--no-part`` makes yt-dlp write
    straight to the final file names instead of ``.part`` files.

    Args:
        video_id: YouTube video id.
        download_id: Opaque id passed back to every callback.
        on_progress: Called as ``on_progress(download_id, percent)``. The first
            stream is scaled to 0-85%, later streams to 85-95%; the value is
            capped at 95.
        on_complete: Called as ``on_complete(download_id, file_path,
            resolution)`` when yt-dlp exits with code 0. ``file_path`` is the
            last output path yt-dlp reported (the merged file when a merge
            happened) and may be ``None`` if none was reported.
        on_error: Called as ``on_error(download_id, message)`` when yt-dlp
            cannot be started or exits with a non-zero code.
        quality: Key of ``QUALITY_FORMATS``.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"
    # Predictable output path — lets the player locate the file early.
    output_template = str(Path(settings.download_path) / f"{video_id}.%(ext)s")
    fmt = QUALITY_FORMATS.get(quality, QUALITY_FORMATS["best"])

    def _run_download():
        with _SEMAPHORE:
            try:
                process = subprocess.Popen(
                    [
                        "yt-dlp", url,
                        "-f", fmt,
                        "--merge-output-format", "mp4",
                        "--postprocessor-args", "Merger+ffmpeg:-movflags +faststart",
                        "--embed-metadata", "--embed-thumbnail",
                        "--no-part", "--no-mtime",
                        "-o", output_template,
                        "--newline", "--progress", "--no-colors",
                        *_cookie_args(),
                    ],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                )
            except OSError as exc:
                # Without this the thread would die silently and the download
                # would never be reported as failed.
                on_error(download_id, f"failed to start yt-dlp: {exc}")
                return

            file_path = None
            stream_index = 0
            # The context manager closes the pipe and waits for the process.
            with process:
                for line in process.stdout:
                    line = line.strip()
                    if re.search(r"\[download\] Destination:", line):
                        stream_index += 1
                    m = re.search(r"\[download\]\s+([\d.]+)%", line)
                    if m:
                        pct = float(m.group(1))
                        scaled = pct * 0.85 if stream_index <= 1 else 85.0 + pct * 0.10
                        on_progress(download_id, min(scaled, 95.0))
                    m2 = re.search(r"\[(?:download|Merger)\] Destination: (.+)", line)
                    if m2:
                        file_path = m2.group(1).strip()
                    # The merger reports its output as: Merging formats into "<path>".
                    # Without this, file_path would keep pointing at the last
                    # per-stream file rather than the merged output.
                    m3 = re.search(r'\[Merger\] Merging formats into "(.+)"', line)
                    if m3:
                        file_path = m3.group(1).strip()

            if process.returncode == 0:
                resolution = detect_resolution(file_path) if file_path else None
                on_complete(download_id, file_path, resolution)
            else:
                on_error(download_id, f"yt-dlp exited with code {process.returncode}")

    thread = threading.Thread(target=_run_download, daemon=True)
    thread.start()